# SPDX-License-Identifier: AGPL-3.0-or-later """`Hugging Face`_ search engine for SearXNG. .. _Hugging Face: https://huggingface.co Configuration ============= The engine has the following additional settings: - :py:obj:`huggingface_endpoint` Configurations for endpoints: .. code:: yaml - name: huggingface engine: huggingface shortcut: hf - name: huggingface datasets huggingface_endpoint: datasets engine: huggingface shortcut: hfd - name: huggingface spaces huggingface_endpoint: spaces engine: huggingface shortcut: hfs Implementations =============== """ from urllib.parse import urlencode from datetime import datetime from searx.exceptions import SearxEngineAPIException from searx.utils import html_to_text from searx.result_types import EngineResults, MainResult about = { "website": "https://huggingface.co/", "wikidata_id": "Q108943604", "official_api_documentation": "https://huggingface.co/docs/hub/en/api", "use_official_api": True, "require_api_key": False, "results": "JSON", } categories = ['it', 'repos'] base_url = "https://huggingface.co" huggingface_endpoint = 'models' """Hugging Face supports datasets, models, spaces as search endpoint. - ``datasets``: search for datasets - ``models``: search for models - ``spaces``: search for spaces """ def init(_): if huggingface_endpoint not in ('datasets', 'models', 'spaces'): raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}") def request(query, params): query_params = { "direction": -1, "search": query, } params["url"] = f"{base_url}/api/{huggingface_endpoint}?{urlencode(query_params)}" return params def response(resp) -> EngineResults: results = EngineResults() data = resp.json() for entry in data: if huggingface_endpoint != 'models': url = f"{base_url}/{huggingface_endpoint}/{entry['id']}" else: url = f"{base_url}/{entry['id']}" published_date = None try: published_date = datetime.strptime(entry["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ") except (ValueError, TypeError): pass contents = [] if entry.get("likes"): contents.append(f"Likes: {entry['likes']}") if entry.get("downloads"): contents.append(f"Downloads: {entry['downloads']:,}") if entry.get("tags"): contents.append(f"Tags: {', '.join(entry['tags'])}") if entry.get("description"): contents.append(f"Description: {entry['description']}") item = MainResult( title=entry["id"], content=html_to_text(" | ".join(contents)), url=url, publishedDate=published_date, ) results.add(item) return results