123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- # SPDX-License-Identifier: AGPL-3.0-or-later
- """`Hugging Face`_ search engine for SearXNG.

.. _Hugging Face: https://huggingface.co

Configuration
=============

The engine has the following additional settings:

- :py:obj:`huggingface_endpoint`

Configurations for endpoints:

.. code:: yaml

  - name: huggingface
    engine: huggingface
    shortcut: hf

  - name: huggingface datasets
    huggingface_endpoint: datasets
    engine: huggingface
    shortcut: hfd

  - name: huggingface spaces
    huggingface_endpoint: spaces
    engine: huggingface
    shortcut: hfs

Implementations
===============

- """
- from urllib.parse import urlencode
- from datetime import datetime
- from searx.exceptions import SearxEngineAPIException
- from searx.utils import html_to_text
- from searx.result_types import EngineResults, MainResult
# Descriptive metadata about the upstream service: its website, its Wikidata
# entry, where the API is documented, whether the official API is used,
# whether an API key is needed, and the format of the results (JSON).
# NOTE(review): presumably consumed by SearXNG's engine overview -- confirm.
about = {
    "website": "https://huggingface.co/",
    "wikidata_id": "Q108943604",
    "official_api_documentation": "https://huggingface.co/docs/hub/en/api",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
# NOTE(review): presumably the SearXNG categories this engine is listed
# under -- confirm against the engine loader.
categories = ['it', 'repos']

# Root of both the API URL built in request() and the result links built in
# response().
base_url = "https://huggingface.co"

# Default endpoint; init() rejects any value other than ``datasets``,
# ``models`` or ``spaces``.
huggingface_endpoint = 'models'
"""Hugging Face supports datasets, models, spaces as search endpoint.
- ``datasets``: search for datasets
- ``models``: search for models
- ``spaces``: search for spaces
"""
def init(_):
    """Validate the configured ``huggingface_endpoint``.

    The single positional argument is accepted but not used.

    Raises :py:obj:`SearxEngineAPIException` when the module-level
    ``huggingface_endpoint`` is not one of ``datasets``, ``models`` or
    ``spaces``.
    """
    supported_endpoints = ('datasets', 'models', 'spaces')
    if huggingface_endpoint in supported_endpoints:
        return
    raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}")
def request(query, params):
    """Build the Hugging Face API search URL for *query*.

    The URL targets ``{base_url}/api/{huggingface_endpoint}`` and carries the
    query parameters ``direction=-1`` and ``search=<query>``.  It is stored
    under ``params["url"]`` and the same ``params`` mapping is returned.
    """
    query_string = urlencode({"direction": -1, "search": query})
    api_url = f"{base_url}/api/{huggingface_endpoint}"
    params["url"] = f"{api_url}?{query_string}"
    return params
def response(resp) -> EngineResults:
    """Turn the JSON list returned by the Hugging Face API into results.

    Every entry must carry an ``id``: it is used as the result title and to
    build the result URL (model pages live directly under ``base_url``, the
    other endpoints carry the endpoint name as a path segment).

    ``likes``, ``downloads``, ``tags`` and ``description`` are optional; the
    ones that are present and truthy are joined with ``" | "`` and passed
    through ``html_to_text`` to form the result content.

    ``createdAt`` is optional as well: when it is missing or does not match
    the expected timestamp format, the result is emitted without a
    publication date instead of aborting the whole response.
    """
    results = EngineResults()

    for entry in resp.json():
        entry_id = entry["id"]
        if huggingface_endpoint == 'models':
            url = f"{base_url}/{entry_id}"
        else:
            url = f"{base_url}/{huggingface_endpoint}/{entry_id}"

        # The publication date is best effort.  Reading it with .get() keeps
        # an entry without "createdAt" from raising KeyError, which the
        # handler below would not catch.
        published_date = None
        created_at = entry.get("createdAt")
        if created_at:
            try:
                published_date = datetime.strptime(created_at, "%Y-%m-%dT%H:%M:%S.%fZ")
            except (ValueError, TypeError):
                # Unexpected timestamp shape: keep the result, drop the date.
                published_date = None

        contents = []
        if entry.get("likes"):
            contents.append(f"Likes: {entry['likes']}")
        if entry.get("downloads"):
            contents.append(f"Downloads: {entry['downloads']:,}")
        if entry.get("tags"):
            contents.append(f"Tags: {', '.join(entry['tags'])}")
        if entry.get("description"):
            contents.append(f"Description: {entry['description']}")

        results.add(
            MainResult(
                title=entry_id,
                content=html_to_text(" | ".join(contents)),
                url=url,
                publishedDate=published_date,
            )
        )

    return results
|