Browse Source

[feat] engines: add Hugging Face engine

Zhijie He 1 month ago
parent
commit
f94802f2d2
3 changed files with 140 additions and 0 deletions
  1. 8 0
      docs/dev/engines/online/huggingface.rst
  2. 116 0
      searx/engines/huggingface.py
  3. 16 0
      searx/settings.yml

+ 8 - 0
docs/dev/engines/online/huggingface.rst

@@ -0,0 +1,8 @@
+.. _huggingface engine:
+
+============
+Hugging Face
+============
+
+.. automodule:: searx.engines.huggingface
+   :members:

+ 116 - 0
searx/engines/huggingface.py

@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""`Hugging Face`_ search engine for SearXNG.
+
+.. _Hugging Face: https://huggingface.co
+
+Configuration
+=============
+
+The engine has the following additional settings:
+
+- :py:obj:`huggingface_endpoint`
+
+Configurations for endpoints:
+
+.. code:: yaml
+
+  - name: huggingface
+    engine: huggingface
+    shortcut: hf
+
+  - name: huggingface datasets
+    huggingface_endpoint: datasets
+    engine: huggingface
+    shortcut: hfd
+
+  - name: huggingface spaces
+    huggingface_endpoint: spaces
+    engine: huggingface
+    shortcut: hfs
+
+Implementations
+===============
+
+"""
+
+from urllib.parse import urlencode
+from datetime import datetime
+
+from searx.exceptions import SearxEngineAPIException
+from searx.utils import html_to_text
+from searx.result_types import EngineResults, MainResult
+
+# Static metadata describing the upstream service and its API.
+about = {
+    "website": "https://huggingface.co/",
+    "wikidata_id": "Q108943604",
+    "official_api_documentation": "https://huggingface.co/docs/hub/en/api",
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": "JSON",
+}
+
+# Categories assigned to this engine.
+categories = ['it', 'repos']
+
+# Site root; prefixes both the API request URL and the links of the results.
+base_url = "https://huggingface.co"
+
+# Default endpoint; checked in init() and interpolated into the API path.
+huggingface_endpoint = 'models'
+"""Hugging Face supports datasets, models, spaces as search endpoint.
+
+- ``datasets``: search for datasets
+- ``models``: search for models
+- ``spaces``: search for spaces
+"""
+
+
+def init(_):
+    if huggingface_endpoint not in ('datasets', 'models', 'spaces'):
+        raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}")
+
+
+def request(query, params):
+    query_params = {
+        "direction": -1,
+        "search": query,
+    }
+
+    params["url"] = f"{base_url}/api/{huggingface_endpoint}?{urlencode(query_params)}"
+
+    return params
+
+
+def response(resp) -> EngineResults:
+    results = EngineResults()
+
+    data = resp.json()
+
+    for entry in data:
+        if huggingface_endpoint != 'models':
+            url = f"{base_url}/{huggingface_endpoint}/{entry['id']}"
+        else:
+            url = f"{base_url}/{entry['id']}"
+
+        published_date = None
+        try:
+            published_date = datetime.strptime(entry["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
+        except (ValueError, TypeError):
+            pass
+
+        contents = []
+        if entry.get("likes"):
+            contents.append(f"Likes: {entry['likes']}")
+        if entry.get("downloads"):
+            contents.append(f"Downloads: {entry['downloads']:,}")
+        if entry.get("tags"):
+            contents.append(f"Tags: {', '.join(entry['tags'])}")
+        if entry.get("description"):
+            contents.append(f"Description: {entry['description']}")
+
+        item = MainResult(
+            title=entry["id"],
+            content=html_to_text(" | ".join(contents)),
+            url=url,
+            publishedDate=published_date,
+        )
+        results.add(item)
+
+    return results

+ 16 - 0
searx/settings.yml

@@ -1134,6 +1134,22 @@ engines:
   - name: il post
   - name: il post
     engine: il_post
     engine: il_post
     shortcut: pst
     shortcut: pst
+
+  - name: huggingface
+    engine: huggingface
+    shortcut: hf
+    disabled: true
+
+  - name: huggingface datasets
+    huggingface_endpoint: datasets
+    engine: huggingface
+    shortcut: hfd
+    disabled: true
+
+  - name: huggingface spaces
+    huggingface_endpoint: spaces
+    engine: huggingface
+    shortcut: hfs
     disabled: true
     disabled: true
 
 
   - name: imdb
   - name: imdb