huggingface.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
"""`Hugging Face`_ search engine for SearXNG.

.. _Hugging Face: https://huggingface.co

Configuration
=============

The engine has the following additional settings:

- :py:obj:`huggingface_endpoint`

Configurations for endpoints:

.. code:: yaml

  - name: huggingface
    engine: huggingface
    shortcut: hf

  - name: huggingface datasets
    huggingface_endpoint: datasets
    engine: huggingface
    shortcut: hfd

  - name: huggingface spaces
    huggingface_endpoint: spaces
    engine: huggingface
    shortcut: hfs

Implementations
===============

"""
  24. from urllib.parse import urlencode
  25. from datetime import datetime
  26. from searx.exceptions import SearxEngineAPIException
  27. from searx.utils import html_to_text
  28. from searx.result_types import EngineResults, MainResult
  29. about = {
  30. "website": "https://huggingface.co/",
  31. "wikidata_id": "Q108943604",
  32. "official_api_documentation": "https://huggingface.co/docs/hub/en/api",
  33. "use_official_api": True,
  34. "require_api_key": False,
  35. "results": "JSON",
  36. }
  37. categories = ['it', 'repos']
  38. base_url = "https://huggingface.co"
  39. huggingface_endpoint = 'models'
  40. """Hugging Face supports datasets, models, spaces as search endpoint.
  41. - ``datasets``: search for datasets
  42. - ``models``: search for models
  43. - ``spaces``: search for spaces
  44. """
  45. def init(_):
  46. if huggingface_endpoint not in ('datasets', 'models', 'spaces'):
  47. raise SearxEngineAPIException(f"Unsupported Hugging Face endpoint: {huggingface_endpoint}")
  48. def request(query, params):
  49. query_params = {
  50. "direction": -1,
  51. "search": query,
  52. }
  53. params["url"] = f"{base_url}/api/{huggingface_endpoint}?{urlencode(query_params)}"
  54. return params
  55. def response(resp) -> EngineResults:
  56. results = EngineResults()
  57. data = resp.json()
  58. for entry in data:
  59. if huggingface_endpoint != 'models':
  60. url = f"{base_url}/{huggingface_endpoint}/{entry['id']}"
  61. else:
  62. url = f"{base_url}/{entry['id']}"
  63. published_date = None
  64. try:
  65. published_date = datetime.strptime(entry["createdAt"], "%Y-%m-%dT%H:%M:%S.%fZ")
  66. except (ValueError, TypeError):
  67. pass
  68. contents = []
  69. if entry.get("likes"):
  70. contents.append(f"Likes: {entry['likes']}")
  71. if entry.get("downloads"):
  72. contents.append(f"Downloads: {entry['downloads']:,}")
  73. if entry.get("tags"):
  74. contents.append(f"Tags: {', '.join(entry['tags'])}")
  75. if entry.get("description"):
  76. contents.append(f"Description: {entry['description']}")
  77. item = MainResult(
  78. title=entry["id"],
  79. content=html_to_text(" | ".join(contents)),
  80. url=url,
  81. publishedDate=published_date,
  82. )
  83. results.add(item)
  84. return results