yacy.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """YaCy_ is a free distributed search engine, built on the principles of
  4. peer-to-peer (P2P) networks.
  5. API: Dev:APIyacysearch_
  6. Releases:
  7. - https://github.com/yacy/yacy_search_server/tags
  8. - https://download.yacy.net/
  9. .. _Yacy: https://yacy.net/
  10. .. _Dev:APIyacysearch: https://wiki.yacy.net/index.php/Dev:APIyacysearch
  11. Configuration
  12. =============
  13. The engine has the following (additional) settings:
  14. .. code:: yaml
  15. - name: yacy
  16. engine: yacy
  17. shortcut: ya
  18. base_url: http://localhost:8090
  19. # Yacy search mode. 'global' or 'local'.
  20. search_mode: 'global'
  21. number_of_results: 5
  22. http_digest_auth_user: ""
  23. http_digest_auth_pass: ""
  24. Implementations
  25. ===============
  26. """
  27. # pylint: disable=fixme
  28. from json import loads
  29. from urllib.parse import urlencode
  30. from dateutil import parser
  31. from httpx import DigestAuth
  32. from searx.utils import html_to_text
# about: static metadata describing the upstream service this engine talks to
# (project website, Wikidata item, API documentation link, and the format of
# the results it returns)
about = {
    "website": 'https://yacy.net/',
    "wikidata_id": 'Q1759675',
    "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}
# engine dependent config
# Only 'general' and 'images' are enabled: response() has no parser yet for
# the remaining categories named in the TODO below.
categories = ['general', 'images']  # TODO , 'music', 'videos', 'files'
paging = True
# page size: sent as ``maximumRecords`` and used by request() to compute the
# ``startRecord`` offset from the page number
number_of_results = 5
# HTTP digest credentials; request() only attaches them when BOTH are non-empty
http_digest_auth_user = ""
http_digest_auth_pass = ""
# sent by request() as the ``resource`` query parameter
search_mode = 'global'
"""Yacy search mode ``global`` or ``local``. By default, Yacy operates in ``global``
mode.
``global``
Peer-to-Peer search
``local``
Privacy or Stealth mode, restricts the search to local yacy instance.
"""
  56. # search-url
  57. base_url = 'http://localhost:8090'
  58. search_url = (
  59. '/yacysearch.json?{query}'
  60. '&startRecord={offset}'
  61. '&maximumRecords={limit}'
  62. '&contentdom={search_type}'
  63. '&resource={resource}'
  64. )
  65. # yacy specific type-definitions
  66. search_types = {'general': 'text', 'images': 'image', 'files': 'app', 'music': 'audio', 'videos': 'video'}
  67. def request(query, params):
  68. offset = (params['pageno'] - 1) * number_of_results
  69. search_type = search_types.get(params.get('category'), '0')
  70. params['url'] = base_url + search_url.format(
  71. query=urlencode({'query': query}),
  72. offset=offset,
  73. limit=number_of_results,
  74. search_type=search_type,
  75. resource=search_mode,
  76. )
  77. if http_digest_auth_user and http_digest_auth_pass:
  78. params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
  79. # add language tag if specified
  80. if params['language'] != 'all':
  81. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  82. return params
  83. def response(resp):
  84. results = []
  85. raw_search_results = loads(resp.text)
  86. # return empty array if there are no results
  87. if not raw_search_results:
  88. return []
  89. search_results = raw_search_results.get('channels', [])
  90. if len(search_results) == 0:
  91. return []
  92. for result in search_results[0].get('items', []):
  93. # parse image results
  94. if resp.search_params.get('category') == 'images':
  95. result_url = ''
  96. if 'url' in result:
  97. result_url = result['url']
  98. elif 'link' in result:
  99. result_url = result['link']
  100. else:
  101. continue
  102. # append result
  103. results.append(
  104. {
  105. 'url': result_url,
  106. 'title': result['title'],
  107. 'content': '',
  108. 'img_src': result['image'],
  109. 'template': 'images.html',
  110. }
  111. )
  112. # parse general results
  113. else:
  114. publishedDate = parser.parse(result['pubDate'])
  115. # append result
  116. results.append(
  117. {
  118. 'url': result['link'],
  119. 'title': result['title'],
  120. 'content': html_to_text(result['description']),
  121. 'publishedDate': publishedDate,
  122. }
  123. )
  124. # TODO parse video, audio and file results
  125. return results