yacy.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Yacy (Web, Images, Videos, Music, Files)
  4. """
  5. from json import loads
  6. from dateutil import parser
  7. from urllib.parse import urlencode
  8. from httpx import DigestAuth
  9. from searx.utils import html_to_text
  10. # about
  11. about = {
  12. "website": 'https://yacy.net/',
  13. "wikidata_id": 'Q1759675',
  14. "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
  15. "use_official_api": True,
  16. "require_api_key": False,
  17. "results": 'JSON',
  18. }
  19. # engine dependent config
  20. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  21. paging = True
  22. number_of_results = 5
  23. http_digest_auth_user = ""
  24. http_digest_auth_pass = ""
  25. search_mode = 'global' # 'global', 'local'. By default, in yacy this is 'global'.
  26. # search-url
  27. base_url = 'http://localhost:8090'
  28. search_url = (
  29. '/yacysearch.json?{query}'
  30. '&startRecord={offset}'
  31. '&maximumRecords={limit}'
  32. '&contentdom={search_type}'
  33. '&resource={resource}'
  34. )
  35. # yacy specific type-definitions
  36. search_types = {'general': 'text', 'images': 'image', 'files': 'app', 'music': 'audio', 'videos': 'video'}
  37. # do search-request
  38. def request(query, params):
  39. offset = (params['pageno'] - 1) * number_of_results
  40. search_type = search_types.get(params.get('category'), '0')
  41. params['url'] = base_url + search_url.format(
  42. query=urlencode({'query': query}),
  43. offset=offset,
  44. limit=number_of_results,
  45. search_type=search_type,
  46. resource=search_mode,
  47. )
  48. if http_digest_auth_user and http_digest_auth_pass:
  49. params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
  50. # add language tag if specified
  51. if params['language'] != 'all':
  52. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  53. return params
  54. # get response from search-request
  55. def response(resp):
  56. results = []
  57. raw_search_results = loads(resp.text)
  58. # return empty array if there are no results
  59. if not raw_search_results:
  60. return []
  61. search_results = raw_search_results.get('channels', [])
  62. if len(search_results) == 0:
  63. return []
  64. for result in search_results[0].get('items', []):
  65. # parse image results
  66. if resp.search_params.get('category') == 'images':
  67. result_url = ''
  68. if 'url' in result:
  69. result_url = result['url']
  70. elif 'link' in result:
  71. result_url = result['link']
  72. else:
  73. continue
  74. # append result
  75. results.append(
  76. {
  77. 'url': result_url,
  78. 'title': result['title'],
  79. 'content': '',
  80. 'img_src': result['image'],
  81. 'template': 'images.html',
  82. }
  83. )
  84. # parse general results
  85. else:
  86. publishedDate = parser.parse(result['pubDate'])
  87. # append result
  88. results.append(
  89. {
  90. 'url': result['link'],
  91. 'title': result['title'],
  92. 'content': html_to_text(result['description']),
  93. 'publishedDate': publishedDate,
  94. }
  95. )
  96. # TODO parse video, audio and file results
  97. return results