yacy.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Yacy (Web, Images, Videos, Music, Files)
  4. """
  5. from json import loads
  6. from dateutil import parser
  7. from urllib.parse import urlencode
  8. from httpx import DigestAuth
  9. from searx.utils import html_to_text
  10. # about
  11. about = {
  12. "website": 'https://yacy.net/',
  13. "wikidata_id": 'Q1759675',
  14. "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
  15. "use_official_api": True,
  16. "require_api_key": False,
  17. "results": 'JSON',
  18. }
  19. # engine dependent config
  20. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  21. paging = True
  22. number_of_results = 5
  23. http_digest_auth_user = ""
  24. http_digest_auth_pass = ""
  25. # search-url
  26. base_url = 'http://localhost:8090'
  27. search_url = (
  28. '/yacysearch.json?{query}'
  29. '&startRecord={offset}'
  30. '&maximumRecords={limit}'
  31. '&contentdom={search_type}'
  32. '&resource=global'
  33. )
  34. # yacy specific type-definitions
  35. search_types = {'general': 'text', 'images': 'image', 'files': 'app', 'music': 'audio', 'videos': 'video'}
  36. # do search-request
  37. def request(query, params):
  38. offset = (params['pageno'] - 1) * number_of_results
  39. search_type = search_types.get(params.get('category'), '0')
  40. params['url'] = base_url + search_url.format(
  41. query=urlencode({'query': query}), offset=offset, limit=number_of_results, search_type=search_type
  42. )
  43. if http_digest_auth_user and http_digest_auth_pass:
  44. params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass)
  45. # add language tag if specified
  46. if params['language'] != 'all':
  47. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  48. return params
  49. # get response from search-request
  50. def response(resp):
  51. results = []
  52. raw_search_results = loads(resp.text)
  53. # return empty array if there are no results
  54. if not raw_search_results:
  55. return []
  56. search_results = raw_search_results.get('channels', [])
  57. if len(search_results) == 0:
  58. return []
  59. for result in search_results[0].get('items', []):
  60. # parse image results
  61. if resp.search_params.get('category') == 'images':
  62. result_url = ''
  63. if 'url' in result:
  64. result_url = result['url']
  65. elif 'link' in result:
  66. result_url = result['link']
  67. else:
  68. continue
  69. # append result
  70. results.append(
  71. {
  72. 'url': result_url,
  73. 'title': result['title'],
  74. 'content': '',
  75. 'img_src': result['image'],
  76. 'template': 'images.html',
  77. }
  78. )
  79. # parse general results
  80. else:
  81. publishedDate = parser.parse(result['pubDate'])
  82. # append result
  83. results.append(
  84. {
  85. 'url': result['link'],
  86. 'title': result['title'],
  87. 'content': html_to_text(result['description']),
  88. 'publishedDate': publishedDate,
  89. }
  90. )
  91. # TODO parse video, audio and file results
  92. return results