yacy.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Yacy (Web, Images, Videos, Music, Files)
"""
  5. from json import loads
  6. from dateutil import parser
  7. from urllib.parse import urlencode
  8. from requests.auth import HTTPDigestAuth
  9. from searx.utils import html_to_text
  10. # about
  11. about = {
  12. "website": 'https://yacy.net/',
  13. "wikidata_id": 'Q1759675',
  14. "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
  15. "use_official_api": True,
  16. "require_api_key": False,
  17. "results": 'JSON',
  18. }
  19. # engine dependent config
  20. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  21. paging = True
  22. language_support = True
  23. number_of_results = 5
  24. http_digest_auth_user = ""
  25. http_digest_auth_pass = ""
  26. # search-url
  27. base_url = 'http://localhost:8090'
  28. search_url = '/yacysearch.json?{query}'\
  29. '&startRecord={offset}'\
  30. '&maximumRecords={limit}'\
  31. '&contentdom={search_type}'\
  32. '&resource=global'
  33. # yacy specific type-definitions
  34. search_types = {'general': 'text',
  35. 'images': 'image',
  36. 'files': 'app',
  37. 'music': 'audio',
  38. 'videos': 'video'}
# do search-request
  40. def request(query, params):
  41. offset = (params['pageno'] - 1) * number_of_results
  42. search_type = search_types.get(params.get('category'), '0')
  43. params['url'] = base_url +\
  44. search_url.format(query=urlencode({'query': query}),
  45. offset=offset,
  46. limit=number_of_results,
  47. search_type=search_type)
  48. if http_digest_auth_user and http_digest_auth_pass:
  49. params['auth'] = HTTPDigestAuth(http_digest_auth_user, http_digest_auth_pass)
  50. # add language tag if specified
  51. if params['language'] != 'all':
  52. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  53. return params
# get response from search-request
  55. def response(resp):
  56. results = []
  57. raw_search_results = loads(resp.text)
  58. # return empty array if there are no results
  59. if not raw_search_results:
  60. return []
  61. search_results = raw_search_results.get('channels', [])
  62. if len(search_results) == 0:
  63. return []
  64. for result in search_results[0].get('items', []):
  65. # parse image results
  66. if resp.search_params.get('category') == 'images':
  67. result_url = ''
  68. if 'url' in result:
  69. result_url = result['url']
  70. elif 'link' in result:
  71. result_url = result['link']
  72. else:
  73. continue
  74. # append result
  75. results.append({'url': result_url,
  76. 'title': result['title'],
  77. 'content': '',
  78. 'img_src': result['image'],
  79. 'template': 'images.html'})
  80. # parse general results
  81. else:
  82. publishedDate = parser.parse(result['pubDate'])
  83. # append result
  84. results.append({'url': result['link'],
  85. 'title': result['title'],
  86. 'content': html_to_text(result['description']),
  87. 'publishedDate': publishedDate})
  88. # TODO parse video, audio and file results
  89. return results