yacy.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
# Yacy (Web, Images, Videos, Music, Files)
#
# @website http://yacy.net
# @provide-api yes
# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse (general) url, title, content, publishedDate
# @parse (images) url, title, img_src
#
# @todo parse video, audio and file results
  14. from json import loads
  15. from dateutil import parser
  16. from urllib.parse import urlencode
  17. from requests.auth import HTTPDigestAuth
  18. from searx.utils import html_to_text
  19. # engine dependent config
  20. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  21. paging = True
  22. language_support = True
  23. number_of_results = 5
  24. http_digest_auth_user = ""
  25. http_digest_auth_pass = ""
  26. # search-url
  27. base_url = 'http://localhost:8090'
  28. search_url = '/yacysearch.json?{query}'\
  29. '&startRecord={offset}'\
  30. '&maximumRecords={limit}'\
  31. '&contentdom={search_type}'\
  32. '&resource=global'
  33. # yacy specific type-definitions
  34. search_types = {'general': 'text',
  35. 'images': 'image',
  36. 'files': 'app',
  37. 'music': 'audio',
  38. 'videos': 'video'}
  39. # do search-request
  40. def request(query, params):
  41. offset = (params['pageno'] - 1) * number_of_results
  42. search_type = search_types.get(params.get('category'), '0')
  43. params['url'] = base_url +\
  44. search_url.format(query=urlencode({'query': query}),
  45. offset=offset,
  46. limit=number_of_results,
  47. search_type=search_type)
  48. if http_digest_auth_user and http_digest_auth_pass:
  49. params['auth'] = HTTPDigestAuth(http_digest_auth_user, http_digest_auth_pass)
  50. # add language tag if specified
  51. if params['language'] != 'all':
  52. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  53. return params
  54. # get response from search-request
  55. def response(resp):
  56. results = []
  57. raw_search_results = loads(resp.text)
  58. # return empty array if there are no results
  59. if not raw_search_results:
  60. return []
  61. search_results = raw_search_results.get('channels', [])
  62. if len(search_results) == 0:
  63. return []
  64. for result in search_results[0].get('items', []):
  65. # parse image results
  66. if resp.search_params.get('category') == 'images':
  67. result_url = ''
  68. if 'url' in result:
  69. result_url = result['url']
  70. elif 'link' in result:
  71. result_url = result['link']
  72. else:
  73. continue
  74. # append result
  75. results.append({'url': result_url,
  76. 'title': result['title'],
  77. 'content': '',
  78. 'img_src': result['image'],
  79. 'template': 'images.html'})
  80. # parse general results
  81. else:
  82. publishedDate = parser.parse(result['pubDate'])
  83. # append result
  84. results.append({'url': result['link'],
  85. 'title': result['title'],
  86. 'content': html_to_text(result['description']),
  87. 'publishedDate': publishedDate})
  88. # TODO parse video, audio and file results
  89. return results