yacy.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
# Yacy (Web, Images, Videos, Music, Files)
#
# @website http://yacy.net
# @provide-api yes
# (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
#
# @using-api yes
# @results JSON
# @stable yes
# @parse (general) url, title, content, publishedDate
# @parse (images) url, title, img_src
#
# @todo parse video, audio and file results
  14. from json import loads
  15. from dateutil import parser
  16. from searx.url_utils import urlencode
  17. from searx.utils import html_to_text
  18. # engine dependent config
  19. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  20. paging = True
  21. language_support = True
  22. number_of_results = 5
  23. # search-url
  24. base_url = 'http://localhost:8090'
  25. search_url = '/yacysearch.json?{query}'\
  26. '&startRecord={offset}'\
  27. '&maximumRecords={limit}'\
  28. '&contentdom={search_type}'\
  29. '&resource=global'
  30. # yacy specific type-definitions
  31. search_types = {'general': 'text',
  32. 'images': 'image',
  33. 'files': 'app',
  34. 'music': 'audio',
  35. 'videos': 'video'}
  36. # do search-request
  37. def request(query, params):
  38. offset = (params['pageno'] - 1) * number_of_results
  39. search_type = search_types.get(params.get('category'), '0')
  40. params['url'] = base_url +\
  41. search_url.format(query=urlencode({'query': query}),
  42. offset=offset,
  43. limit=number_of_results,
  44. search_type=search_type)
  45. params['url'] += '&lr=lang_' + params['language'].split('-')[0]
  46. return params
  47. # get response from search-request
  48. def response(resp):
  49. results = []
  50. raw_search_results = loads(resp.text)
  51. # return empty array if there are no results
  52. if not raw_search_results:
  53. return []
  54. search_results = raw_search_results.get('channels', [])
  55. if len(search_results) == 0:
  56. return []
  57. for result in search_results[0].get('items', []):
  58. # parse image results
  59. if result.get('image'):
  60. # append result
  61. results.append({'url': result['url'],
  62. 'title': result['title'],
  63. 'content': '',
  64. 'img_src': result['image'],
  65. 'template': 'images.html'})
  66. # parse general results
  67. else:
  68. publishedDate = parser.parse(result['pubDate'])
  69. # append result
  70. results.append({'url': result['link'],
  71. 'title': result['title'],
  72. 'content': html_to_text(result['description']),
  73. 'publishedDate': publishedDate})
  74. # TODO parse video, audio and file results
  75. # return results
  76. return results