yacy.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. ## Yacy (Web, Images, Videos, Music, Files)
  2. #
  3. # @website http://yacy.net
  4. # @provide-api yes
  5. # (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
  6. #
  7. # @using-api yes
  8. # @results JSON
  9. # @stable yes
  10. # @parse (general) url, title, content, publishedDate
  11. # @parse (images) url, title, img_src
  12. #
  13. # @todo parse video, audio and file results
  14. from json import loads
  15. from urllib import urlencode
  16. from dateutil import parser
  17. # engine dependent config
  18. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  19. paging = True
  20. language_support = True
  21. number_of_results = 5
  22. # search-url
  23. base_url = 'http://localhost:8090'
  24. search_url = '/yacysearch.json?{query}&startRecord={offset}&maximumRecords={limit}&contentdom={search_type}&resource=global'
  25. # yacy specific type-definitions
  26. search_types = {'general': 'text',
  27. 'images': 'image',
  28. 'files': 'app',
  29. 'music': 'audio',
  30. 'videos': 'video'}
  31. # do search-request
  32. def request(query, params):
  33. offset = (params['pageno'] - 1) * number_of_results
  34. search_type = search_types.get(params['category'], '0')
  35. params['url'] = base_url + search_url.format(query=urlencode({'query': query}),
  36. offset=offset,
  37. limit=number_of_results,
  38. search_type=search_type)
  39. # add language tag if specified
  40. if params['language'] != 'all':
  41. params['url'] += '&lr=lang_' + params['language'].split('_')[0]
  42. return params
  43. # get response from search-request
  44. def response(resp):
  45. results = []
  46. raw_search_results = loads(resp.text)
  47. # return empty array if there are no results
  48. if not raw_search_results:
  49. return []
  50. search_results = raw_search_results.get('channels', {})[0].get('items', [])
  51. if resp.search_params['category'] == 'general':
  52. # parse general results
  53. for result in search_results:
  54. publishedDate = parser.parse(result['pubDate'])
  55. # append result
  56. results.append({'url': result['link'],
  57. 'title': result['title'],
  58. 'content': result['description'],
  59. 'publishedDate': publishedDate})
  60. elif resp.search_params['category'] == 'images':
  61. # parse image results
  62. for result in search_results:
  63. # append result
  64. results.append({'url': result['url'],
  65. 'title': result['title'],
  66. 'content': '',
  67. 'img_src': result['image'],
  68. 'template': 'images.html'})
  69. #TODO parse video, audio and file results
  70. # return results
  71. return results