yacy.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. ## Yacy (Web, Images, Videos, Music, Files)
  2. #
  3. # @website http://yacy.net
  4. # @provide-api yes (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
  5. #
  6. # @using-api yes
  7. # @results JSON
  8. # @stable yes
  9. # @parse (general) url, title, content, publishedDate
  10. # @parse (images) url, title, img_src
  11. #
  12. # @todo parse video, audio and file results
  13. from json import loads
  14. from urllib import urlencode
  15. from dateutil import parser
  16. # engine dependent config
  17. categories = ['general', 'images'] #TODO , 'music', 'videos', 'files'
  18. paging = True
  19. language_support = True
  20. number_of_results = 5
  21. # search-url
  22. base_url = 'http://localhost:8090'
  23. search_url = '/yacysearch.json?{query}&startRecord={offset}&maximumRecords={limit}&contentdom={search_type}&resource=global'
  24. # yacy specific type-definitions
  25. search_types = {'general': 'text',
  26. 'images': 'image',
  27. 'files': 'app',
  28. 'music': 'audio',
  29. 'videos': 'video'}
  30. # do search-request
  31. def request(query, params):
  32. offset = (params['pageno'] - 1) * number_of_results
  33. search_type = search_types.get(params['category'], '0')
  34. params['url'] = base_url + search_url.format(query=urlencode({'query': query}),
  35. offset=offset,
  36. limit=number_of_results,
  37. search_type=search_type)
  38. # add language tag if specified
  39. if params['language'] != 'all':
  40. params['url'] += '&lr=lang_' + params['language'].split('_')[0]
  41. return params
  42. # get response from search-request
  43. def response(resp):
  44. results = []
  45. raw_search_results = loads(resp.text)
  46. # return empty array if there are no results
  47. if not raw_search_results:
  48. return []
  49. search_results = raw_search_results.get('channels', {})[0].get('items', [])
  50. if resp.search_params['category'] == 'general':
  51. # parse general results
  52. for result in search_results:
  53. publishedDate = parser.parse(result['pubDate'])
  54. # append result
  55. results.append({'url': result['link'],
  56. 'title': result['title'],
  57. 'content': result['description'],
  58. 'publishedDate': publishedDate})
  59. elif resp.search_params['category'] == 'images':
  60. # parse image results
  61. for result in search_results:
  62. # append result
  63. results.append({'url': result['url'],
  64. 'title': result['title'],
  65. 'content': '',
  66. 'img_src': result['image'],
  67. 'template': 'images.html'})
  68. #TODO parse video, audio and file results
  69. # return results
  70. return results