__init__.py 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. from os.path import realpath, dirname, splitext, join
  2. from os import listdir
  3. from imp import load_source
  4. import grequests
  5. from itertools import izip_longest, chain
  6. from operator import itemgetter
  7. engine_dir = dirname(realpath(__file__))
  8. engines = {}
  9. for filename in listdir(engine_dir):
  10. modname = splitext(filename)[0]
  11. if filename.startswith('_') or not filename.endswith('.py'):
  12. continue
  13. filepath = join(engine_dir, filename)
  14. engine = load_source(modname, filepath)
  15. if not hasattr(engine, 'request') or not hasattr(engine, 'response'):
  16. continue
  17. engines[modname] = engine
  18. def default_request_params():
  19. return {'method': 'GET', 'headers': {}, 'data': {}, 'url': ''}
  20. def make_callback(engine_name, results, callback):
  21. def process_callback(response, **kwargs):
  22. cb_res = []
  23. for result in callback(response):
  24. result['engine'] = engine_name
  25. cb_res.append(result)
  26. results[engine_name] = cb_res
  27. return process_callback
  28. def search(query, request, selected_engines):
  29. global engines
  30. requests = []
  31. results = {}
  32. user_agent = request.headers.get('User-Agent', '')
  33. for ename, engine in engines.items():
  34. if ename not in selected_engines:
  35. continue
  36. headers = default_request_params()
  37. headers['User-Agent'] = user_agent
  38. request_params = engine.request(query, headers)
  39. callback = make_callback(ename, results, engine.response)
  40. if request_params['method'] == 'GET':
  41. req = grequests.get(request_params['url']
  42. ,headers=headers
  43. ,hooks=dict(response=callback)
  44. )
  45. else:
  46. req = grequests.post(request_params['url']
  47. ,data=request_params['data']
  48. ,headers=headers
  49. ,hooks=dict(response=callback)
  50. )
  51. requests.append(req)
  52. grequests.map(requests)
  53. flat_res = list(filter(None, chain(*izip_longest(*results.values()))))
  54. flat_len = len(flat_res)
  55. results = []
  56. # deduplication + scoring
  57. for i,res in enumerate(flat_res):
  58. score = flat_len - i
  59. duplicated = False
  60. for new_res in results:
  61. if res['url'] == new_res['url']:
  62. duplicated = new_res
  63. break
  64. if duplicated:
  65. if len(res['content']) > len(duplicated):
  66. duplicated['content'] = res['content']
  67. duplicated['score'] += score
  68. else:
  69. res['score'] = score
  70. results.append(res)
  71. return sorted(results, key=itemgetter('score'), reverse=True)