gigablast.py

  1. """
  2. Gigablast (Web)
  3. @website https://gigablast.com
  4. @provide-api yes (https://gigablast.com/api.html)
  5. @using-api yes
  6. @results XML
  7. @stable yes
  8. @parse url, title, content
  9. """
from json import loads
from time import time

from lxml.html import fromstring

from searx.poolrequests import get
from searx.url_utils import urlencode
from searx.utils import eval_xpath

# engine dependent config
categories = ['general']
paging = True
number_of_results = 10
language_support = True
safesearch = True

# search-url
base_url = 'https://gigablast.com/'
search_string = 'search?{query}'\
    '&n={number_of_results}'\
    '&c=main'\
    '&s={offset}'\
    '&format=json'\
    '&langcountry={lang}'\
    '&ff={safesearch}'\
    '&rand={rxikd}'

# specific xpath variables (not used by the JSON parser below)
results_xpath = '//response//result'
url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'

supported_languages_url = 'https://gigablast.com/search?&rxikd=1'

# Gigablast requires a random extra parameter which can be extracted
# from the source code of the search page
extra_param = ''


def parse_extra_param(text):
    global extra_param
    param_lines = [x for x in text.splitlines() if x.startswith('var url=') or x.startswith('url=url+')]
    extra_param = ''
    # the parameter is split across several JS lines: join the quoted
    # fragments, then keep the last '&'-separated token
    for line in param_lines:
        extra_param += line.split("'")[1]
    extra_param = extra_param.split('&')[-1]
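
# A sketch of the page source parse_extra_param() expects (the markup and
# the parameter name 'nocache' are hypothetical):
#
#     var url='/search?c=main&qlangcountry=en-us&q=south&s=10'
#     url=url+'&nocache=123456'
#
# Joining the quoted fragments and keeping the last '&'-separated token
# would set extra_param to 'nocache=123456'.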


def init(engine_settings=None):
    # fetch the current extra parameter once, when the engine is loaded
    parse_extra_param(get('https://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10').text)


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * number_of_results

    if params['language'] == 'all':
        language = 'xx'
    else:
        language = params['language'].replace('-', '_').lower()
        # keep the full langcountry code only for Chinese
        if language.split('_')[0] != 'zh':
            language = language.split('_')[0]

    if params['safesearch'] >= 1:
        safesearch = 1
    else:
        safesearch = 0
    # the rand parameter is expected to be some kind of hash of the search
    # query, but a random value is accepted at the moment
    search_path = search_string.format(query=urlencode({'q': query}),
                                       offset=offset,
                                       number_of_results=number_of_results,
                                       lang=language,
                                       rxikd=int(time() * 1000),
                                       safesearch=safesearch)

    params['url'] = base_url + search_path + '&' + extra_param

    return params
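
# An example of a generated request URL (rand and the trailing extra
# parameter carry illustrative values):
#
#     https://gigablast.com/search?q=south&n=10&c=main&s=0&format=json
#         &langcountry=en&ff=0&rand=1571234567890&nocache=123456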


# get response from search-request
def response(resp):
    results = []

    # parse results: Gigablast answers with JSON, anything else means the
    # extra parameter has expired
    try:
        response_json = loads(resp.text)
    except ValueError:
        # refresh the extra parameter from the returned HTML page so the
        # next request can succeed
        parse_extra_param(resp.text)
        raise Exception('extra param expired, please reload')

    for result in response_json['results']:
        # append result
        results.append({'url': result['url'],
                        'title': result['title'],
                        'content': result['sum']})

    return results
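
# Assumed shape of the JSON answer (abridged; the field names follow the
# parsing above, the values are invented):
#
#     {"results": [{"url": "https://example.com/",
#                   "title": "Example",
#                   "sum": "snippet text ..."}]}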


# get supported languages from their site
def _fetch_supported_languages(resp):
    supported_languages = []
    dom = fromstring(resp.text)
    links = eval_xpath(dom, '//span[@id="menu2"]/a')
    for link in links:
        # the language code is embedded in the link target after 'lang%3A'
        href = eval_xpath(link, './@href')[0].split('lang%3A')
        if len(href) == 2:
            code = href[1].split('_')
            if len(code) == 2:
                # upper-case the country part: 'en_us' -> 'en-US'
                code = code[0] + '-' + code[1].upper()
            else:
                code = code[0]
            supported_languages.append(code)

    return supported_languages
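
# For illustration (hypothetical hrefs): a menu link ending in
# 'lang%3Aen_us' is reported as 'en-US', while one ending in
# 'lang%3Afr' is reported as 'fr'.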