# gigablast.py 2.7 KB

"""
Gigablast (Web)
@website https://gigablast.com
@provide-api yes (https://gigablast.com/api.html)
@using-api yes
@results JSON
@stable yes
@parse url, title, content
"""
  10. from json import loads
  11. from random import randint
  12. from time import time
  13. from urllib import urlencode
  14. from lxml.html import fromstring
  15. # engine dependent config
  16. categories = ['general']
  17. paging = True
  18. number_of_results = 10
  19. language_support = True
  20. safesearch = True
  21. # search-url
  22. base_url = 'https://gigablast.com/'
  23. search_string = 'search?{query}'\
  24. '&n={number_of_results}'\
  25. '&c=main'\
  26. '&s={offset}'\
  27. '&format=json'\
  28. '&qh=0'\
  29. '&qlang={lang}'\
  30. '&ff={safesearch}'\
  31. '&rxikd={rxikd}' # random number - 9 digits
  32. # specific xpath variables
  33. results_xpath = '//response//result'
  34. url_xpath = './/url'
  35. title_xpath = './/title'
  36. content_xpath = './/sum'
  37. supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
  38. # do search-request
  39. def request(query, params):
  40. offset = (params['pageno'] - 1) * number_of_results
  41. if params['language'] == 'all':
  42. language = 'xx'
  43. else:
  44. language = params['language'].replace('-', '_').lower()
  45. if language.split('-')[0] != 'zh':
  46. language = language.split('-')[0]
  47. if params['safesearch'] >= 1:
  48. safesearch = 1
  49. else:
  50. safesearch = 0
  51. search_path = search_string.format(query=urlencode({'q': query}),
  52. offset=offset,
  53. number_of_results=number_of_results,
  54. rxikd=str(time())[:9],
  55. lang=language,
  56. safesearch=safesearch)
  57. params['url'] = base_url + search_path
  58. return params
  59. # get response from search-request
  60. def response(resp):
  61. results = []
  62. # parse results
  63. response_json = loads(resp.text)
  64. for result in response_json['results']:
  65. # append result
  66. results.append({'url': result['url'],
  67. 'title': result['title'],
  68. 'content': result['sum']})
  69. # return results
  70. return results
  71. # get supported languages from their site
  72. def _fetch_supported_languages(resp):
  73. supported_languages = []
  74. dom = fromstring(resp.text)
  75. links = dom.xpath('//span[@id="menu2"]/a')
  76. for link in links:
  77. href = link.xpath('./@href')[0].split('lang%3A')
  78. if len(href) == 2:
  79. code = href[1].split('_')
  80. if len(code) == 2:
  81. code = code[0] + '-' + code[1].upper()
  82. else:
  83. code = code[0]
  84. supported_languages.append(code)
  85. return supported_languages