gigablast.py 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. """
  2. Gigablast (Web)
  3. @website https://gigablast.com
  4. @provide-api yes (https://gigablast.com/api.html)
  5. @using-api yes
  6. @results JSON
  7. @stable yes
  8. @parse url, title, content
  9. """
  10. from json import loads
  11. from random import randint
  12. from time import time
  13. from urllib import urlencode
  14. from requests import get
  15. from lxml.html import fromstring
  16. # engine dependent config
  17. categories = ['general']
  18. paging = True
  19. number_of_results = 10
  20. language_support = True
  21. safesearch = True
  22. # search-url
  23. base_url = 'https://gigablast.com/'
  24. search_string = 'search?{query}'\
  25. '&n={number_of_results}'\
  26. '&c=main'\
  27. '&s={offset}'\
  28. '&format=json'\
  29. '&qh=0'\
  30. '&qlang={lang}'\
  31. '&ff={safesearch}'\
  32. '&rxikd={rxikd}' # random number - 9 digits
  33. # specific xpath variables
  34. results_xpath = '//response//result'
  35. url_xpath = './/url'
  36. title_xpath = './/title'
  37. content_xpath = './/sum'
  38. supported_languages_url = 'https://gigablast.com/search?&rxikd=1'
  39. # do search-request
  40. def request(query, params):
  41. offset = (params['pageno'] - 1) * number_of_results
  42. if params['language'] == 'all':
  43. language = 'xx'
  44. else:
  45. language = params['language'].replace('-', '_').lower()
  46. if language.split('-')[0] != 'zh':
  47. language = language.split('-')[0]
  48. if params['safesearch'] >= 1:
  49. safesearch = 1
  50. else:
  51. safesearch = 0
  52. search_path = search_string.format(query=urlencode({'q': query}),
  53. offset=offset,
  54. number_of_results=number_of_results,
  55. rxikd=str(time())[:9],
  56. lang=language,
  57. safesearch=safesearch)
  58. params['url'] = base_url + search_path
  59. return params
  60. # get response from search-request
  61. def response(resp):
  62. results = []
  63. # parse results
  64. response_json = loads(resp.text)
  65. for result in response_json['results']:
  66. # append result
  67. results.append({'url': result['url'],
  68. 'title': result['title'],
  69. 'content': result['sum']})
  70. # return results
  71. return results
  72. # get supported languages from their site
  73. def fetch_supported_languages():
  74. supported_languages = []
  75. response = get(supported_languages_url)
  76. dom = fromstring(response.text)
  77. links = dom.xpath('//span[@id="menu2"]/a')
  78. for link in links:
  79. code = link.xpath('./@href')[0][-2:]
  80. if code != 'xx' and code not in supported_languages:
  81. supported_languages.append(code)
  82. return supported_languages