gigablast.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
"""
Gigablast (Web)
@website https://gigablast.com
@provide-api yes (https://gigablast.com/api.html)
@using-api yes
@results XML
@stable yes
@parse url, title, content
"""
  10. from urllib import urlencode
  11. from cgi import escape
  12. from lxml import etree
  13. from random import randint
  14. from time import time
  15. # engine dependent config
  16. categories = ['general']
  17. paging = True
  18. number_of_results = 10
  19. language_support = True
  20. safesearch = True
  21. # search-url
  22. base_url = 'https://gigablast.com/'
  23. search_string = 'search?{query}'\
  24. '&n={number_of_results}'\
  25. '&s={offset}'\
  26. '&format=xml'\
  27. '&qh=0'\
  28. '&rxiyd={rxiyd}'\
  29. '&rand={rand}'\
  30. '&qlang={lang}'\
  31. '&ff={safesearch}'
  32. # specific xpath variables
  33. results_xpath = '//response//result'
  34. url_xpath = './/url'
  35. title_xpath = './/title'
  36. content_xpath = './/sum'
  37. # do search-request
  38. def request(query, params):
  39. offset = (params['pageno'] - 1) * number_of_results
  40. if params['language'] == 'all':
  41. language = 'xx'
  42. else:
  43. language = params['language'][0:2]
  44. if params['safesearch'] >= 1:
  45. safesearch = 1
  46. else:
  47. safesearch = 0
  48. search_path = search_string.format(query=urlencode({'q': query}),
  49. offset=offset,
  50. number_of_results=number_of_results,
  51. rxiyd=randint(10000, 10000000),
  52. rand=int(time()),
  53. lang=language,
  54. safesearch=safesearch)
  55. params['url'] = base_url + search_path
  56. return params
  57. # get response from search-request
  58. def response(resp):
  59. results = []
  60. dom = etree.fromstring(resp.content)
  61. # parse results
  62. for result in dom.xpath(results_xpath):
  63. url = result.xpath(url_xpath)[0].text
  64. title = result.xpath(title_xpath)[0].text
  65. content = escape(result.xpath(content_xpath)[0].text)
  66. # append result
  67. results.append({'url': url,
  68. 'title': title,
  69. 'content': content})
  70. # return results
  71. return results