gigablast.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. """
  2. Gigablast (Web)
  3. @website https://gigablast.com
  4. @provide-api yes (https://gigablast.com/api.html)
  5. @using-api yes
  6. @results JSON
  7. @stable yes
  8. @parse url, title, content
  9. """
  10. from cgi import escape
  11. from json import loads
  12. from random import randint
  13. from time import time
  14. from urllib import urlencode
  # ---------------------------------------------------------------------------
  # Engine settings (module-level names; presumably read by the code that
  # loads this engine -- confirm against the loader).
  # ---------------------------------------------------------------------------
  categories = ['general']
  paging = True
  number_of_results = 10
  language_support = True
  safesearch = True

  # ---------------------------------------------------------------------------
  # Search URL
  # ---------------------------------------------------------------------------
  base_url = 'https://gigablast.com/'

  # Path + query-string template. Each {placeholder} is substituted with
  # str.format() by request(); {rxikd} receives the first nine characters of
  # the current timestamp string there.
  search_string = (
      'search?{query}'
      '&n={number_of_results}'
      '&c=main'
      '&s={offset}'
      '&format=json'
      '&qh=0'
      '&qlang={lang}'
      '&ff={safesearch}'
      '&rxikd={rxikd}'
  )

  # ---------------------------------------------------------------------------
  # XPath selectors. NOTE(review): not referenced by the request()/response()
  # code visible in this file, which parses JSON -- kept because other code
  # may still read them.
  # ---------------------------------------------------------------------------
  results_xpath = '//response//result'
  url_xpath = './/url'
  title_xpath = './/title'
  content_xpath = './/sum'
  37. # do search-request
  def request(query, params):
      """Build the Gigablast search URL and store it in ``params['url']``.

      Reads ``params['pageno']``, ``params['language']`` and
      ``params['safesearch']``; the module-level ``number_of_results``,
      ``search_string`` and ``base_url`` supply the page size and URL template.

      Returns the same ``params`` dict that was passed in.
      """
      # result offset of the requested page (pageno is 1-based here)
      start = (params['pageno'] - 1) * number_of_results

      # 'all' is sent as 'xx'; any other value is cut to its first two characters
      lang_code = 'xx' if params['language'] == 'all' else params['language'][0:2]

      # collapse the safesearch level into a 0/1 flag
      family_filter = 1 if params['safesearch'] >= 1 else 0

      template_values = {
          'query': urlencode({'q': query}),
          'offset': start,
          'number_of_results': number_of_results,
          # first nine characters of the current timestamp string
          'rxikd': str(time())[:9],
          'lang': lang_code,
          'safesearch': family_filter,
      }

      params['url'] = base_url + search_string.format(**template_values)
      return params
  56. # get response from search-request
  def response(resp):
      """Turn a Gigablast JSON reply into a list of result dicts.

      ``resp.text`` is decoded as JSON. Every entry of its ``'results'`` list
      yields one dict with the keys ``'url'``, ``'title'`` and ``'content'``;
      title and content (taken from the entry's ``'sum'`` field) are passed
      through ``escape``.
      """
      payload = loads(resp.text)

      return [
          {
              'url': hit['url'],
              'title': escape(hit['title']),
              'content': escape(hit['sum']),
          }
          for hit in payload['results']
      ]