google_videos.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Google (Videos)
"""

from datetime import date, timedelta
from urllib.parse import urlencode
import re

from lxml import html

from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex

# about
about = {
    "website": 'https://www.google.com',
    "wikidata_id": 'Q219885',
    "official_api_documentation": 'https://developers.google.com/custom-search/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}
# engine dependent config
categories = ['videos']
paging = True
safesearch = True
time_range_support = True
number_of_results = 10

search_url = 'https://www.google.com/search'\
    '?{query}'\
    '&tbm=vid'\
    '&{search_options}'
time_range_attr = "qdr:{range}"
time_range_custom_attr = "cdr:1,cd_min:{start},cd_max:{end}"
time_range_dict = {'day': 'd',
                   'week': 'w',
                   'month': 'm'}
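
# Note (illustration only, not used by the engine): with the templates above a
# 'week' search sends tbs=qdr:w, while the 'year' branch sends a custom range
# of the form tbs=cdr:1,cd_min:05/20/2023,cd_max:05/20/2024 (dates are examples).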


# do search-request
def request(query, params):
    search_options = {
        'ijn': params['pageno'] - 1,
        'start': (params['pageno'] - 1) * number_of_results
    }

    if params['time_range'] in time_range_dict:
        search_options['tbs'] = time_range_attr.format(range=time_range_dict[params['time_range']])
    elif params['time_range'] == 'year':
        now = date.today()
        then = now - timedelta(days=365)
        start = then.strftime('%m/%d/%Y')
        end = now.strftime('%m/%d/%Y')
        search_options['tbs'] = time_range_custom_attr.format(start=start, end=end)

    if safesearch and params['safesearch']:
        search_options['safe'] = 'on'

    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      search_options=urlencode(search_options))

    return params
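
# Example of what request() assembles (assumed, illustrative inputs):
#   query='time lapse', pageno=2, time_range='week', safesearch=1
#   -> https://www.google.com/search?q=time+lapse&tbm=vid&ijn=1&start=10&tbs=qdr%3Aw&safe=on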


# get response from search-request
def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in eval_xpath_list(dom, '//div[@class="g"]'):

        title = extract_text(eval_xpath(result, './/h3'))
        url = eval_xpath_getindex(result, './/div[@class="r"]/a/@href', 0)
        content = extract_text(eval_xpath(result, './/span[@class="st"]'))

        # get thumbnails: Google inlines them as base64 data URIs assigned by
        # an inline "_setImagesSrc" script, keyed by the <img> element id
        thumbnail = ''
        script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
        ids = result.xpath('.//div[@class="s"]//img/@id')
        if len(ids) > 0:
            thumbnails_data = \
                re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + ids[0],
                           script)
            tmp = []
            if len(thumbnails_data) != 0:
                tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
            if len(tmp) != 0:
                thumbnail = tmp[-1]

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content,
                        'thumbnail': thumbnail,
                        'template': 'videos.html'})

    return results
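

# ---------------------------------------------------------------------------
# Hedged, self-contained sketch of the data-URI extraction step used above.
# The script fragment is made up for illustration; real Google markup differs
# and changes frequently, so this only demonstrates the regex, not the engine.
if __name__ == '__main__':
    _fake_script = "s='data:image/jpeg;base64,/9j/4AAQSkZJRgABAQ';var ii=['vidthumb1']"
    _uris = re.findall(r'(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', _fake_script)
    print(_uris)  # -> ['data:image/jpeg;base64,/9j/4AAQSkZJRgABAQ']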