google_images.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
"""This is the implementation of the Google Images engine using the internal
Google API used by the Google Go Android app.

This internal API offers results in
  5. - JSON (_fmt:json)
  6. - Protobuf (_fmt:pb)
  7. - Protobuf compressed? (_fmt:pc)
  8. - HTML (_fmt:html)
  9. - Protobuf encoded in JSON (_fmt:jspb).
  10. .. admonition:: Content-Security-Policy (CSP)
  11. This engine needs to allow images from the `data URLs`_ (prefixed with the
  12. ``data:`` scheme)::
  13. Header set Content-Security-Policy "img-src 'self' data: ;"
  14. .. _data URLs:
  15. https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
  16. """
  17. from urllib.parse import urlencode
  18. from json import loads
  19. from searx.engines.google import (
  20. get_lang_info,
  21. time_range_dict,
  22. detect_google_sorry,
  23. )
  24. # pylint: disable=unused-import
  25. from searx.engines.google import supported_languages_url, _fetch_supported_languages
  26. # pylint: enable=unused-import
  27. # about
  28. about = {
  29. "website": 'https://images.google.com',
  30. "wikidata_id": 'Q521550',
  31. "official_api_documentation": 'https://developers.google.com/custom-search',
  32. "use_official_api": False,
  33. "require_api_key": False,
  34. "results": 'JSON',
  35. }
  36. # engine dependent config
  37. categories = ['images', 'web']
  38. paging = True
  39. use_locale_domain = True
  40. time_range_support = True
  41. safesearch = True
  42. send_accept_language_header = True
  43. filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
  44. def request(query, params):
  45. """Google-Image search request"""
  46. lang_info = get_lang_info(params, supported_languages, language_aliases, False)
  47. query_url = (
  48. 'https://'
  49. + lang_info['subdomain']
  50. + '/search'
  51. + "?"
  52. + urlencode(
  53. {
  54. 'q': query,
  55. 'tbm': "isch",
  56. **lang_info['params'],
  57. 'ie': "utf8",
  58. 'oe': "utf8",
  59. 'asearch': 'isch',
  60. 'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
  61. }
  62. )
  63. )
  64. if params['time_range'] in time_range_dict:
  65. query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
  66. if params['safesearch']:
  67. query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
  68. params['url'] = query_url
  69. params['headers'].update(lang_info['headers'])
  70. params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip'
  71. params['headers']['Accept'] = '*/*'
  72. return params
  73. def response(resp):
  74. """Get response from google's search request"""
  75. results = []
  76. detect_google_sorry(resp)
  77. response_2nd_line = resp.text.split("\n", 1)[1]
  78. json_data = loads(response_2nd_line)["ischj"]
  79. for item in json_data["metadata"]:
  80. results.append(
  81. {
  82. 'url': item["result"]["referrer_url"],
  83. 'title': item["result"]["page_title"],
  84. 'content': item["text_in_grid"]["snippet"],
  85. 'source': item["result"]["site_title"],
  86. 'format': f'{item["original_image"]["width"]} x item["original_image"]["height"]',
  87. 'img_src': item["original_image"]["url"],
  88. 'thumbnail_src': item["thumbnail"]["url"],
  89. 'template': 'images.html',
  90. }
  91. )
  92. return results