google_images.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """This is the implementation of the google images engine using the google
internal API used by the Google Go Android app.
This internal API offers results in
  6. - JSON (_fmt:json)
  7. - Protobuf (_fmt:pb)
  8. - Protobuf compressed? (_fmt:pc)
  9. - HTML (_fmt:html)
  10. - Protobuf encoded in JSON (_fmt:jspb).
  11. """
  12. from urllib.parse import urlencode
  13. from json import loads
  14. from searx.engines.google import (
  15. get_lang_info,
  16. time_range_dict,
  17. detect_google_sorry,
  18. )
  19. # pylint: disable=unused-import
  20. from searx.engines.google import supported_languages_url, _fetch_supported_languages
  21. # pylint: enable=unused-import
  22. # about
  23. about = {
  24. "website": 'https://images.google.com',
  25. "wikidata_id": 'Q521550',
  26. "official_api_documentation": 'https://developers.google.com/custom-search',
  27. "use_official_api": False,
  28. "require_api_key": False,
  29. "results": 'JSON',
  30. }
  31. # engine dependent config
  32. categories = ['images', 'web']
  33. paging = True
  34. use_locale_domain = True
  35. time_range_support = True
  36. safesearch = True
  37. send_accept_language_header = True
  38. filter_mapping = {0: 'images', 1: 'active', 2: 'active'}
  39. def request(query, params):
  40. """Google-Image search request"""
  41. lang_info = get_lang_info(params, supported_languages, language_aliases, False)
  42. query_url = (
  43. 'https://'
  44. + lang_info['subdomain']
  45. + '/search'
  46. + "?"
  47. + urlencode(
  48. {
  49. 'q': query,
  50. 'tbm': "isch",
  51. **lang_info['params'],
  52. 'ie': "utf8",
  53. 'oe': "utf8",
  54. 'asearch': 'isch',
  55. 'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
  56. }
  57. )
  58. )
  59. if params['time_range'] in time_range_dict:
  60. query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
  61. if params['safesearch']:
  62. query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
  63. params['url'] = query_url
  64. params['headers'].update(lang_info['headers'])
  65. params['headers']['User-Agent'] = 'NSTN/3.60.474802233.release Dalvik/2.1.0 (Linux; U; Android 12; US) gzip'
  66. params['headers']['Accept'] = '*/*'
  67. return params
  68. def response(resp):
  69. """Get response from google's search request"""
  70. results = []
  71. detect_google_sorry(resp)
  72. json_start = resp.text.find('{"ischj":')
  73. json_data = loads(resp.text[json_start:])
  74. for item in json_data["ischj"]["metadata"]:
  75. result_item = {
  76. 'url': item["result"]["referrer_url"],
  77. 'title': item["result"]["page_title"],
  78. 'content': item["text_in_grid"]["snippet"],
  79. 'source': item["result"]["site_title"],
  80. 'img_format': f'{item["original_image"]["width"]} x {item["original_image"]["height"]}',
  81. 'img_src': item["original_image"]["url"],
  82. 'thumbnail_src': item["thumbnail"]["url"],
  83. 'template': 'images.html',
  84. }
  85. author = item["result"].get('iptc', {}).get('creator')
  86. if author:
  87. result_item['author'] = ', '.join(author)
  88. copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
  89. if copyright_notice:
  90. result_item['source'] += ' / ' + copyright_notice
  91. file_size = item.get('gsa', {}).get('file_size')
  92. if file_size:
  93. result_item['source'] += ' (%s)' % file_size
  94. results.append(result_item)
  95. return results