# SPDX-License-Identifier: AGPL-3.0-or-later
"""This is the implementation of the Google Images engine using the internal
Google API used by the Google Go Android app.

This internal API offers results in

- JSON (``_fmt:json``)
- Protobuf_ (``_fmt:pb``)
- Protobuf_ compressed? (``_fmt:pc``)
- HTML (``_fmt:html``)
- Protobuf_ encoded in JSON (``_fmt:jspb``).

.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
"""

from typing import TYPE_CHECKING

from urllib.parse import urlencode
from json import loads

from searx.engines.google import fetch_traits  # pylint: disable=unused-import
from searx.engines.google import (
    get_google_info,
    time_range_dict,
    detect_google_sorry,
)

if TYPE_CHECKING:
    import logging
    from searx.enginelib.traits import EngineTraits

    logger: logging.Logger
    traits: EngineTraits

# about
about = {
    "website": 'https://images.google.com',
    "wikidata_id": 'Q521550',
    "official_api_documentation": 'https://developers.google.com/custom-search',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
categories = ['images', 'web']
paging = True
max_page = 50
time_range_support = True
safesearch = True
send_accept_language_header = True
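
# Map SearXNG's safesearch levels (0: off, 1: moderate, 2: strict) to the
# value sent in Google's ``safe`` URL parameter (see request() below).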
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}


def request(query, params):
    """Google-Image search request"""

    google_info = get_google_info(params, traits)
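
    # Build the image-search URL.  ``tbm=isch`` selects Google's image search;
    # ``asearch=isch`` together with ``async=_fmt:json,...`` requests the
    # internal "async" endpoint with the JSON format described in the module
    # docstring.  ``ijn`` appears to select the result page.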
    query_url = (
        'https://'
        + google_info['subdomain']
        + '/search'
        + "?"
        + urlencode(
            {
                'q': query,
                'tbm': "isch",
                **google_info['params'],
                'asearch': 'isch',
                'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
            }
        )
    )

    if params['time_range'] in time_range_dict:
        query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
    if params['safesearch']:
        query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
    params['url'] = query_url

    params['cookies'] = google_info['cookies']
    params['headers'].update(google_info['headers'])
    return params


def response(resp):
    """Get response from google's search request"""
    results = []

    detect_google_sorry(resp)
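
    # The body is not plain JSON: the payload carrying the image results
    # starts at the '{"ischj":' marker, so anything preceding it is skipped.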
    json_start = resp.text.find('{"ischj":')
    json_data = loads(resp.text[json_start:])
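
    # Each entry in "metadata" describes one image hit; map it onto SearXNG's
    # 'images.html' result template.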
    for item in json_data["ischj"].get("metadata", []):
        result_item = {
            'url': item["result"]["referrer_url"],
            'title': item["result"]["page_title"],
            'content': item["text_in_grid"]["snippet"],
            'source': item["result"]["site_title"],
            'resolution': f'{item["original_image"]["width"]} x {item["original_image"]["height"]}',
            'img_src': item["original_image"]["url"],
            'thumbnail_src': item["thumbnail"]["url"],
            'template': 'images.html',
        }

        author = item["result"].get('iptc', {}).get('creator')
        if author:
            result_item['author'] = ', '.join(author)

        copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
        if copyright_notice:
            result_item['source'] += ' | ' + copyright_notice

        freshness_date = item["result"].get("freshness_date")
        if freshness_date:
            result_item['source'] += ' | ' + freshness_date

        file_size = item.get('gsa', {}).get('file_size')
        if file_size:
            result_item['source'] += ' (%s)' % file_size

        results.append(result_item)

    return results