google_images.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This is the implementation of the Google Images engine using the internal
Google API used by the Google Go Android app.

This internal API offers results in

- JSON (``_fmt:json``)
- Protobuf_ (``_fmt:pb``)
- Protobuf_ compressed? (``_fmt:pc``)
- HTML (``_fmt:html``)
- Protobuf_ encoded in JSON (``_fmt:jspb``).

.. _Protobuf: https://en.wikipedia.org/wiki/Protocol_Buffers
"""

from typing import TYPE_CHECKING

from urllib.parse import urlencode
from json import loads

from searx.engines.google import fetch_traits  # pylint: disable=unused-import
from searx.engines.google import (
    get_google_info,
    time_range_dict,
    detect_google_sorry,
)

if TYPE_CHECKING:
    import logging
    from searx.enginelib.traits import EngineTraits

    logger: logging.Logger
    traits: EngineTraits

# about
about = {
    "website": 'https://images.google.com',
    "wikidata_id": 'Q521550',
    "official_api_documentation": 'https://developers.google.com/custom-search',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
categories = ['images', 'web']
paging = True
max_page = 50
time_range_support = True
safesearch = True
send_accept_language_header = True
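
# Map searx's safesearch levels (0 = off, 1 = moderate, 2 = strict) to the
# values passed in Google's ``safe`` URL parameter (see request() below).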
filter_mapping = {0: 'images', 1: 'active', 2: 'active'}


def request(query, params):
    """Google-Image search request"""

    google_info = get_google_info(params, traits)
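
    # Build the query URL for the internal API: ``_fmt:json`` in the ``async``
    # parameter selects the JSON format listed in the module docstring, and
    # ``ijn`` carries the requested page number.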
    query_url = (
        'https://'
        + google_info['subdomain']
        + '/search'
        + "?"
        + urlencode(
            {
                'q': query,
                'tbm': "isch",
                **google_info['params'],
                'asearch': 'isch',
                'async': '_fmt:json,p:1,ijn:' + str(params['pageno']),
            }
        )
    )
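
    # Narrow results by time range (``tbs=qdr:*``) and by the SafeSearch level
    # mapped in ``filter_mapping``, when requested.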
    if params['time_range'] in time_range_dict:
        query_url += '&' + urlencode({'tbs': 'qdr:' + time_range_dict[params['time_range']]})
    if params['safesearch']:
        query_url += '&' + urlencode({'safe': filter_mapping[params['safesearch']]})
    params['url'] = query_url

    params['cookies'] = google_info['cookies']
    params['headers'].update(google_info['headers'])
    return params


def response(resp):
    """Get response from google's search request"""
    results = []
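
    # Detect Google's "sorry" (CAPTCHA) page before trying to parse the body.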
    detect_google_sorry(resp)
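
    # The JSON payload does not necessarily start at the beginning of the
    # body, so locate the ``{"ischj": ...}`` object before decoding it.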
    json_start = resp.text.find('{"ischj":')
    json_data = loads(resp.text[json_start:])

    for item in json_data["ischj"].get("metadata", []):
        result_item = {
            'url': item["result"]["referrer_url"],
            'title': item["result"]["page_title"],
            'content': item["text_in_grid"]["snippet"],
            'source': item["result"]["site_title"],
            'img_format': f'{item["original_image"]["width"]} x {item["original_image"]["height"]}',
            'img_src': item["original_image"]["url"],
            'thumbnail_src': item["thumbnail"]["url"],
            'template': 'images.html',
        }
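
        # Optional metadata (IPTC creator and copyright notice, freshness
        # date, file size) enriches the author / source fields when present.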
        author = item["result"].get('iptc', {}).get('creator')
        if author:
            result_item['author'] = ', '.join(author)

        copyright_notice = item["result"].get('iptc', {}).get('copyright_notice')
        if copyright_notice:
            result_item['source'] += ' | ' + copyright_notice

        freshness_date = item["result"].get("freshness_date")
        if freshness_date:
            result_item['source'] += ' | ' + freshness_date

        file_size = item.get('gsa', {}).get('file_size')
        if file_size:
            result_item['source'] += ' (%s)' % file_size

        results.append(result_item)

    return results