# only_show_green_results.py
  1. '''
  2. SPDX-License-Identifier: AGPL-3.0-or-later
  3. '''
  4. from flask_babel import gettext
  5. import re
  6. from searx.url_utils import urlunparse, parse_qsl, urlencode
  7. import requests
  8. import logging
  9. import os
  10. import sqlite3
  11. from urllib.parse import urlparse
logger = logging.getLogger(__name__)

# Plugin metadata displayed in the searx preferences UI.
name = gettext('Only show green hosted results')
description = gettext('Any results not being hosted on green infrastructure will be filtered')
default_on = True
preference_section = 'privacy'
# When True, fall back to The Green Web Foundation web API if no local
# database file is present (see GreenCheck below).
allow_api_connections = True
# Local sqlite lookup table of green-hosted domains; used instead of the
# API when the file exists.
database_name = "url2green.db"
  19. class GreenCheck:
  20. def __init__(self):
  21. try:
  22. self.db = bool(os.stat(database_name))
  23. logger.debug(f"Database found at {database_name}. Using it for lookups instead of the Greencheck API")
  24. except FileNotFoundError:
  25. self.db = False
  26. if allow_api_connections:
  27. logger.debug(f"No database found at {database_name}.")
  28. logger.debug(
  29. (f"Falling back to the instead of the Greencheck API, as ",
  30. "'allow_api_connections' is set to {allow_api_connections}.")
  31. )
  32. else:
  33. logger.debug(
  34. f("No database found at {database_name}. Not making any checks ",
  35. "because 'allow_api_connections' is set to {allow_api_connections}")
  36. )
  37. def check_url(self, url=None) -> bool:
  38. """
  39. Check a url passed in, and return a true or false result,
  40. based on whether the domain is marked as a one running on
  41. green energy.
  42. """
  43. try:
  44. parsed_domain = self.get_domain_from_url(url)
  45. except Exception as e:
  46. logger.exception(f"unable to parse url: {url}")
  47. if parsed_domain:
  48. logger.debug(f"Checking {parsed_domain}, parsed from {url}")
  49. if self.db:
  50. return self.check_in_db(parsed_domain)
  51. else:
  52. if allow_api_connections:
  53. return self.check_against_api(parsed_domain)
  54. else:
  55. return False
  56. def get_domain_from_url(self, url=None):
  57. return urlparse(url).hostname
  58. def check_in_db(self, domain=None):
  59. # we basically treat the the sqlite database like an immutable,
  60. # read-only datastructure. This allows multiple concurrent
  61. # connections as no state is ever being changed - only read with SELECT
  62. # https://docs.python.org/3.8/library/sqlite3.html#//apple_ref/Function/sqlite3.connect
  63. # https://sqlite.org/lockingv3.html
  64. with sqlite3.connect(
  65. f"file:{database_name}?mode=ro",
  66. uri=True,
  67. check_same_thread=False
  68. ) as con:
  69. cur = con.cursor()
  70. cur.execute("SELECT green FROM green_presenting WHERE url=? LIMIT 1",
  71. [domain])
  72. res = cur.fetchone()
  73. logger.debug(res)
  74. return bool(res)
  75. def check_against_api(self, domain=None):
  76. api_server = "https://api.thegreenwebfoundation.org"
  77. api_url = f"{api_server}/greencheck/{domain}"
  78. logger.debug(api_url)
  79. response = requests.get(api_url).json()
  80. if response.get("green"):
  81. return True
  82. greencheck = GreenCheck()
  83. # attach callback to the post search hook
  84. # request: flask request object
  85. # ctx: the whole local context of the pre search hook
  86. def post_search(request, search):
  87. green_results = [
  88. entry for entry in list(search.result_container._merged_results)
  89. if get_green(entry)
  90. ]
  91. search.result_container._merged_results = green_results
  92. return True
  93. def get_green(result):
  94. logger.debug(result.get('url'))
  95. green = greencheck.check_url(result.get('url'))
  96. if green:
  97. return True