# only_show_green_results.py
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Only show green hosted results"""
  3. import os
  4. import logging
  5. import sqlite3
  6. import requests
  7. from flask_babel import gettext
  8. try:
  9. from urllib.parse import urlparse
  10. except ImportError:
  11. from urlparse import urlparse
  12. logger = logging.getLogger(__name__)
  13. name = gettext('Only show green hosted results')
  14. description = gettext('Any results not being hosted on green infrastructure will be filtered')
  15. default_on = False
  16. preference_section = 'privacy'
  17. allow_api_connections = True
  18. database_name = "url2green.db"
  19. api_server = "https://api.thegreenwebfoundation.org"
  20. class GreenCheck:
  21. """Implement methods to check if a domain is part of the Green WEB"""
  22. def __init__(self):
  23. self.db = True # pylint: disable=invalid-name
  24. try:
  25. self.db = bool(os.stat(database_name))
  26. except Exception: # pylint: disable=broad-except
  27. self.db = False
  28. if self.db:
  29. logger.debug(
  30. "Database found at %s. Using it for lookups instead of the Greencheck API",
  31. database_name)
  32. return
  33. logger.debug("No database found at %s.", database_name)
  34. if allow_api_connections:
  35. logger.debug(
  36. "Falling back to the the Greencheck API, as 'allow_api_connections' is set to %s.",
  37. allow_api_connections)
  38. else:
  39. logger.debug(
  40. "filtering inactive: no database found at %s and 'allow_api_connections=%s'",
  41. database_name, allow_api_connections)
  42. def check_url(self, url=None):
  43. """Check a url passed in, and return a true or false result, based on whether
  44. the domain is marked as a one running on green energy."""
  45. logger.debug(url)
  46. parsed_domain = urlparse(url).hostname
  47. ret_val = False
  48. if parsed_domain:
  49. logger.debug("Checking %s, parsed from %s", parsed_domain, url)
  50. if self.db:
  51. ret_val = self.check_in_db(parsed_domain)
  52. elif allow_api_connections:
  53. ret_val = self.check_against_api(parsed_domain)
  54. return ret_val
  55. def check_in_db(self, domain=None): # pylint: disable=no-self-use
  56. """Checks wether ``domain`` is in the green database
  57. We basically treat the the sqlite database like an immutable, read-only
  58. datastructure. This allows multiple concurrent connections as no state
  59. is ever being changed - only read with SELECT
  60. - https://docs.python.org/3.8/library/sqlite3.html#//apple_ref/Function/sqlite3.connect
  61. - https://sqlite.org/lockingv3.html
  62. """
  63. with sqlite3.connect(
  64. "file:{}?mode=ro".format(database_name),
  65. uri=True,
  66. check_same_thread=False
  67. ) as con:
  68. cur = con.cursor()
  69. cur.execute("SELECT green FROM green_presenting WHERE url=? LIMIT 1",
  70. [domain])
  71. res = cur.fetchone()
  72. logger.debug(res)
  73. return bool(res)
  74. def check_against_api(self, domain=None): # pylint: disable=no-self-use
  75. """Checks ``domain`` against https://api.thegreenwebfoundation.org API"""
  76. api_url = "{}/greencheck/{}".format(api_server, domain)
  77. logger.debug(api_url)
  78. response = requests.get(api_url).json()
  79. return bool(response.get("green"))
  80. GC = GreenCheck()
  81. def post_search(request, search): # pylint: disable=unused-argument
  82. """Filter searx results."""
  83. # pylint: disable=protected-access
  84. green_results = [
  85. result for result in list(search.result_container._merged_results)
  86. if GC.check_url(result.get('url'))
  87. ]
  88. search.result_container._merged_results = green_results