only_show_green_results.py 3.7 KB

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Only show green hosted results"""

import os
import logging
import sqlite3
from urllib.parse import urlparse

from flask_babel import gettext

from searx import network

logger = logging.getLogger(__name__)

name = gettext('Only show green hosted results')
description = gettext('Any results not being hosted on green infrastructure will be filtered')
default_on = False
preference_section = 'general'

allow_api_connections = True
database_name = "url2green.db"
api_server = "https://api.thegreenwebfoundation.org"
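# ``database_name`` is a plain relative path, so the url2green.db lookup
# database is resolved against the working directory of the searx process.
# The snapshot is not shipped with searx; it has to be provided separately
# (The Green Web Foundation publishes the underlying dataset).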


class GreenCheck:
    """Implement methods to check if a domain is part of the Green Web"""

    def __init__(self):
        self.db = True  # pylint: disable=invalid-name

        # A local copy of the url2green database takes precedence over API
        # lookups; os.stat raises if the file does not exist.
        try:
            self.db = bool(os.stat(database_name))
        except Exception:  # pylint: disable=broad-except
            self.db = False

        if self.db:
            logger.debug(
                "Database found at %s. Using it for lookups instead of the Greencheck API",
                database_name)
            return

        logger.warning("No database found at %s.", database_name)
        if allow_api_connections:
            logger.warning(
                "Falling back to the (much slower) Greencheck API, as 'allow_api_connections' is set to %s.",
                allow_api_connections)
        else:
            logger.debug(
                "filtering inactive: no database found at %s and 'allow_api_connections=%s'",
                database_name, allow_api_connections)

    def check_url(self, url=None):
        """Check a url passed in, and return a true or false result, based on
        whether the domain is marked as one running on green energy."""
        logger.debug(url)

        parsed_domain = urlparse(url).hostname
        ret_val = False

        if parsed_domain:
            logger.debug("Checking %s, parsed from %s", parsed_domain, url)

            if self.db:
                ret_val = self.check_in_db(parsed_domain)
            elif allow_api_connections:
                ret_val = self.check_against_api(parsed_domain)

        return ret_val

    def check_in_db(self, domain=None):  # pylint: disable=no-self-use
        """Checks whether ``domain`` is in the green database

        We basically treat the sqlite database like an immutable, read-only
        data structure. This allows multiple concurrent connections as no
        state is ever being changed - only read with SELECT

        - https://docs.python.org/3.8/library/sqlite3.html#sqlite3.connect
        - https://sqlite.org/lockingv3.html
        """
        with sqlite3.connect(
                "file:{}?mode=ro".format(database_name),
                uri=True,
                check_same_thread=False
        ) as con:
            cur = con.cursor()
            cur.execute("SELECT green FROM green_presenting WHERE url=? LIMIT 1",
                        [domain])
            res = cur.fetchone()
            logger.debug(res)
            return bool(res)
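
    # Layout this query assumes (a sketch - the table and column names come
    # from the SELECT above, the exact types in the real url2green.db export
    # are an assumption):
    #
    #     CREATE TABLE green_presenting (
    #         url   TEXT,    -- bare hostname, e.g. 'example.com'
    #         green INTEGER  -- truthy when the host is green hosted
    #     );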

    def check_against_api(self, domain=None):  # pylint: disable=no-self-use
        """Checks ``domain`` against https://api.thegreenwebfoundation.org API"""
        api_url = "{}/greencheck/{}".format(api_server, domain)
        logger.debug(api_url)
        response = network.get(api_url).json()
        return bool(response.get("green"))
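
    # The plugin only relies on the "green" field; a Greencheck API response
    # is assumed to look roughly like this (illustrative, not the full
    # schema):
    #
    #     {"url": "example.com", "green": true, ...}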


GC = GreenCheck()


def post_search(request, search):  # pylint: disable=unused-argument
    """Filter searx results."""
    # pylint: disable=protected-access
    green_results = [
        result for result in list(search.result_container._merged_results)
        if GC.check_url(result.get('url'))
    ]
    search.result_container._merged_results = green_results
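

# Minimal manual check - a sketch, not part of the plugin. It assumes a searx
# environment where ``searx.network`` is importable and initialised; the URLs
# below are arbitrary examples.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    for test_url in ("https://www.thegreenwebfoundation.org", "https://example.com"):
        print(test_url, "->", GC.check_url(test_url))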