only_show_green_results.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. '''
  2. SPDX-License-Identifier: AGPL-3.0-or-later
  3. '''
  4. from flask_babel import gettext
  5. import re
  6. from searx.url_utils import urlunparse, parse_qsl, urlencode
  7. import requests
  8. import logging
  9. import os
  10. import sqlite3
  11. try:
  12. from urllib.parse import urlparse
  13. except ImportError:
  14. from urlparse import urlparse
  15. logger = logging.getLogger(__name__)
  16. name = gettext('Only show green hosted results')
  17. description = gettext('Any results not being hosted on green infrastructure will be filtered')
  18. default_on = False
  19. preference_section = 'privacy'
  20. allow_api_connections = True
  21. database_name = "url2green.db"
  22. class GreenCheck:
  23. def __init__(self):
  24. try:
  25. self.db = bool(os.stat(database_name))
  26. logger.debug(("Database found at {}. Using it for lookups "
  27. "instead of the Greencheck API".format(database_name)))
  28. except Exception:
  29. self.db = False
  30. if allow_api_connections:
  31. logger.debug("No database found at {}.".format(database_name))
  32. logger.debug(
  33. ("Falling back to the instead of the Greencheck API, as ",
  34. "'allow_api_connections' is set to {}.".format(allow_api_connections))
  35. )
  36. else:
  37. logger.debug(
  38. ("No database found at {database_name}. Not making any checks ".format(database_name),
  39. "because 'allow_api_connections' is set to {}".format(allow_api_connections))
  40. )
  41. def check_url(self, url=None):
  42. """
  43. Check a url passed in, and return a true or false result,
  44. based on whether the domain is marked as a one running on
  45. green energy.
  46. """
  47. try:
  48. parsed_domain = self.get_domain_from_url(url)
  49. except Exception as e:
  50. logger.exception("unable to parse url: {}".format(url))
  51. if parsed_domain:
  52. logger.debug("Checking {}, parsed from {}".format(parsed_domain, url))
  53. if self.db:
  54. return self.check_in_db(parsed_domain)
  55. else:
  56. if allow_api_connections:
  57. return self.check_against_api(parsed_domain)
  58. else:
  59. return False
  60. def get_domain_from_url(self, url=None):
  61. return urlparse(url).hostname
  62. def check_in_db(self, domain=None):
  63. # we basically treat the the sqlite database like an immutable,
  64. # read-only datastructure. This allows multiple concurrent
  65. # connections as no state is ever being changed - only read with SELECT
  66. # https://docs.python.org/3.8/library/sqlite3.html#//apple_ref/Function/sqlite3.connect
  67. # https://sqlite.org/lockingv3.html
  68. with sqlite3.connect(
  69. "file:{}?mode=ro".format(database_name),
  70. uri=True,
  71. check_same_thread=False
  72. ) as con:
  73. cur = con.cursor()
  74. cur.execute("SELECT green FROM green_presenting WHERE url=? LIMIT 1",
  75. [domain])
  76. res = cur.fetchone()
  77. logger.debug(res)
  78. return bool(res)
  79. def check_against_api(self, domain=None):
  80. api_server = "https://api.thegreenwebfoundation.org"
  81. api_url = "{}/greencheck/{}".format(api_server, domain)
  82. logger.debug(api_url)
  83. response = requests.get(api_url).json()
  84. if response.get("green"):
  85. return True
  86. greencheck = GreenCheck()
  87. # attach callback to the post search hook
  88. # request: flask request object
  89. # ctx: the whole local context of the pre search hook
  90. def post_search(request, search):
  91. green_results = [
  92. entry for entry in list(search.result_container._merged_results)
  93. if get_green(entry)
  94. ]
  95. search.result_container._merged_results = green_results
  96. return True
  97. def get_green(result):
  98. logger.debug(result.get('url'))
  99. green = greencheck.check_url(result.get('url'))
  100. if green:
  101. return True