only_show_green_results.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. '''
  2. SPDX-License-Identifier: AGPL-3.0-or-later
  3. '''
  4. from flask_babel import gettext
  5. import re
  6. from searx.url_utils import urlunparse, parse_qsl, urlencode
  7. import requests
  8. import logging
  9. import os
  10. import sqlite3
  11. from urllib.parse import urlparse
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Plugin metadata. Presumably read by the searx plugin loader -- TODO confirm.
name = gettext('Only show green hosted results')
description = gettext('Any results not being hosted on green infrastructure will be filtered')
default_on = True
preference_section = 'privacy'

# When no local database is found, GreenCheck only queries the remote
# Greencheck API if this flag is true; with it false, every URL is reported
# as not green.
allow_api_connections = True

# Path of the SQLite lookup database. It is a relative path, so it is
# resolved against the working directory of the running process.
database_name = "url2green.db"
  19. class GreenCheck:
  20. def __init__(self):
  21. try:
  22. self.db = bool(os.stat(database_name))
  23. logger.debug(f"Database found at {database_name}. Using it for lookups instead of the Greencheck API")
  24. except FileNotFoundError:
  25. self.db = False
  26. if allow_api_connections:
  27. logger.debug(f"No database found at {database_name}.")
  28. logger.debug(f"Falling back to the instead of the Greencheck API, as 'allow_api_connections' is set to {allow_api_connections}.")
  29. else:
  30. logger.debug(f"No database found at {database_name}. Not making any checks, because 'allow_api_connections' is set to {allow_api_connections}")
  31. def check_url(self, url=None) -> bool:
  32. """
  33. Check a url passed in, and return a true or false result,
  34. based on whether the domain is marked as a one running on
  35. green energy.
  36. """
  37. try:
  38. parsed_domain = self.get_domain_from_url(url)
  39. except Exception as e:
  40. logger.exception(f"unable to parse url: {url}")
  41. if parsed_domain:
  42. logger.debug(f"Checking {parsed_domain}, parsed from {url}")
  43. if self.db:
  44. return self.check_in_db(parsed_domain)
  45. else:
  46. if allow_api_connections:
  47. return self.check_against_api(parsed_domain)
  48. else:
  49. return False
  50. def get_domain_from_url(self, url=None):
  51. return urlparse(url).hostname
  52. def check_in_db(self, domain=None):
  53. with sqlite3.connect(database_name) as con:
  54. cur = con.cursor()
  55. cur.execute("SELECT green FROM green_presenting WHERE url=? LIMIT 1",
  56. [domain]
  57. )
  58. res = cur.fetchone()
  59. logger.debug(res)
  60. return bool(res)
  61. def check_against_api(self, domain=None):
  62. api_server = "https://api.thegreenwebfoundation.org"
  63. api_url = f"{api_server}/greencheck/{domain}"
  64. logger.debug(api_url)
  65. response = requests.get(api_url).json()
  66. if response.get("green"):
  67. return True
# Shared checker instance used by get_green(). Constructing it here means the
# check for the local database happens once, when this module is imported.
greencheck = GreenCheck()
  69. # attach callback to the post search hook
  70. # request: flask request object
  71. # ctx: the whole local context of the pre search hook
  72. def post_search(request, search):
  73. green_results = [
  74. entry for entry in list(search.result_container._merged_results)
  75. if get_green(entry)
  76. ]
  77. search.result_container._merged_results = green_results
  78. return True
  79. def get_green(result):
  80. logger.debug(result.get('url'))
  81. green = greencheck.check_url(result.get('url'))
  82. if green:
  83. return True