# greencheck.py
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Example of a *custom* SearXNG plugin."""
import logging
import os
import sqlite3
from contextlib import closing
from urllib.parse import urlparse

from searx import network

  8. log = logging.getLogger(__name__)
  9. ALLOW_API_CONNECTIONS = True
  10. DB_NAME = "url2green.db"
  11. API_SERVER = "https://api.thegreenwebfoundation.org"
  12. class GreenCheck:
  13. """Implement methods to check if a domain is part of the Green WEB"""
  14. def __init__(self):
  15. self.db = True # pylint: disable=invalid-name
  16. try:
  17. self.db = bool(os.stat(DB_NAME))
  18. except Exception: # pylint: disable=broad-except
  19. self.db = False
  20. if self.db:
  21. log.debug(
  22. f"Database found at {DB_NAME}. Using it for lookups instead of the Greencheck API"
  23. )
  24. return
  25. log.warning(f"No database found at {DB_NAME}.")
  26. if ALLOW_API_CONNECTIONS:
  27. log.warning(
  28. f"Falling back to the (much slower) Greencheck API, "
  29. f"as 'ALLOW_API_CONNECTIONS' is set to {ALLOW_API_CONNECTIONS}."
  30. )
  31. else:
  32. log.debug(
  33. f"filtering inactive: no database found at {DB_NAME}"
  34. f" and 'ALLOW_API_CONNECTIONS={ALLOW_API_CONNECTIONS}'"
  35. )
  36. def check_url(self, url=None):
  37. """Check a url passed in, and return a true or false result, based on whether
  38. the domain is marked as a one running on green energy."""
  39. log.debug(url)
  40. parsed_domain = urlparse(url).hostname
  41. ret_val = False
  42. if parsed_domain:
  43. log.debug(f"Checking {parsed_domain}, parsed from {url}")
  44. if self.db:
  45. ret_val = self.check_in_db(parsed_domain)
  46. elif ALLOW_API_CONNECTIONS:
  47. ret_val = self.check_against_api(parsed_domain)
  48. return ret_val
  49. def check_in_db(self, domain=None):
  50. """Checks wether ``domain`` is in the green database
  51. We basically treat the the sqlite database like an immutable, read-only
  52. datastructure. This allows multiple concurrent connections as no state
  53. is ever being changed - only read with SELECT
  54. - https://docs.python.org/3.8/library/sqlite3.html#//apple_ref/Function/sqlite3.connect
  55. - https://sqlite.org/lockingv3.html
  56. """
  57. with sqlite3.connect(
  58. f"file:{DB_NAME}?mode=ro", uri=True, check_same_thread=False
  59. ) as con:
  60. cur = con.cursor()
  61. cur.execute(
  62. "SELECT green FROM green_presenting WHERE url=? LIMIT 1", [domain]
  63. )
  64. res = cur.fetchone()
  65. log.debug(res)
  66. return bool(res)
  67. def check_against_api(self, domain=None):
  68. """Checks ``domain`` against https://api.thegreenwebfoundation.org API"""
  69. api_url = f"{API_SERVER}/greencheck/{domain}"
  70. log.debug(api_url)
  71. response = network.get(api_url).json()
  72. return bool(response.get("green"))