only_show_green_results.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. (C) 2015 by Adam Tauber, <asciimoo@gmail.com>
  13. '''
  14. from flask_babel import gettext
  15. import re
  16. from searx.url_utils import urlunparse, parse_qsl, urlencode
  17. import requests
  18. import logging
  19. import sqlite3
  20. from urllib.parse import urlparse
  # Logger for this module; every function below reports through it.
  logger = logging.getLogger(__name__)

  # Plugin metadata. Presumably read by the searx plugin loader and shown in
  # the preferences UI -- confirm against the searx plugin documentation.
  name = gettext('Only show green hosted results')
  description = gettext('Any results not being hosted on green infrastructure will be filtered')
  # The filter is enabled unless switched off.
  default_on = True
  preference_section = 'privacy'
  class GreenCheck:
      """Decide whether a domain is marked as running on green energy.

      Lookups are answered from a local ``url2green.db`` SQLite database when
      that file can be opened, and from the Green Web Foundation HTTP API
      otherwise.
      """

      # Base URL of the fallback web API (no trailing slash, so the request
      # path is not built with a double slash).
      API_SERVER = "https://api.thegreenwebfoundation.org"
      # Seconds to wait for the API so one slow lookup cannot hang a search.
      API_TIMEOUT = 5

      def __init__(self):
          """Try to open the local database; leave ``self.conn`` as None on failure."""
          self.conn = None
          try:
              # Open read-only through a URI. A plain connect() silently
              # creates an empty database when the file is missing, which
              # would hide the "no database" case and break every query.
              self.conn = sqlite3.connect(
                  'file:url2green.db?mode=ro', uri=True, check_same_thread=False
              )
              logger.info("url2green database found. ready for queries")
          except sqlite3.Error:
              logger.exception('No url2green database found. Falling back to the API')

      def check_url(self, url=None) -> bool:
          """
          Check a url passed in, and return a true or false result,
          based on whether the domain is marked as a one running on
          green energy.

          Returns False when the url cannot be parsed or has no hostname.
          """
          try:
              parsed_domain = self.get_domain_from_url(url)
          except Exception:
              logger.exception("unable to parse url: %s", url)
              return False

          if not parsed_domain:
              return False

          logger.debug("Checking %s, parsed from %s", parsed_domain, url)
          if self.conn:
              return self.check_in_db(parsed_domain)
          return self.check_against_api(parsed_domain)

      def get_domain_from_url(self, url=None):
          """Return the hostname component of ``url`` (None when it has none)."""
          return urlparse(url).hostname

      def check_in_db(self, domain=None):
          """Return True when the local database has a truthy ``green`` value for ``domain``."""
          cursor = self.conn.cursor()
          try:
              cursor.execute(
                  "SELECT green FROM green_presenting WHERE url=? LIMIT 1", [domain]
              )
              row = cursor.fetchone()
          finally:
              # Release the cursor even when the query raises.
              cursor.close()
          logger.debug(row)
          # A row such as (0,) is itself truthy, so test the stored value,
          # not merely the presence of a row.
          return row is not None and bool(row[0])

      def check_against_api(self, domain=None):
          """Ask the web API about ``domain``; return False on any lookup failure."""
          try:
              response = requests.get(
                  f"{self.API_SERVER}/greencheck/{domain}", timeout=self.API_TIMEOUT
              )
              payload = response.json()
          except (requests.RequestException, ValueError):
              # Network trouble or a non-JSON body: treat the domain as not green
              # rather than aborting the whole search.
              logger.exception("greencheck API lookup failed for %s", domain)
              return False
          return isinstance(payload, dict) and bool(payload.get("green"))
  # Single checker instance, created at import time and used by get_green()
  # for every result.
  greencheck = GreenCheck()
  def post_search(request, search):
      """Post-search hook: keep only the results that pass ``get_green``.

      request: the request object handed to the hook (not used here).
      search: object whose ``result_container._merged_results`` list is
          replaced by the filtered list.

      Always returns True.
      """
      logger.debug(search.result_container._merged_results)

      # Walk a snapshot of the merged results and collect the ones to keep.
      kept = []
      for candidate in list(search.result_container._merged_results):
          if get_green(candidate):
              kept.append(candidate)

      search.result_container._merged_results = kept
      return True
  def get_green(result):
      """Return True when the result's ``url`` checks out as green, else None."""
      target = result.get('url')
      logger.debug(target)
      # Non-green results yield None (falsy), which post_search filters out.
      return True if greencheck.check_url(target) else None