only_show_green_results.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. (C) 2015 by Adam Tauber, <asciimoo@gmail.com>
  13. '''
  14. from flask_babel import gettext
  15. import re
  16. from searx.url_utils import urlunparse, parse_qsl, urlencode
  17. import requests
  18. import logging
  19. import sqlite3
  20. from urllib.parse import urlparse
  # Logger shared by everything in this plugin module.
  logger = logging.getLogger(__name__)

  # Gates the remote API lookup in GreenCheck.check_url when no local
  # database connection exists.
  allow_api_connections = False

  # Plugin metadata; the two user-visible strings go through gettext.
  # NOTE(review): presumably read by the searx plugin loader - confirm.
  name = gettext('Only show green hosted results')
  description = gettext('Any results not being hosted on green infrastructure will be filtered')
  default_on = True
  preference_section = 'privacy'
  class GreenCheck:
      """Decide whether the host of a URL is recorded as green hosted.

      Lookups use a local ``url2green.db`` SQLite database when it can be
      opened. Without one, the remote greencheck API is queried, but only
      when the module-level ``allow_api_connections`` flag is true.
      """

      # Base address of the greencheck HTTP API (no trailing slash, so that
      # joining it with a path does not produce "//").
      API_SERVER = "https://api.thegreenwebfoundation.org"
      # Seconds to wait for the API; without a timeout a stalled connection
      # would block the search request indefinitely.
      API_TIMEOUT = 5

      def __init__(self):
          """Open the local url2green database if it exists.

          ``self.conn`` stays ``None`` when the database cannot be opened.
          """
          self.conn = None
          try:
              # Open read-only through a URI: a plain connect() silently
              # creates an empty database file when none exists, which would
              # make the API fallback unreachable.
              self.conn = sqlite3.connect(
                  'file:url2green.db?mode=ro',
                  uri=True,
                  check_same_thread=False,
              )
              logger.info("url2green database found. ready for queries")
          except sqlite3.Error:
              logger.exception('No url2green database found. Falling back to the API')

      def check_url(self, url=None) -> bool:
          """Return True when the domain of *url* is marked as green.

          Returns False when the URL cannot be parsed, has no hostname, or
          when neither the local database nor the API may be consulted.
          """
          try:
              parsed_domain = self.get_domain_from_url(url)
          except (ValueError, TypeError, AttributeError):
              logger.exception("unable to parse url: %s", url)
              return False

          if not parsed_domain:
              return False

          logger.debug("Checking %s, parsed from %s", parsed_domain, url)
          if self.conn is not None:
              return self.check_in_db(parsed_domain)
          if allow_api_connections:
              return self.check_against_api(parsed_domain)
          return False

      def get_domain_from_url(self, url=None):
          """Return the hostname component of *url* (None when absent)."""
          return urlparse(url).hostname

      def check_in_db(self, domain=None):
          """Return True when *domain* has a row in ``green_presenting``."""
          cursor = self.conn.cursor()
          try:
              cursor.execute(
                  "SELECT green FROM green_presenting WHERE url=? LIMIT 1",
                  [domain],
              )
              row = cursor.fetchone()
          finally:
              # Release the cursor even when the query raises.
              cursor.close()
          logger.debug(row)
          # NOTE(review): only the presence of a row is tested; the selected
          # ``green`` value itself is not inspected. Confirm that the table
          # holds green domains only.
          return bool(row)

      def check_against_api(self, domain=None):
          """Ask the greencheck API whether *domain* is green.

          Returns False when the request fails, times out, or the response
          is not a JSON object with a truthy ``green`` field.
          """
          try:
              response = requests.get(
                  f"{self.API_SERVER}/greencheck/{domain}",
                  timeout=self.API_TIMEOUT,
              )
              payload = response.json()
          except (requests.RequestException, ValueError):
              logger.exception("greencheck API lookup failed for %s", domain)
              return False
          return isinstance(payload, dict) and bool(payload.get("green"))
  # Module-level GreenCheck instance, created when this module is imported
  # and used by get_green for every result.
  greencheck = GreenCheck()
  # Plugin callback attached to the post search hook.
  # request: flask request object
  # search: the search object whose result_container holds the merged results
  def post_search(request, search):
      """Drop every merged result that get_green does not accept.

      The merged result list on ``search.result_container`` is replaced
      with the filtered list. Always returns True.
      """
      container = search.result_container
      logger.debug(container._merged_results)
      # Filter a snapshot of the list, keeping entries with a truthy verdict.
      snapshot = list(container._merged_results)
      container._merged_results = list(filter(get_green, snapshot))
      return True
  def get_green(result):
      """Return True when the result's 'url' passes the green check.

      Returns None (not False) otherwise.
      """
      url = result.get('url')
      logger.debug(url)
      if not greencheck.check_url(url):
          return None
      return True