Browse Source

Add check in local database, with fallback to API

Chris Adams 5 years ago
parent
commit
e94f0399e9
1 changed file with 61 additions and 8 deletions
  1. 61 8
      only_show_green_results.py

+ 61 - 8
only_show_green_results.py

@@ -19,16 +19,74 @@ from flask_babel import gettext
 import re
 import re
 from searx.url_utils import urlunparse, parse_qsl, urlencode
 from searx.url_utils import urlunparse, parse_qsl, urlencode
 import requests
 import requests
+import logging
+
+import sqlite3
+from urllib.parse import urlparse
+
+logger = logging.getLogger(__name__)
 
 
 name = gettext('Only show green hosted results')
 name = gettext('Only show green hosted results')
 description = gettext('Any results not being hosted on green infrastructure will be filtered')
 description = gettext('Any results not being hosted on green infrastructure will be filtered')
 default_on = True
 default_on = True
 preference_section = 'privacy'
 preference_section = 'privacy'
 
 
+
class GreenCheck:
    """Decide whether the host of a URL runs on green infrastructure.

    Lookups are answered from a local ``url2green.db`` SQLite database when
    it can be opened, and from the Green Web Foundation HTTP API otherwise.
    """

    def __init__(self):
        # Stays None when the local database is unavailable; check_url() then
        # sends every lookup to the remote API instead.
        self.conn = None

        try:
            # Open read-only through a URI. A plain sqlite3.connect() silently
            # creates an empty file when the database is missing, so it would
            # never raise and the API fallback would never be used.
            self.conn = sqlite3.connect(
                'file:url2green.db?mode=ro', uri=True, check_same_thread=False
            )
            logger.info("url2green database found. ready for queries")
        except sqlite3.Error:
            logger.exception('No url2green database found. Falling back to the API')

    def check_url(self, url=None) -> bool:
        """
        Check the url passed in and return True or False, based on whether
        its domain is marked as running on green energy.

        A url that cannot be parsed, or that has no hostname, is reported as
        not green (False).
        """
        try:
            parsed_domain = self.get_domain_from_url(url)
        except (ValueError, TypeError, AttributeError):
            # urlparse raises ValueError on malformed input such as an
            # unterminated IPv6 literal; non-string input fails with the
            # other two. Nothing can be looked up in either case.
            logger.exception("unable to parse url: %s", url)
            return False

        if not parsed_domain:
            return False

        logger.debug("Checking %s, parsed from %s", parsed_domain, url)

        if self.conn:
            return self.check_in_db(parsed_domain)
        return self.check_against_api(parsed_domain)

    def get_domain_from_url(self, url=None):
        """Return the hostname part of ``url``, or None when it has none."""
        return urlparse(url).hostname

    def check_in_db(self, domain=None):
        """
        Look ``domain`` up in the ``green_presenting`` table of the local
        database.

        Returns True only when a row exists and its ``green`` column is
        truthy; a missing row or a falsy ``green`` value gives False.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute(
                "SELECT green FROM green_presenting WHERE url=? LIMIT 1", [domain]
            )
            row = cursor.fetchone()
        finally:
            # Release the cursor even when the query itself fails.
            cursor.close()
        logger.debug(row)
        # fetchone() yields a 1-tuple such as (0,), which is truthy by itself,
        # so the column value has to be inspected rather than the row.
        return row is not None and bool(row[0])

    def check_against_api(self, domain=None):
        """
        Ask the Green Web Foundation API whether ``domain`` is green.

        Returns the truthiness of the ``green`` key of the JSON response.
        A failed request or an undecodable response is logged and reported
        as not green (False).
        """
        # No trailing slash: the path below already starts with one.
        api_server = "https://api.thegreenwebfoundation.org"
        try:
            # Without a timeout a stalled connection would block the search.
            response = requests.get(f"{api_server}/greencheck/{domain}", timeout=10)
            data = response.json()
        except (requests.RequestException, ValueError):
            logger.exception("greencheck API lookup failed for %s", domain)
            return False
        return isinstance(data, dict) and bool(data.get("green"))
+
# Module-level GreenCheck instance, built once when the module is imported
# and used by get_green() for every result it inspects.
+greencheck = GreenCheck()
+
 # attach callback to the post search hook
 # attach callback to the post search hook
 #  request: flask request object
 #  request: flask request object
 #  ctx: the whole local context of the pre search hook
 #  ctx: the whole local context of the pre search hook
 def post_search(request, search):
 def post_search(request, search):
+    logger.debug(search.result_container._merged_results)
+
     green_results = [
     green_results = [
         entry for entry in list(search.result_container._merged_results)
         entry for entry in list(search.result_container._merged_results)
         if get_green(entry)
         if get_green(entry)
@@ -37,14 +95,9 @@ def post_search(request, search):
     return True
     return True
 
 
 def get_green(result):
 def get_green(result):
-    if 'parsed_url' not in result:
+    logger.debug(result.get('url'))
+    green = greencheck.check_url(result.get('url'))
+    if green:
         return True
         return True
 
 
-    print result['url']
-
-    # @todo hook up the url to our greencheck tool instead of api here
-    response = requests.get("https://api.thegreenwebfoundation.org/greencheck/" + result['parsed_url'].netloc)
-    data = response.json()
-    #print(data['green'])
 
 
-    return data['green']