Browse Source

Merge pull request #2275 from dalf/mod-ahmia-filter

[mod] ahmia_filter.py: minor changes
Adam Tauber 4 years ago
parent
commit
d3d50eff66
2 changed files with 10 additions and 8 deletions
  1. +6 -1
      searx/data/__init__.py
  2. +4 -7
      searx/plugins/ahmia_filter.py

+ 6 - 1
searx/data/__init__.py

@@ -2,7 +2,7 @@ import json
 from pathlib import Path
 
 
-__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader']
+__init__ = ['ENGINES_LANGUGAGES', 'CURRENCIES', 'USER_AGENTS', 'bangs_loader', 'ahmia_blacklist_loader']
 data_dir = Path(__file__).parent
 
 
@@ -16,6 +16,11 @@ def bangs_loader():
     return load('bangs.json')
 
 
+def ahmia_blacklist_loader():
+    with open(str(data_dir / 'ahmia_blacklist.txt'), encoding='utf-8') as fd:
+        return fd.read().split()
+
+
 ENGINES_LANGUAGES = load('engines_languages.json')
 CURRENCIES = load('currencies.json')
 USER_AGENTS = load('useragents.json')

+ 4 - 7
searx/plugins/ahmia_filter.py

@@ -3,9 +3,7 @@
 '''
 
 from hashlib import md5
-from os.path import join
-from urllib.parse import urlparse
-from searx import searx_dir
+from searx.data import ahmia_blacklist_loader
 
 name = "Ahmia blacklist"
 description = "Filter out onion results that appear in Ahmia's blacklist. (See https://ahmia.fi/blacklist)"
@@ -18,15 +16,14 @@ ahmia_blacklist = None
 def get_ahmia_blacklist():
     global ahmia_blacklist
     if not ahmia_blacklist:
-        with open(join(join(searx_dir, "data"), "ahmia_blacklist.txt"), 'r') as f:
-            ahmia_blacklist = f.read().split()
+        ahmia_blacklist = ahmia_blacklist_loader()
     return ahmia_blacklist
 
 
 def not_blacklisted(result):
-    if not result.get('is_onion'):
+    if not result.get('is_onion') or not result.get('parsed_url'):
         return True
-    result_hash = md5(urlparse(result.get('url')).hostname.encode()).hexdigest()
+    result_hash = md5(result['parsed_url'].hostname.encode()).hexdigest()
     return result_hash not in get_ahmia_blacklist()