Browse Source

[enh] implement a OnlineUrlSearchProcessor

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
e92d40c854
2 changed files with 51 additions and 1 deletions
  1. 9 1
      searx/search/processors/__init__.py
  2. 42 0
      searx/search/processors/online_url_search.py

+ 9 - 1
searx/search/processors/__init__.py

@@ -11,6 +11,7 @@ __all__ = [
     'OnlineProcessor',
     'OnlineDictionaryProcessor',
     'OnlineCurrencyProcessor',
+    'OnlineUrlSearchProcessor',
     'PROCESSORS',
 ]
 
@@ -24,6 +25,7 @@ from .online import OnlineProcessor
 from .offline import OfflineProcessor
 from .online_dictionary import OnlineDictionaryProcessor
 from .online_currency import OnlineCurrencyProcessor
+from .online_url_search import OnlineUrlSearchProcessor
 from .abstract import EngineProcessor
 
 logger = logger.getChild('search.processors')
@@ -33,7 +35,13 @@ PROCESSORS: Dict[str, EngineProcessor] = {}
 
 def get_processor_class(engine_type):
     """Return processor class according to the ``engine_type``"""
-    for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]:
+    for c in [
+        OnlineProcessor,
+        OfflineProcessor,
+        OnlineDictionaryProcessor,
+        OnlineCurrencyProcessor,
+        OnlineUrlSearchProcessor,
+    ]:
         if c.engine_type == engine_type:
             return c
     return None

+ 42 - 0
searx/search/processors/online_url_search.py

@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Processors for engine-type: ``online_url_search``
+
+"""
+
+import re
+from .online import OnlineProcessor
+
+re_search_urls = {
+    'http': re.compile(r'https?:\/\/[^ ]*'),
+    'ftp': re.compile(r'ftps?:\/\/[^ ]*'),
+    'data:image': re.compile('data:image/[^; ]*;base64,[^ ]*'),
+}
+
+
+class OnlineUrlSearchProcessor(OnlineProcessor):
+    """Processor class used by ``online_url_search`` engines."""
+
+    engine_type = 'online_url_search'
+
+    def get_params(self, search_query, engine_category):
+        params = super().get_params(search_query, engine_category)
+        if params is None:
+            return None
+
+        url_match = False
+        search_urls = {}
+
+        for k, v in re_search_urls.items():
+            m = v.search(search_query.query)
+            v = None
+            if m:
+                url_match = True
+                v = m[0]
+            search_urls[k] = v
+
+        if not url_match:
+            return None
+
+        params['search_urls'] = search_urls
+        return params