Browse Source

Merge pull request #2481 from dalf/mod-check

Mod check
Adam Tauber 4 years ago
parent
commit
f310305c54

+ 1 - 1
requirements.txt

@@ -9,4 +9,4 @@ pygments==2.1.3
 python-dateutil==2.8.1
 pyyaml==5.3.1
 requests[socks]==2.25.1
-pycld3==0.20
+langdetect==1.0.8

+ 1 - 0
searx/search/checker/__main__.py

@@ -74,6 +74,7 @@ def run(engine_name_list, verbose):
                 stdout.write(f'    {"found languages":15}: {" ".join(sorted(list(checker.test_results.languages)))}\n')
                 for test_name, logs in checker.test_results.logs.items():
                     for log in logs:
+                        log = map(lambda l: l if isinstance(l, str) else repr(l), log)
                         stdout.write(f'    {test_name:15}: {RED}{" ".join(log)}{RESET_SEQ}\n')
 
 

+ 10 - 5
searx/search/checker/impl.py

@@ -9,7 +9,8 @@ from time import time
 from urllib.parse import urlparse
 
 import re
-import cld3
+from langdetect import detect_langs
+from langdetect.lang_detect_exception import LangDetectException
 import requests.exceptions
 
 from searx import poolrequests, logger
@@ -181,10 +182,14 @@ class ResultContainerTests:
         self.test_results.add_error(self.test_name, message, *args, '(' + sqstr + ')')
 
     def _add_language(self, text: str) -> typing.Optional[str]:
-        r = cld3.get_language(str(text))  # pylint: disable=E1101
-        if r is not None and r.probability >= 0.98 and r.is_reliable:
-            self.languages.add(r.language)
-            self.test_results.add_language(r.language)
+        try:
+            r = detect_langs(str(text))  # pylint: disable=E1101
+        except LangDetectException:
+            return None
+
+        if len(r) > 0 and r[0].prob > 0.95:
+            self.languages.add(r[0].lang)
+            self.test_results.add_language(r[0].lang)
         return None
 
     def _check_result(self, result):

+ 3 - 3
searx/search/processors/online.py

@@ -239,14 +239,14 @@ class OnlineProcessor(EngineProcessor):
                 'test': ['unique_results']
             }
 
-        if getattr(self.engine, 'lang', False):
+        if getattr(self.engine, 'supported_languages', []):
             tests['lang_fr'] = {
                 'matrix': {'query': 'paris', 'lang': 'fr'},
-                'result_container': ['not_empty', ('has_lang', 'fr')],
+                'result_container': ['not_empty', ('has_language', 'fr')],
             }
             tests['lang_en'] = {
                 'matrix': {'query': 'paris', 'lang': 'en'},
-                'result_container': ['not_empty', ('has_lang', 'en')],
+                'result_container': ['not_empty', ('has_language', 'en')],
             }
 
         if getattr(self.engine, 'safesearch', False):

+ 9 - 3
searx/settings.yml

@@ -105,11 +105,17 @@ outgoing: # communication with search engines
 checker:
     # disable checker when in debug mode
     off_when_debug: True
+
     # scheduling: interval or int
     # use "scheduling: False" to disable scheduling
-    scheduling:
-        start_after: [300, 1800]  # delay to start the first run of the checker
-        every: [86400, 90000]  # how often the checker runs
+    # to activate the scheduler:
+    # * uncomment the "scheduling" section below
+    # * add "cache2 = name=searxcache,items=2000,blocks=2000,blocksize=4096,bitmap=1" to your uwsgi.ini
+
+    # scheduling:
+    #    start_after: [300, 1800]  # delay to start the first run of the checker
+    #    every: [86400, 90000]  # how often the checker runs
+
     # additional tests: only for the YAML anchors (see the engines section)
     additional_tests:
         rosebud: &test_rosebud

+ 2 - 4
utils/searx.sh

@@ -46,7 +46,6 @@ SEARX_PACKAGES_debian="\
 python3-dev python3-babel python3-venv
 uwsgi uwsgi-plugin-python3
 git build-essential libxslt-dev zlib1g-dev libffi-dev libssl-dev
-libprotobuf-dev protobuf-compiler
 shellcheck"
 
 BUILD_PACKAGES_debian="\
@@ -59,7 +58,6 @@ SEARX_PACKAGES_arch="\
 python python-pip python-lxml python-babel
 uwsgi uwsgi-plugin-python
 git base-devel libxml2
-protobuf
 shellcheck"
 
 BUILD_PACKAGES_arch="\
@@ -71,7 +69,7 @@ SEARX_PACKAGES_fedora="\
 python python-pip python-lxml python-babel
 uwsgi uwsgi-plugin-python3
 git @development-tools libxml2
-ShellCheck protobuf-compiler protobuf-devel"
+ShellCheck"
 
 BUILD_PACKAGES_fedora="\
 firefox graphviz graphviz-gd ImageMagick librsvg2-tools
@@ -84,7 +82,7 @@ SEARX_PACKAGES_centos="\
 python36 python36-pip python36-lxml python-babel
 uwsgi uwsgi-plugin-python3
 git @development-tools libxml2
-ShellCheck protobuf-compiler protobuf-devel"
+ShellCheck"
 
 BUILD_PACKAGES_centos="\
 firefox graphviz graphviz-gd ImageMagick librsvg2-tools