Browse Source

make search language handling less strict

languages.py can change, so users may query with a language that is no
longer on the list, even if it is still recognized by a few engines.

Also made the codes "no" and "nb" equivalent because they seem to return
the same results, though most engines only support one or the other.
marc 8 years ago
parent
commit
fd65c12921

File diff suppressed because it is too large
+ 0 - 0
searx/data/engines_languages.json


+ 2 - 0
searx/engines/bing.py

@@ -94,6 +94,8 @@ def _fetch_supported_languages(resp):
     options = dom.xpath('//div[@id="limit-languages"]//input')
     for option in options:
         code = option.xpath('./@id')[0].replace('_', '-')
+        if code == 'nb':
+            code = 'no'
         supported_languages.append(code)
 
     return supported_languages

+ 4 - 0
searx/engines/qwant.py

@@ -47,6 +47,8 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
+        if params['language'] == 'no' or params['language'].startswith('no-'):
+            params['language'] = params['language'].replace('no', 'nb', 1)
         if params['language'].find('-') < 0:
             # tries to get a country code from language
             for lang in supported_languages:
@@ -118,6 +120,8 @@ def _fetch_supported_languages(resp):
 
     supported_languages = []
     for lang in regions_json['languages'].values():
+        if lang['code'] == 'nb':
+            lang['code'] = 'no'
         for country in lang['countries']:
             supported_languages.append(lang['code'] + '-' + country)
 

+ 2 - 0
searx/engines/swisscows.py

@@ -120,6 +120,8 @@ def _fetch_supported_languages(resp):
     options = dom.xpath('//div[@id="regions-popup"]//ul/li/a')
     for option in options:
         code = option.xpath('./@data-val')[0]
+        if code.startswith('nb-'):
+            code = code.replace('nb', 'no', 1)
         supported_languages.append(code)
 
     return supported_languages

+ 1 - 0
searx/languages.py

@@ -57,6 +57,7 @@ language_codes = (
     (u"nl", u"Nederlands", u"", u"Dutch"),
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
+    (u"no-NO", u"Norsk", u"", u"Norwegian"),
     (u"pl-PL", u"Polski", u"", u"Polish"),
     (u"pt", u"Português", u"", u"Portuguese"),
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),

+ 2 - 0
searx/preferences.py

@@ -107,6 +107,8 @@ class SearchLanguageSetting(EnumStringSetting):
                 pass
             elif lang in self.choices:
                 data = lang
+            elif data == 'nb-NO':
+                data = 'no-NO'
             elif data == 'ar-XA':
                 data = 'ar-SA'
             else:

+ 3 - 3
searx/query.py

@@ -24,7 +24,7 @@ from searx.engines import (
 import string
 import re
 
-VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(\-[A-Z]{2})?$')
+VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
 
 
 class RawTextQuery(object):
@@ -68,7 +68,7 @@ class RawTextQuery(object):
 
             # this force a language
             if query_part[0] == ':':
-                lang = query_part[1:].lower()
+                lang = query_part[1:].lower().replace('_', '-')
 
                 # user may set a valid, yet not selectable language
                 if VALID_LANGUAGE_CODE.match(lang):
@@ -86,7 +86,7 @@ class RawTextQuery(object):
                        or lang_id.startswith(lang)\
                        or lang == lang_name\
                        or lang == english_name\
-                       or lang.replace('_', ' ') == country:
+                       or lang.replace('-', ' ') == country:
                         parse_next = True
                         self.languages.append(lang_id)
                         # to ensure best match (first match is not necessarily the best one)

+ 2 - 11
searx/search.py

@@ -27,20 +27,16 @@ from searx.engines import (
 )
 from searx.answerers import ask
 from searx.utils import gen_useragent
-from searx.query import RawTextQuery, SearchQuery
+from searx.query import RawTextQuery, SearchQuery, VALID_LANGUAGE_CODE
 from searx.results import ResultContainer
 from searx import logger
 from searx.plugins import plugins
-from searx.languages import language_codes
 from searx.exceptions import SearxParameterException
 
 logger = logger.getChild('search')
 
 number_of_searches = 0
 
-language_code_set = set(l[0].lower() for l in language_codes)
-language_code_set.add('all')
-
 
 def send_http_request(engine, request_params, start_time, timeout_limit):
     # for page_load_time stats
@@ -219,7 +215,7 @@ def get_search_query_from_webapp(preferences, form):
         query_lang = preferences.get_value('language')
 
     # check language
-    if query_lang.lower() not in language_code_set:
+    if not VALID_LANGUAGE_CODE.match(query_lang):
         raise SearxParameterException('language', query_lang)
 
     # get safesearch
@@ -371,11 +367,6 @@ class Search(object):
             if search_query.pageno > 1 and not engine.paging:
                 continue
 
-            # if search-language is set and engine does not
-            # provide language-support, skip
-            if search_query.lang != 'all' and not engine.language_support:
-                continue
-
             # if time_range is not supported, skip
             if search_query.time_range and not engine.time_range_support:
                 continue

Some files were not shown because too many files changed in this diff