Browse Source

[fix] check language aliases when setting search language

Marc Abonce Seguin 6 years ago
parent
commit
5568f24d6c

+ 1 - 1
searx/engines/bing_images.py

@@ -55,7 +55,7 @@ def request(query, params):
         query=urlencode({'q': query}),
         offset=offset)
 
-    language = match_language(params['language'], supported_languages).lower()
+    language = match_language(params['language'], supported_languages, language_aliases).lower()
 
     params['cookies']['SRCHHPGUSR'] = \
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

+ 1 - 1
searx/engines/bing_videos.py

@@ -48,7 +48,7 @@ def request(query, params):
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')
 
     # language cookie
-    language = match_language(params['language'], supported_languages).lower()
+    language = match_language(params['language'], supported_languages, language_aliases).lower()
     params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'
 
     # query and paging

+ 1 - 1
searx/engines/google.py

@@ -166,7 +166,7 @@ def extract_text_from_dom(result, xpath):
 def request(query, params):
     offset = (params['pageno'] - 1) * 10
 
-    language = match_language(params['language'], supported_languages)
+    language = match_language(params['language'], supported_languages, language_aliases)
     language_array = language.split('-')
     if params['language'].find('-') > 0:
         country = params['language'].split('-')[1]

+ 1 - 1
searx/engines/google_news.py

@@ -51,7 +51,7 @@ def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       search_options=urlencode(search_options))
 
-    language = match_language(params['language'], supported_languages).split('-')[0]
+    language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]
     if language:
         params['url'] += '&lr=lang_' + language
 

+ 1 - 1
searx/engines/qwant.py

@@ -46,7 +46,7 @@ def request(query, params):
                                    offset=offset)
 
     # add language tag
-    language = match_language(params['language'], supported_languages)
+    language = match_language(params['language'], supported_languages, language_aliases)
     params['url'] += '&locale=' + language.replace('-', '_').lower()
 
     return params

+ 1 - 1
searx/engines/swisscows.py

@@ -36,7 +36,7 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
 
 
 # do search-request
 def request(query, params):
-    region = match_language(params['language'], supported_languages)
+    region = match_language(params['language'], supported_languages, language_aliases)
     ui_language = region.split('-')[0]
 
     search_path = search_string.format(

+ 1 - 1
searx/engines/wikidata.py

@@ -68,7 +68,7 @@ def response(resp):
     html = fromstring(resp.text)
     search_results = html.xpath(wikidata_ids_xpath)
 
-    language = match_language(resp.search_params['language'], supported_languages).split('-')[0]
+    language = match_language(resp.search_params['language'], supported_languages, language_aliases).split('-')[0]
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
     for search_result in search_results[:result_count]:

+ 1 - 1
searx/engines/wikipedia.py

@@ -31,7 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 
 
 # set language in base_url
 def url_lang(lang):
-    return match_language(lang, supported_languages).split('-')[0]
+    return match_language(lang, supported_languages, language_aliases).split('-')[0]
 
 
 # do search-request

+ 1 - 0
tests/unit/engines/test_bing_images.py

@@ -9,6 +9,7 @@ class TestBingImagesEngine(SearxTestCase):
 
 
     def test_request(self):
         bing_images.supported_languages = ['fr-FR', 'en-US']
+        bing_images.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

+ 1 - 0
tests/unit/engines/test_bing_videos.py

@@ -9,6 +9,7 @@ class TestBingVideosEngine(SearxTestCase):
 
 
     def test_request(self):
         bing_videos.supported_languages = ['fr-FR', 'en-US']
+        bing_videos.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

+ 8 - 1
tests/unit/engines/test_google.py

@@ -15,7 +15,8 @@ class TestGoogleEngine(SearxTestCase):
         return response
 
     def test_request(self):
-        google.supported_languages = ['en', 'fr', 'zh-CN']
+        google.supported_languages = ['en', 'fr', 'zh-CN', 'iw']
+        google.language_aliases = {'he': 'iw'}
 
         query = 'test_query'
         dicto = defaultdict(dict)
@@ -41,6 +42,12 @@ class TestGoogleEngine(SearxTestCase):
         self.assertIn('zh-CN', params['url'])
         self.assertIn('zh-CN', params['headers']['Accept-Language'])
 
+        dicto['language'] = 'he'
+        params = google.request(query, dicto)
+        self.assertIn('google.com', params['url'])
+        self.assertIn('iw', params['url'])
+        self.assertIn('iw', params['headers']['Accept-Language'])
+
     def test_response(self):
         self.assertRaises(AttributeError, google.response, None)
         self.assertRaises(AttributeError, google.response, [])

+ 1 - 0
tests/unit/engines/test_google_news.py

@@ -10,6 +10,7 @@ class TestGoogleNewsEngine(SearxTestCase):
 
 
     def test_request(self):
         google_news.supported_languages = ['en-US', 'fr-FR']
+        google_news.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

+ 1 - 0
tests/unit/engines/test_qwant.py

@@ -8,6 +8,7 @@ class TestQwantEngine(SearxTestCase):
 
 
     def test_request(self):
         qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR']
+        qwant.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0

+ 1 - 0
tests/unit/engines/test_swisscows.py

@@ -8,6 +8,7 @@ class TestSwisscowsEngine(SearxTestCase):
 
 
     def test_request(self):
         swisscows.supported_languages = ['de-AT', 'de-DE']
+        swisscows.language_aliases = {}
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

+ 1 - 0
tests/unit/engines/test_wikidata.py

@@ -27,6 +27,7 @@ class TestWikidataEngine(SearxTestCase):
         self.assertRaises(AttributeError, wikidata.response, '[]')
 
         wikidata.supported_languages = ['en', 'es']
+        wikidata.language_aliases = {}
         response = mock.Mock(text='<html></html>', search_params={"language": "en"})
         self.assertEqual(wikidata.response(response), [])
 

+ 7 - 2
tests/unit/engines/test_wikipedia.py

@@ -8,7 +8,8 @@ from searx.testing import SearxTestCase
 class TestWikipediaEngine(SearxTestCase):
 
     def test_request(self):
-        wikipedia.supported_languages = ['fr', 'en']
+        wikipedia.supported_languages = ['fr', 'en', 'no']
+        wikipedia.language_aliases = {'nb': 'no'}
 
         query = 'test_query'
         dicto = defaultdict(dict)
@@ -25,9 +26,13 @@ class TestWikipediaEngine(SearxTestCase):
         self.assertIn('Test_Query', params['url'])
         self.assertNotIn('test_query', params['url'])
 
+        dicto['language'] = 'nb'
+        params = wikipedia.request(query, dicto)
+        self.assertIn('no.wikipedia.org', params['url'])
+
         dicto['language'] = 'xx'
         params = wikipedia.request(query, dicto)
-        self.assertIn('en', params['url'])
+        self.assertIn('en.wikipedia.org', params['url'])
 
     def test_response(self):
         dicto = defaultdict(dict)