Browse Source

add language support for qwant

closes issue #863
marc 8 years ago
parent
commit
805fb02ed1

File diff suppressed because it is too large
+ 0 - 0
searx/data/engines_languages.json


+ 25 - 1
searx/engines/qwant.py

@@ -20,6 +20,7 @@ from searx.utils import html_to_text
 categories = None
 paging = True
 language_support = True
+supported_languages_url = 'https://qwant.com/region'
 
 category_to_keyword = {'general': 'web',
                        'images': 'images',
@@ -46,6 +47,13 @@ def request(query, params):
 
     # add language tag if specified
     if params['language'] != 'all':
+        if params['language'].find('-') < 0:
+            # tries to get a country code from language
+            for lang in supported_languages:
+                lc = lang.split('-')
+                if params['language'] == lc[0]:
+                    params['language'] = lang
+                    break
         params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
 
     return params
@@ -96,5 +104,21 @@ def response(resp):
                             'publishedDate': published_date,
                             'content': content})
 
-    # return results
     return results
+
+
+# build "<language code>-<country>" strings from the regions object embedded in resp.text
+def _fetch_supported_languages(resp):
+    # the region list is embedded in the page as a js object after the 'regionalisation' marker
+    response_text = resp.text
+    response_text = response_text[response_text.find('regionalisation'):]  # NOTE(review): -1 if marker is missing
+    response_text = response_text[response_text.find('{'):response_text.find(');')]  # first '{' up to first ');'
+
+    regions_json = loads(response_text)  # presumably json.loads, imported outside this hunk
+
+    supported_languages = []
+    for lang in regions_json['languages'].values():
+        for country in lang['countries']:
+            supported_languages.append(lang['code'] + '-' + country)  # e.g. 'de' + 'CH' -> 'de-CH'
+
+    return supported_languages

+ 3 - 5
searx/languages.py

@@ -5,9 +5,6 @@
 language_codes = (
     (u"ar-SA", u"العربية", u"", u"Arabic"),
     (u"bg-BG", u"Български", u"", u"Bulgarian"),
-    (u"ca", u"Català", u"", u"Catalan"),
-    (u"ca-CT", u"Català", u"", u"Catalan"),
-    (u"ca-ES", u"Català", u"Espanya", u"Catalan"),
     (u"cs-CZ", u"Čeština", u"", u"Czech"),
     (u"da-DK", u"Dansk", u"", u"Danish"),
     (u"de", u"Deutsch", u"", u"German"),
@@ -18,7 +15,9 @@ language_codes = (
     (u"en", u"English", u"", u"English"),
     (u"en-AU", u"English", u"Australia", u"English"),
     (u"en-CA", u"English", u"Canada", u"English"),
+    (u"en-CY", u"English", u"Cyprus", u"English"),
     (u"en-GB", u"English", u"United Kingdom", u"English"),
+    (u"en-GD", u"English", u"Grenada", u"English"),
     (u"en-ID", u"English", u"Indonesia", u"English"),
     (u"en-IE", u"English", u"Ireland", u"English"),
     (u"en-IN", u"English", u"India", u"English"),
@@ -54,10 +53,10 @@ language_codes = (
     (u"ko-KR", u"한국어", u"", u"Korean"),
     (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
     (u"lv-LV", u"Latviešu", u"", u"Latvian"),
+    (u"ms-MY", u"Bahasa Melayu", u"", u"Malay"),
     (u"nl", u"Nederlands", u"", u"Dutch"),
     (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
-    (u"no-NO", u"Norsk", u"", u"Norwegian"),
     (u"pl-PL", u"Polski", u"", u"Polish"),
     (u"pt", u"Português", u"", u"Portuguese"),
     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
@@ -69,7 +68,6 @@ language_codes = (
     (u"sv-SE", u"Svenska", u"", u"Swedish"),
     (u"th-TH", u"ไทย", u"", u"Thai"),
     (u"tr-TR", u"Türkçe", u"", u"Turkish"),
-    (u"uk-UA", u"Українська", u"", u"Ukrainian"),
     (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
     (u"zh", u"中文", u"", u"Chinese"),
     (u"zh-CN", u"中文", u"中国", u"Chinese"),

+ 21 - 0
tests/unit/engines/test_qwant.py

@@ -25,6 +25,11 @@ class TestQwantEngine(SearxTestCase):
         self.assertFalse('fr' in params['url'])
         self.assertIn('news', params['url'])
 
+        qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
+        dicto['language'] = 'fr'
+        params = qwant.request(query, dicto)
+        self.assertIn('fr_fr', params['url'])
+
     def test_response(self):
         self.assertRaises(AttributeError, qwant.response, None)
         self.assertRaises(AttributeError, qwant.response, [])
@@ -315,3 +320,19 @@ class TestQwantEngine(SearxTestCase):
         results = qwant.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
+
+    def test_fetch_supported_languages(self):  # parses a fake page embedding the regionalisation object
+        page = """some code...
+        config_set('project.regionalisation', {"continents":{},"languages":
+        {"de":{"code":"de","name":"Deutsch","countries":["DE","CH","AT"]},
+        "it":{"code":"it","name":"Italiano","countries":["IT","CH"]}}});
+        some more code..."""
+        response = mock.Mock(text=page)  # stand-in response exposing only .text
+        languages = qwant._fetch_supported_languages(response)
+        self.assertEqual(type(languages), list)
+        self.assertEqual(len(languages), 5)  # 3 countries listed for "de" + 2 for "it"
+        self.assertIn('de-DE', languages)
+        self.assertIn('de-CH', languages)
+        self.assertIn('de-AT', languages)
+        self.assertIn('it-IT', languages)
+        self.assertIn('it-CH', languages)

+ 4 - 2
utils/fetch_languages.py

@@ -14,7 +14,8 @@ from json import loads, dumps
 import io
 from sys import path
 path.append('../searx')  # noqa
-from searx.engines import engines
+from searx import settings
+from searx.engines import initialize_engines, engines
 
 # Geonames API for country names.
 geonames_user = ''  # ADD USER NAME HERE
@@ -77,6 +78,7 @@ def get_country_name(locale):
 
 # Fetchs supported languages for each engine and writes json file with those.
 def fetch_supported_languages():
+    initialize_engines(settings['engines'])
     for engine_name in engines:
         if hasattr(engines[engine_name], 'fetch_supported_languages'):
             try:
@@ -117,7 +119,7 @@ def join_language_lists():
                     languages[lang]['counter'].append(engine_name)
 
     # filter list to include only languages supported by most engines
-    min_supported_engines = int(0.75 * len(engines_languages))
+    min_supported_engines = int(0.70 * len(engines_languages))
     languages = {code: lang for code, lang
                  in languages.iteritems()
                  if len(lang.get('counter', [])) >= min_supported_engines or

Some files were not shown because too many files changed in this diff