Browse Source

bugfix: fetch_supported_languages bing, -news, -videos, -images

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 5 years ago
parent
commit
e0c99d9dcb
4 changed files with 19 additions and 30 deletions
  1. +14 -9 searx/engines/bing.py
  2. +2 -19 searx/engines/bing_images.py
  3. +2 -1 searx/engines/bing_news.py
  4. +1 -1 searx/engines/bing_videos.py

+ 14 - 9
searx/engines/bing.py

@@ -110,13 +110,18 @@ def response(resp):
 
 # get supported languages from their site
 def _fetch_supported_languages(resp):
-    supported_languages = []
+    lang_tags = set()
+
+    setmkt = re.compile('setmkt=([^&]*)')
     dom = html.fromstring(resp.text)
-    options = eval_xpath(dom, '//div[@id="limit-languages"]//input')
-    for option in options:
-        code = eval_xpath(option, './@id')[0].replace('_', '-')
-        if code == 'nb':
-            code = 'no'
-        supported_languages.append(code)
-
-    return supported_languages
+    lang_links = eval_xpath(dom, "//li/a[contains(@href, 'setmkt')]")
+
+    for a in lang_links:
+        href = eval_xpath(a, './@href')[0]
+        match = setmkt.search(href)
+        l_tag = match.groups()[0]
+        _lang, _nation = l_tag.split('-',1)
+        l_tag = _lang.lower() + '-' + _nation.upper()
+        lang_tags.add(l_tag)
+
+    return list(lang_tags)

+ 2 - 19
searx/engines/bing_images.py

@@ -18,6 +18,8 @@ import re
 from searx.url_utils import urlencode
 from searx.utils import match_language
 
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
+
 # engine dependent config
 categories = ['images']
 paging = True
@@ -103,22 +105,3 @@ def response(resp):
             continue
 
     return results
-
-
-# get supported languages from their site
-def _fetch_supported_languages(resp):
-    supported_languages = []
-    dom = html.fromstring(resp.text)
-
-    regions_xpath = '//div[@id="region-section-content"]' \
-                    + '//ul[@class="b_vList"]/li/a/@href'
-
-    regions = dom.xpath(regions_xpath)
-    for region in regions:
-        code = re.search('setmkt=[^\&]+', region).group()[7:]
-        if code == 'nb-NO':
-            code = 'no-NO'
-
-        supported_languages.append(code)
-
-    return supported_languages

+ 2 - 1
searx/engines/bing_news.py

@@ -15,9 +15,10 @@ from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get, match_language
-from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 from searx.url_utils import urlencode, urlparse, parse_qsl
 
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
+
 # engine dependent config
 categories = ['news']
 paging = True

+ 1 - 1
searx/engines/bing_videos.py

@@ -12,10 +12,10 @@
 
 from json import loads
 from lxml import html
-from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url
 from searx.url_utils import urlencode
 from searx.utils import match_language
 
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 
 categories = ['videos']
 paging = True