|
@@ -3,9 +3,8 @@
|
|
"""DuckDuckGo Lite
|
|
"""DuckDuckGo Lite
|
|
"""
|
|
"""
|
|
|
|
|
|
-from json import loads
|
|
|
|
-
|
|
|
|
-from lxml.html import fromstring
|
|
|
|
|
|
+import json
|
|
|
|
+from lxml import html
|
|
|
|
|
|
from searx.utils import (
|
|
from searx.utils import (
|
|
dict_subset,
|
|
dict_subset,
|
|
@@ -14,7 +13,10 @@ from searx.utils import (
|
|
extract_text,
|
|
extract_text,
|
|
match_language,
|
|
match_language,
|
|
)
|
|
)
|
|
-from searx.network import get
|
|
|
|
|
|
+from searx import network
|
|
|
|
+from searx.enginelib.traits import EngineTraits
|
|
|
|
+
|
|
|
|
+traits: EngineTraits
|
|
|
|
|
|
# about
|
|
# about
|
|
about = {
|
|
about = {
|
|
@@ -120,13 +122,13 @@ def request(query, params):
|
|
def response(resp):
|
|
def response(resp):
|
|
|
|
|
|
headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
|
|
headers_ping = dict_subset(resp.request.headers, ['User-Agent', 'Accept-Encoding', 'Accept', 'Cookie'])
|
|
- get(url_ping, headers=headers_ping)
|
|
|
|
|
|
+ network.get(url_ping, headers=headers_ping)
|
|
|
|
|
|
if resp.status_code == 303:
|
|
if resp.status_code == 303:
|
|
return []
|
|
return []
|
|
|
|
|
|
results = []
|
|
results = []
|
|
- doc = fromstring(resp.text)
|
|
|
|
|
|
+ doc = html.fromstring(resp.text)
|
|
|
|
|
|
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
|
|
result_table = eval_xpath(doc, '//html/body/form/div[@class="filters"]/table')
|
|
if not len(result_table) >= 3:
|
|
if not len(result_table) >= 3:
|
|
@@ -180,7 +182,70 @@ def _fetch_supported_languages(resp):
|
|
response_page = response_page[response_page.find('regions:{') + 8 :]
|
|
response_page = response_page[response_page.find('regions:{') + 8 :]
|
|
response_page = response_page[: response_page.find('}') + 1]
|
|
response_page = response_page[: response_page.find('}') + 1]
|
|
|
|
|
|
- regions_json = loads(response_page)
|
|
|
|
|
|
+ regions_json = json.loads(response_page)
|
|
supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
|
|
supported_languages = map((lambda x: x[3:] + '-' + x[:2].upper()), regions_json.keys())
|
|
|
|
|
|
return list(supported_languages)
|
|
return list(supported_languages)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def fetch_traits(engine_traits: EngineTraits):
|
|
|
|
+ """Fetch regions from DuckDuckGo."""
|
|
|
|
+ # pylint: disable=import-outside-toplevel
|
|
|
|
+
|
|
|
|
+ engine_traits.data_type = 'supported_languages' # deprecated
|
|
|
|
+
|
|
|
|
+ import babel
|
|
|
|
+ from searx.locales import region_tag
|
|
|
|
+
|
|
|
|
+ engine_traits.all_locale = 'wt-wt'
|
|
|
|
+
|
|
|
|
+ resp = network.get('https://duckduckgo.com/util/u588.js')
|
|
|
|
+ if not resp.ok:
|
|
|
|
+ print("ERROR: response from DuckDuckGo is not OK.")
|
|
|
|
+
|
|
|
|
+ pos = resp.text.find('regions:{') + 8
|
|
|
|
+ js_code = resp.text[pos:]
|
|
|
|
+ pos = js_code.find('}') + 1
|
|
|
|
+ regions = json.loads(js_code[:pos])
|
|
|
|
+
|
|
|
|
+ reg_map = {
|
|
|
|
+ 'tw-tzh': 'zh_TW',
|
|
|
|
+ 'hk-tzh': 'zh_HK',
|
|
|
|
+ 'ct-ca': 'skip', # ct-ca and es-ca both map to ca_ES
|
|
|
|
+ 'es-ca': 'ca_ES',
|
|
|
|
+ 'id-en': 'id_ID',
|
|
|
|
+ 'no-no': 'nb_NO',
|
|
|
|
+ 'jp-jp': 'ja_JP',
|
|
|
|
+ 'kr-kr': 'ko_KR',
|
|
|
|
+ 'xa-ar': 'ar_SA',
|
|
|
|
+ 'sl-sl': 'sl_SI',
|
|
|
|
+ 'th-en': 'th_TH',
|
|
|
|
+ 'vn-en': 'vi_VN',
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for eng_tag, name in regions.items():
|
|
|
|
+
|
|
|
|
+ if eng_tag == 'wt-wt':
|
|
|
|
+ engine_traits.all_locale = 'wt-wt'
|
|
|
|
+ continue
|
|
|
|
+
|
|
|
|
+ region = reg_map.get(eng_tag)
|
|
|
|
+ if region == 'skip':
|
|
|
|
+ continue
|
|
|
|
+
|
|
|
|
+ if not region:
|
|
|
|
+ eng_territory, eng_lang = eng_tag.split('-')
|
|
|
|
+ region = eng_lang + '_' + eng_territory.upper()
|
|
|
|
+
|
|
|
|
+ try:
|
|
|
|
+ sxng_tag = region_tag(babel.Locale.parse(region))
|
|
|
|
+ except babel.UnknownLocaleError:
|
|
|
|
+ print("ERROR: %s (%s) -> %s is unknown by babel" % (name, eng_tag, region))
|
|
|
|
+ continue
|
|
|
|
+
|
|
|
|
+ conflict = engine_traits.regions.get(sxng_tag)
|
|
|
|
+ if conflict:
|
|
|
|
+ if conflict != eng_tag:
|
|
|
|
+ print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag))
|
|
|
|
+ continue
|
|
|
|
+ engine_traits.regions[sxng_tag] = eng_tag
|