@@ -1,6 +1,6 @@
 import json
 from urllib import urlencode
-from re import sub
+from re import compile, sub
 from lxml import html
 from searx.utils import html_to_text
 from searx.engines.xpath import extract_text
@@ -8,6 +8,8 @@ from searx.engines.xpath import extract_text
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
 
+http_regex = compile(r'^http:')
+
 
 def result_to_text(url, text, htmlResult):
     # TODO : remove result ending with "Meaning" or "Category"
@@ -106,7 +108,7 @@ def response(resp):
 
     # to merge with wikidata's infobox
     if infobox_id:
-        infobox_id = sub(r'^http:', r'https:', infobox_id)
+        infobox_id = http_regex.sub('https:', infobox_id)
 
     # entity
     entity = search_res.get('Entity', None)