# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""DuckDuckGo (Instant Answer API)

"""

import json
from urllib.parse import urlencode, urlparse, urljoin

from lxml import html

from searx.data import WIKIDATA_UNITS
from searx.engines.duckduckgo import language_aliases
from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
from searx.utils import extract_text, html_to_text, match_language, get_string_replaces_function
from searx.external_urls import get_external_url, get_earth_coordinates_url, area_to_osm_zoom
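# Note: this module also relies on names that are not imported here but are, to
# the best of our knowledge, injected at load time by the searx engine loader:
# 'logger' (the engine's child logger) and 'supported_languages'.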
 
# about
about = {
    "website": 'https://duckduckgo.com/',
    "wikidata_id": 'Q12805',
    "official_api_documentation": 'https://duckduckgo.com/api',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

URL = 'https://api.duckduckgo.com/' \
    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
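# Illustrative only: for the query "searx", URL.format(query=urlencode({'q': 'searx'}))
# yields 'https://api.duckduckgo.com/?q=searx&format=json&pretty=0&no_redirect=1&d=1'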
 
WIKIDATA_PREFIX = [
    'http://www.wikidata.org/entity/',
    'https://www.wikidata.org/entity/'
]

replace_http_by_https = get_string_replaces_function({'http:': 'https:'})
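# Rough sketch of the helper built above: get_string_replaces_function returns a
# callable that applies the given replacement mapping, e.g.
#   replace_http_by_https('http://www.wikidata.org/entity/Q2')
#   -> 'https://www.wikidata.org/entity/Q2'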
 
def is_broken_text(text):
    """DuckDuckGo may return something like "<a href="xxxx">http://somewhere Related website</a>"

    The href URL is broken, and the "Related website" text may contain some HTML.

    The best solution seems to be to ignore these results.
    """
    return text.startswith('http') and ' ' in text
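# Examples of the heuristic above (illustrative):
#   is_broken_text('http://somewhere Related website')  # True  -> result is dropped
#   is_broken_text('Related website')                   # False -> result is kept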
 
def result_to_text(text, htmlResult):
    # TODO: remove result ending with "Meaning" or "Category"  # pylint: disable=fixme
    dom = html.fromstring(htmlResult)
    a = dom.xpath('//a')
    if len(a) >= 1:
        result = extract_text(a[0])
    else:
        result = text
    if not is_broken_text(result):
        return result
    return None
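# Illustrative call, assuming a typical DuckDuckGo "Result" HTML snippet:
#   result_to_text('fallback text', '<a href="https://example.org">Example</a>')
#   -> 'Example'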
 
def request(query, params):
    params['url'] = URL.format(query=urlencode({'q': query}))
    language = match_language(
        params['language'],
        supported_languages,
        language_aliases
    )
    language = language.split('-')[0]
    params['headers']['Accept-Language'] = language
    return params
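# Minimal sketch of what request() fills in (the remaining params keys are
# supplied by the searx core; values here are illustrative):
#   params = {'language': 'en-US', 'headers': {}}
#   request('searx', params)
#   # params['url']                        -> the URL above with q=searx
#   # params['headers']['Accept-Language'] -> 'en'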
 
def response(resp):
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    results = []

    search_res = json.loads(resp.text)

    # search_res.get('Entity') possible values (not exhaustive):
    # * continent / country / department / location / waterfall
    # * actor / musician / artist
    # * book / performing art / film / television / media franchise / concert tour / playwright
    # * prepared food
    # * website / software / os / programming language / file format / software engineer
    # * company

    content = ''
    heading = search_res.get('Heading', '')
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one
    answer = search_res.get('Answer', '')
    if answer:
        logger.debug('AnswerType="%s" Answer="%s"', search_res.get('AnswerType'), answer)
        if search_res.get('AnswerType') not in ['calc', 'ip']:
            results.append({'answer': html_to_text(answer)})
 
    # add infobox
    if 'Definition' in search_res:
        content = content + search_res.get('Definition', '')

    if 'Abstract' in search_res:
        content = content + search_res.get('Abstract', '')

    # image
    image = search_res.get('Image')
    image = None if image == '' else image
    if image is not None and urlparse(image).netloc == '':
        image = urljoin('https://duckduckgo.com', image)
 
    # urls
    # Official website, Wikipedia page
    for ddg_result in search_res.get('Results', []):
        firstURL = ddg_result.get('FirstURL')
        text = ddg_result.get('Text')
        if firstURL is not None and text is not None:
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})
 
    # related topics
    for ddg_result in search_res.get('RelatedTopics', []):
        if 'FirstURL' in ddg_result:
            firstURL = ddg_result.get('FirstURL')
            text = ddg_result.get('Text')
            if not is_broken_text(text):
                suggestion = result_to_text(
                    text,
                    ddg_result.get('Result')
                )
                if suggestion != heading and suggestion is not None:
                    results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            suggestions = []
            relatedTopics.append({
                'name': ddg_result.get('Name', ''),
                'suggestions': suggestions
            })
            for topic_result in ddg_result.get('Topics', []):
                suggestion = result_to_text(
                    topic_result.get('Text'),
                    topic_result.get('Result')
                )
                if suggestion != heading and suggestion is not None:
                    suggestions.append(suggestion)
 
    # abstract
    abstractURL = search_res.get('AbstractURL', '')
    if abstractURL != '':
        # add as a result? problem: the abstract is always in English
        infobox_id = abstractURL
        urls.append({
            'title': search_res.get('AbstractSource'),
            'url': abstractURL,
            'official': True
        })
        results.append({
            'url': abstractURL,
            'title': heading
        })

    # definition
    definitionURL = search_res.get('DefinitionURL', '')
    if definitionURL != '':
        # add as a result? as an answer? problem: the definition is always in English
        infobox_id = definitionURL
        urls.append({
            'title': search_res.get('DefinitionSource'),
            'url': definitionURL
        })

    # to merge with wikidata's infobox
    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)
 
    # attributes
    # some will be converted to urls
    if 'Infobox' in search_res:
        infobox = search_res.get('Infobox')
        if 'content' in infobox:
            osm_zoom = 17
            coordinates = None
            for info in infobox.get('content'):
                data_type = info.get('data_type')
                data_label = info.get('label')
                data_value = info.get('value')

                # Workaround: ddg may return a double quote
                if data_value == '""':
                    continue

                # Is it an external URL?
                # * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile
                # * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id
                # * netflix_id
                external_url = get_external_url(data_type, data_value)
                if external_url is not None:
                    urls.append({
                        'title': data_label,
                        'url': external_url
                    })
                elif data_type in ['instance', 'wiki_maps_trigger', 'google_play_artist_id']:
                    # ignore instance: Wikidata value from "Instance Of" (Qxxxx)
                    # ignore wiki_maps_trigger: reference to a javascript
                    # ignore google_play_artist_id: service shutdown
                    pass
                elif data_type == 'string' and data_label == 'Website':
                    # There is already a URL for the website
                    pass
                elif data_type == 'area':
                    attributes.append({
                        'label': data_label,
                        'value': area_to_str(data_value),
                        'entity': 'P2046'
                    })
                    osm_zoom = area_to_osm_zoom(data_value.get('amount'))
                elif data_type == 'coordinates':
                    if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2':
                        # coordinates on Earth
                        # get the zoom information from the area
                        coordinates = info
                    else:
                        # coordinates NOT on Earth
                        attributes.append({
                            'label': data_label,
                            'value': data_value,
                            'entity': 'P625'
                        })
                elif data_type == 'string':
                    attributes.append({
                        'label': data_label,
                        'value': data_value
                    })

            if coordinates:
                data_label = coordinates.get('label')
                data_value = coordinates.get('value')
                latitude = data_value.get('latitude')
                longitude = data_value.get('longitude')
                url = get_earth_coordinates_url(latitude, longitude, osm_zoom)
                urls.append({
                    'title': 'OpenStreetMap',
                    'url': url,
                    'entity': 'P625'
                })
 
    if len(heading) > 0:
        # TODO: get infobox.meta.value where .label='article_title'  # pylint: disable=fixme
        if image is None and len(attributes) == 0 and len(urls) == 1 and \
           len(relatedTopics) == 0 and len(content) == 0:
            results.append({
                'url': urls[0]['url'],
                'title': heading,
                'content': content
            })
        else:
            results.append({
                'infobox': heading,
                'id': infobox_id,
                'content': content,
                'img_src': image,
                'attributes': attributes,
                'urls': urls,
                'relatedTopics': relatedTopics
            })

    return results
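# For reference, a heavily trimmed Instant Answer payload of the shape that
# response() parses (field names taken from the API fields handled above,
# values illustrative):
#   {
#       "Heading": "...",
#       "Answer": "", "AnswerType": "",
#       "Abstract": "...", "AbstractURL": "https://...", "AbstractSource": "Wikipedia",
#       "Image": "/i/example.png",
#       "Results": [{"FirstURL": "https://...", "Text": "Official site"}],
#       "RelatedTopics": [...],
#       "Infobox": {"content": [{"data_type": "...", "label": "...", "value": "..."}]}
#   }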
 
def unit_to_str(unit):
    if unit is None:
        # guard: area_to_str may pass None when the 'unit' key is missing
        return None
    for prefix in WIKIDATA_PREFIX:
        if unit.startswith(prefix):
            wikidata_entity = unit[len(prefix):]
            return WIKIDATA_UNITS.get(wikidata_entity, unit)
    return unit
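# Illustrative only (the exact symbol comes from searx's WIKIDATA_UNITS table):
#   unit_to_str('http://www.wikidata.org/entity/Q11573')
#   -> e.g. 'm' (Q11573 is the metre); unknown entities fall back to the raw URL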
 
def area_to_str(area):
    """parse {'unit': 'http://www.wikidata.org/entity/Q712226', 'amount': '+20.99'}"""
    unit = unit_to_str(area.get('unit'))
    if unit is not None:
        try:
            amount = float(area.get('amount'))
            return '{} {}'.format(amount, unit)
        except ValueError:
            pass
    return '{} {}'.format(area.get('amount', ''), area.get('unit', ''))
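# Illustrative only, using the docstring's example and assuming WIKIDATA_UNITS
# maps Q712226 (square kilometre) to 'km²':
#   area_to_str({'unit': 'http://www.wikidata.org/entity/Q712226', 'amount': '+20.99'})
#   -> '20.99 km²'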
 
 