autocomplete.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """This module implements functions needed for the autocompleter.
  4. """
  5. # pylint: disable=use-dict-literal
  6. from json import loads
  7. from urllib.parse import urlencode
  8. from lxml import etree
  9. from httpx import HTTPError
  10. from searx import settings
  11. from searx.engines import engines
  12. from searx.network import get as http_get
  13. from searx.exceptions import SearxEngineResponseException
  14. # a fetch_supported_languages() for XPath engines isn't available right now
  15. # _brave = ENGINES_LANGUAGES['brave'].keys()
  16. def get(*args, **kwargs):
  17. if 'timeout' not in kwargs:
  18. kwargs['timeout'] = settings['outgoing']['request_timeout']
  19. kwargs['raise_for_httperror'] = True
  20. return http_get(*args, **kwargs)
  21. def brave(query, _lang):
  22. # brave search autocompleter
  23. url = 'https://search.brave.com/api/suggest?'
  24. url += urlencode({'q': query})
  25. country = 'all'
  26. # if lang in _brave:
  27. # country = lang
  28. kwargs = {'cookies': {'country': country}}
  29. resp = get(url, **kwargs)
  30. results = []
  31. if resp.ok:
  32. data = resp.json()
  33. for item in data[1]:
  34. results.append(item)
  35. return results
  36. def dbpedia(query, _lang):
  37. # dbpedia autocompleter, no HTTPS
  38. autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
  39. response = get(autocomplete_url + urlencode(dict(QueryString=query)))
  40. results = []
  41. if response.ok:
  42. dom = etree.fromstring(response.content)
  43. results = dom.xpath('//Result/Label//text()')
  44. return results
  45. def duckduckgo(query, sxng_locale):
  46. """Autocomplete from DuckDuckGo. Supports DuckDuckGo's languages"""
  47. traits = engines['duckduckgo'].traits
  48. args = {
  49. 'q': query,
  50. 'kl': traits.get_region(sxng_locale, traits.all_locale),
  51. }
  52. url = 'https://duckduckgo.com/ac/?type=list&' + urlencode(args)
  53. resp = get(url)
  54. ret_val = []
  55. if resp.ok:
  56. j = resp.json()
  57. if len(j) > 1:
  58. ret_val = j[1]
  59. return ret_val
  60. def google(query, lang):
  61. # google autocompleter
  62. autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
  63. response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))
  64. results = []
  65. if response.ok:
  66. dom = etree.fromstring(response.text)
  67. results = dom.xpath('//suggestion/@data')
  68. return results
  69. def seznam(query, _lang):
  70. # seznam search autocompleter
  71. url = 'https://suggest.seznam.cz/fulltext/cs?{query}'
  72. resp = get(
  73. url.format(
  74. query=urlencode(
  75. {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
  76. )
  77. )
  78. )
  79. if not resp.ok:
  80. return []
  81. data = resp.json()
  82. return [
  83. ''.join([part.get('text', '') for part in item.get('text', [])])
  84. for item in data.get('result', [])
  85. if item.get('itemType', None) == 'ItemType.TEXT'
  86. ]
  87. def startpage(query, sxng_locale):
  88. """Autocomplete from Startpage. Supports Startpage's languages"""
  89. lui = engines['startpage'].traits.get_language(sxng_locale, 'english')
  90. url = 'https://startpage.com/suggestions?{query}'
  91. resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
  92. data = resp.json()
  93. return [e['text'] for e in data.get('suggestions', []) if 'text' in e]
  94. def swisscows(query, _lang):
  95. # swisscows autocompleter
  96. url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
  97. resp = loads(get(url.format(query=urlencode({'query': query}))).text)
  98. return resp
  99. def qwant(query, sxng_locale):
  100. """Autocomplete from Qwant. Supports Qwant's regions."""
  101. results = []
  102. locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
  103. url = 'https://api.qwant.com/v3/suggest?{query}'
  104. resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
  105. if resp.ok:
  106. data = resp.json()
  107. if data['status'] == 'success':
  108. for item in data['data']['items']:
  109. results.append(item['value'])
  110. return results
  111. def wikipedia(query, sxng_locale):
  112. """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
  113. results = []
  114. eng_traits = engines['wikipedia'].traits
  115. wiki_lang = eng_traits.get_language(sxng_locale, 'en')
  116. wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')
  117. url = 'https://{wiki_netloc}/w/api.php?{args}'
  118. args = urlencode(
  119. {
  120. 'action': 'opensearch',
  121. 'format': 'json',
  122. 'formatversion': '2',
  123. 'search': query,
  124. 'namespace': '0',
  125. 'limit': '10',
  126. }
  127. )
  128. resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
  129. if resp.ok:
  130. data = resp.json()
  131. if len(data) > 1:
  132. results = data[1]
  133. return results
  134. def yandex(query, _lang):
  135. # yandex autocompleter
  136. url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
  137. resp = loads(get(url.format(urlencode(dict(part=query)))).text)
  138. if len(resp) > 1:
  139. return resp[1]
  140. return []
  141. backends = {
  142. 'dbpedia': dbpedia,
  143. 'duckduckgo': duckduckgo,
  144. 'google': google,
  145. 'seznam': seznam,
  146. 'startpage': startpage,
  147. 'swisscows': swisscows,
  148. 'qwant': qwant,
  149. 'wikipedia': wikipedia,
  150. 'brave': brave,
  151. 'yandex': yandex,
  152. }
  153. def search_autocomplete(backend_name, query, sxng_locale):
  154. backend = backends.get(backend_name)
  155. if backend is None:
  156. return []
  157. if engines[backend_name].traits.data_type != "traits_v1":
  158. # vintage / deprecated
  159. if not sxng_locale or sxng_locale == 'all':
  160. sxng_locale = 'en'
  161. else:
  162. sxng_locale = sxng_locale.split('-')[0]
  163. try:
  164. return backend(query, sxng_locale)
  165. except (HTTPError, SearxEngineResponseException):
  166. return []