autocomplete.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """This module implements functions needed for the autocompleter.
  4. """
  5. # pylint: disable=use-dict-literal
  6. from json import loads
  7. from urllib.parse import urlencode
  8. from lxml import etree
  9. from httpx import HTTPError
  10. from searx import settings
  11. from searx.engines import engines
  12. from searx.network import get as http_get
  13. from searx.exceptions import SearxEngineResponseException
  14. # a fetch_supported_languages() for XPath engines isn't available right now
  15. # _brave = ENGINES_LANGUAGES['brave'].keys()
  16. def get(*args, **kwargs):
  17. if 'timeout' not in kwargs:
  18. kwargs['timeout'] = settings['outgoing']['request_timeout']
  19. kwargs['raise_for_httperror'] = True
  20. return http_get(*args, **kwargs)
  21. def brave(query, _lang):
  22. # brave search autocompleter
  23. url = 'https://search.brave.com/api/suggest?'
  24. url += urlencode({'q': query})
  25. country = 'all'
  26. # if lang in _brave:
  27. # country = lang
  28. kwargs = {'cookies': {'country': country}}
  29. resp = get(url, **kwargs)
  30. results = []
  31. if resp.ok:
  32. data = resp.json()
  33. for item in data[1]:
  34. results.append(item)
  35. return results
  36. def dbpedia(query, _lang):
  37. # dbpedia autocompleter, no HTTPS
  38. autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'
  39. response = get(autocomplete_url + urlencode(dict(QueryString=query)))
  40. results = []
  41. if response.ok:
  42. dom = etree.fromstring(response.content)
  43. results = dom.xpath('//Result/Label//text()')
  44. return results
  45. def duckduckgo(query, _lang):
  46. # duckduckgo autocompleter
  47. url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'
  48. resp = loads(get(url.format(urlencode(dict(q=query)))).text)
  49. if len(resp) > 1:
  50. return resp[1]
  51. return []
  52. def google(query, lang):
  53. # google autocompleter
  54. autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'
  55. response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))
  56. results = []
  57. if response.ok:
  58. dom = etree.fromstring(response.text)
  59. results = dom.xpath('//suggestion/@data')
  60. return results
  61. def seznam(query, _lang):
  62. # seznam search autocompleter
  63. url = 'https://suggest.seznam.cz/fulltext/cs?{query}'
  64. resp = get(
  65. url.format(
  66. query=urlencode(
  67. {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
  68. )
  69. )
  70. )
  71. if not resp.ok:
  72. return []
  73. data = resp.json()
  74. return [
  75. ''.join([part.get('text', '') for part in item.get('text', [])])
  76. for item in data.get('result', [])
  77. if item.get('itemType', None) == 'ItemType.TEXT'
  78. ]
  79. def startpage(query, lang):
  80. # startpage autocompleter
  81. lui = engines['startpage'].supported_languages.get(lang, 'english') # vintage / deprecated
  82. url = 'https://startpage.com/suggestions?{query}'
  83. resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
  84. data = resp.json()
  85. return [e['text'] for e in data.get('suggestions', []) if 'text' in e]
  86. def swisscows(query, _lang):
  87. # swisscows autocompleter
  88. url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'
  89. resp = loads(get(url.format(query=urlencode({'query': query}))).text)
  90. return resp
  91. def qwant(query, sxng_locale):
  92. """Autocomplete from Qwant. Supports Qwant's regions."""
  93. results = []
  94. locale = engines['qwant'].traits.get_region(sxng_locale, 'en_US')
  95. url = 'https://api.qwant.com/v3/suggest?{query}'
  96. resp = get(url.format(query=urlencode({'q': query, 'locale': locale, 'version': '2'})))
  97. if resp.ok:
  98. data = resp.json()
  99. if data['status'] == 'success':
  100. for item in data['data']['items']:
  101. results.append(item['value'])
  102. return results
  103. def wikipedia(query, sxng_locale):
  104. """Autocomplete from Wikipedia. Supports Wikipedia's languages (aka netloc)."""
  105. results = []
  106. eng_traits = engines['wikipedia'].traits
  107. wiki_lang = eng_traits.get_language(sxng_locale, 'en')
  108. wiki_netloc = eng_traits.custom['wiki_netloc'].get(wiki_lang, 'en.wikipedia.org')
  109. url = 'https://{wiki_netloc}/w/api.php?{args}'
  110. args = urlencode(
  111. {
  112. 'action': 'opensearch',
  113. 'format': 'json',
  114. 'formatversion': '2',
  115. 'search': query,
  116. 'namespace': '0',
  117. 'limit': '10',
  118. }
  119. )
  120. resp = get(url.format(args=args, wiki_netloc=wiki_netloc))
  121. if resp.ok:
  122. data = resp.json()
  123. if len(data) > 1:
  124. results = data[1]
  125. return results
  126. def yandex(query, _lang):
  127. # yandex autocompleter
  128. url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"
  129. resp = loads(get(url.format(urlencode(dict(part=query)))).text)
  130. if len(resp) > 1:
  131. return resp[1]
  132. return []
  133. backends = {
  134. 'dbpedia': dbpedia,
  135. 'duckduckgo': duckduckgo,
  136. 'google': google,
  137. 'seznam': seznam,
  138. 'startpage': startpage,
  139. 'swisscows': swisscows,
  140. 'qwant': qwant,
  141. 'wikipedia': wikipedia,
  142. 'brave': brave,
  143. 'yandex': yandex,
  144. }
  145. def search_autocomplete(backend_name, query, sxng_locale):
  146. backend = backends.get(backend_name)
  147. if backend is None:
  148. return []
  149. if engines[backend_name].traits.data_type != "traits_v1":
  150. # vintage / deprecated
  151. if not sxng_locale or sxng_locale == 'all':
  152. sxng_locale = 'en'
  153. else:
  154. sxng_locale = sxng_locale.split('-')[0]
  155. try:
  156. return backend(query, sxng_locale)
  157. except (HTTPError, SearxEngineResponseException):
  158. return []