autocomplete.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module implements functions needed for the autocompleter.
"""
# pylint: disable=use-dict-literal

from json import loads
from urllib.parse import urlencode

from lxml import etree
from httpx import HTTPError

from searx import settings
from searx.data import ENGINES_LANGUAGES
from searx.network import get as http_get
from searx.exceptions import SearxEngineResponseException

# a fetch_supported_languages() for XPath engines isn't available right now
# _brave = ENGINES_LANGUAGES['brave'].keys()


def get(*args, **kwargs):
    # wrapper around searx.network.get: apply the configured default timeout
    # and always raise on HTTP errors
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    kwargs['raise_for_httperror'] = True
    return http_get(*args, **kwargs)


def brave(query, _lang):
    # brave search autocompleter
    url = 'https://search.brave.com/api/suggest?'
    url += urlencode({'q': query})
    country = 'all'
    # if lang in _brave:
    #     country = lang
    kwargs = {'cookies': {'country': country}}
    resp = get(url, **kwargs)

    results = []

    if resp.ok:
        data = resp.json()
        for item in data[1]:
            results.append(item)
    return results


def dbpedia(query, _lang):
    # dbpedia autocompleter
    autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'

    response = get(autocomplete_url + urlencode(dict(QueryString=query)))

    results = []

    if response.ok:
        dom = etree.fromstring(response.content)
        results = dom.xpath('//Result/Label//text()')

    return results


def duckduckgo(query, _lang):
    # duckduckgo autocompleter
    url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'

    resp = loads(get(url.format(urlencode(dict(q=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []


def google(query, lang):
    # google autocompleter
    autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'

    response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))

    results = []

    if response.ok:
        dom = etree.fromstring(response.text)
        results = dom.xpath('//suggestion/@data')

    return results


def seznam(query, _lang):
    # seznam search autocompleter
    url = 'https://suggest.seznam.cz/fulltext/cs?{query}'

    resp = get(
        url.format(
            query=urlencode(
                {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
            )
        )
    )

    if not resp.ok:
        return []

    data = resp.json()
    return [
        ''.join([part.get('text', '') for part in item.get('text', [])])
        for item in data.get('result', [])
        if item.get('itemType', None) == 'ItemType.TEXT'
    ]


def startpage(query, lang):
    # startpage autocompleter
    lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
    url = 'https://startpage.com/suggestions?{query}'

    resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
    data = resp.json()
    return [e['text'] for e in data.get('suggestions', []) if 'text' in e]


def swisscows(query, _lang):
    # swisscows autocompleter
    url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'

    resp = loads(get(url.format(query=urlencode({'query': query}))).text)
    return resp


def qwant(query, lang):
    # qwant autocompleter (additional parameter : lang=en_en&count=xxx )
    url = 'https://api.qwant.com/api/suggest?{query}'

    resp = get(url.format(query=urlencode({'q': query, 'lang': lang})))

    results = []

    if resp.ok:
        data = loads(resp.text)
        if data['status'] == 'success':
            for item in data['data']['items']:
                results.append(item['value'])

    return results


def wikipedia(query, lang):
    # wikipedia autocompleter
    url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json'

    resp = loads(get(url.format(urlencode(dict(search=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []


def yandex(query, _lang):
    # yandex autocompleter
    url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"

    resp = loads(get(url.format(urlencode(dict(part=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []


backends = {
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,
    'google': google,
    'seznam': seznam,
    'startpage': startpage,
    'swisscows': swisscows,
    'qwant': qwant,
    'wikipedia': wikipedia,
    'brave': brave,
    'yandex': yandex,
}


def search_autocomplete(backend_name, query, lang):
    # dispatch the query to the configured backend; network and engine
    # errors are swallowed so autocompletion never breaks the search page
    backend = backends.get(backend_name)
    if backend is None:
        return []

    try:
        return backend(query, lang)
    except (HTTPError, SearxEngineResponseException):
        return []
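

# Usage sketch (an illustration, not part of the upstream module):
# search_autocomplete() is the single entry point the rest of searx needs.
# It looks up a backend by the name used in the `backends` dict above and
# returns a plain list of suggestion strings, or [] on any HTTP/engine error.
# Running this block assumes a working searx installation whose settings are
# loaded, since get() reads settings['outgoing']['request_timeout'].
if __name__ == '__main__':
    for suggestion in search_autocomplete('duckduckgo', 'pari', 'en'):
        print(suggestion)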