autocomplete.py

# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""This module implements functions needed for the autocompleter.
"""
from json import loads
from urllib.parse import urlencode

from lxml import etree
from httpx import HTTPError

from searx import settings
from searx.data import ENGINES_LANGUAGES
from searx.network import get as http_get
from searx.exceptions import SearxEngineResponseException

# a fetch_supported_languages() for XPath engines isn't available right now
# _brave = ENGINES_LANGUAGES['brave'].keys()


def get(*args, **kwargs):
    # wrapper around searx.network.get(): apply the configured outgoing
    # request timeout and raise on HTTP errors
    if 'timeout' not in kwargs:
        kwargs['timeout'] = settings['outgoing']['request_timeout']
    kwargs['raise_for_httperror'] = True
    return http_get(*args, **kwargs)


def brave(query, _lang):
    # brave search autocompleter
    url = 'https://search.brave.com/api/suggest?'
    url += urlencode({'q': query})
    country = 'all'
    # if lang in _brave:
    #    country = lang
    kwargs = {'cookies': {'country': country}}
    resp = get(url, **kwargs)

    results = []

    if resp.ok:
        data = resp.json()
        for item in data[1]:
            results.append(item)
    return results


def dbpedia(query, _lang):
    # dbpedia autocompleter
    autocomplete_url = 'https://lookup.dbpedia.org/api/search.asmx/KeywordSearch?'

    response = get(autocomplete_url + urlencode(dict(QueryString=query)))

    results = []

    if response.ok:
        dom = etree.fromstring(response.content)
        results = dom.xpath('//Result/Label//text()')

    return results


def duckduckgo(query, _lang):
    # duckduckgo autocompleter
    url = 'https://ac.duckduckgo.com/ac/?{0}&type=list'

    resp = loads(get(url.format(urlencode(dict(q=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []


def google(query, lang):
    # google autocompleter
    autocomplete_url = 'https://suggestqueries.google.com/complete/search?client=toolbar&'

    response = get(autocomplete_url + urlencode(dict(hl=lang, q=query)))

    results = []

    if response.ok:
        dom = etree.fromstring(response.text)
        results = dom.xpath('//suggestion/@data')

    return results


def seznam(query, _lang):
    # seznam search autocompleter
    url = 'https://suggest.seznam.cz/fulltext/cs?{query}'

    resp = get(
        url.format(
            query=urlencode(
                {'phrase': query, 'cursorPosition': len(query), 'format': 'json-2', 'highlight': '1', 'count': '6'}
            )
        )
    )

    if not resp.ok:
        return []

    data = resp.json()
    return [
        ''.join([part.get('text', '') for part in item.get('text', [])])
        for item in data.get('result', [])
        if item.get('itemType', None) == 'ItemType.TEXT'
    ]


def startpage(query, lang):
    # startpage autocompleter
    lui = ENGINES_LANGUAGES['startpage'].get(lang, 'english')
    url = 'https://startpage.com/suggestions?{query}'
    resp = get(url.format(query=urlencode({'q': query, 'segment': 'startpage.udog', 'lui': lui})))
    data = resp.json()
    return [e['text'] for e in data.get('suggestions', []) if 'text' in e]


def swisscows(query, _lang):
    # swisscows autocompleter
    url = 'https://swisscows.ch/api/suggest?{query}&itemsCount=5'

    resp = loads(get(url.format(query=urlencode({'query': query}))).text)
    return resp


def qwant(query, lang):
    # qwant autocompleter (additional parameter : lang=en_en&count=xxx )
    url = 'https://api.qwant.com/api/suggest?{query}'

    resp = get(url.format(query=urlencode({'q': query, 'lang': lang})))

    results = []

    if resp.ok:
        data = loads(resp.text)
        if data['status'] == 'success':
            for item in data['data']['items']:
                results.append(item['value'])

    return results


def wikipedia(query, lang):
    # wikipedia autocompleter
    url = 'https://' + lang + '.wikipedia.org/w/api.php?action=opensearch&{0}&limit=10&namespace=0&format=json'

    resp = loads(get(url.format(urlencode(dict(search=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []


def yandex(query, _lang):
    # yandex autocompleter
    url = "https://suggest.yandex.com/suggest-ff.cgi?{0}"

    resp = loads(get(url.format(urlencode(dict(part=query)))).text)
    if len(resp) > 1:
        return resp[1]
    return []


backends = {
    'dbpedia': dbpedia,
    'duckduckgo': duckduckgo,
    'google': google,
    'seznam': seznam,
    'startpage': startpage,
    'swisscows': swisscows,
    'qwant': qwant,
    'wikipedia': wikipedia,
    'brave': brave,
    'yandex': yandex,
}
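
# Every backend above follows the same informal protocol: a callable taking
# (query, lang) and returning a list of suggestion strings.  A hypothetical
# new backend would be plugged in by defining such a function and adding it
# to the `backends` mapping, e.g. backends['example'] = example
# (illustrative name only, not an existing engine).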


def search_autocomplete(backend_name, query, lang):
    backend = backends.get(backend_name)
    if backend is None:
        return []

    try:
        return backend(query, lang)
    except (HTTPError, SearxEngineResponseException):
        return []
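

# Usage sketch (an illustrative addition, not part of the upstream module):
# it shows how the autocompleter backends are dispatched through
# search_autocomplete().  The backend name, query and language below are
# arbitrary example values; any key of the `backends` dict works the same way.
if __name__ == '__main__':
    import sys

    example_backend = sys.argv[1] if len(sys.argv) > 1 else 'duckduckgo'
    example_query = sys.argv[2] if len(sys.argv) > 2 else 'searx'
    for suggestion in search_autocomplete(example_backend, example_query, 'en'):
        print(suggestion)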