mullvad_leta.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """This is the implementation of the Mullvad-Leta meta-search engine.
  3. This engine **REQUIRES** that SearXNG operate within a Mullvad VPN.
  4. If using docker, consider using gluetun for easily connecting to the Mullvad VPN:
  5. - https://github.com/qdm12/gluetun
  6. Otherwise, follow instructions provided by Mullvad for enabling the VPN on Linux
  7. - https://mullvad.net/en/help/install-mullvad-app-linux
  8. .. hint::
  9. The :py:obj:`EngineTraits` is empty by default. Maintainers have to run
  10. ``make data.traits`` (in the Mullvad VPN / :py:obj:`fetch_traits`) and rebase
  11. the modified JSON file ``searx/data/engine_traits.json`` on every single
  12. update of SearXNG!
  13. """
  14. from typing import TYPE_CHECKING
  15. from httpx import Response
  16. from lxml import html
  17. from searx.enginelib.traits import EngineTraits
  18. from searx.locales import region_tag, get_official_locales
  19. from searx.utils import eval_xpath, extract_text, eval_xpath_list
  20. from searx.exceptions import SearxEngineResponseException
  if TYPE_CHECKING:
      # This branch is only taken by static type checkers (TYPE_CHECKING is
      # False at runtime), so ``logger`` is declared for analysis purposes only.
      import logging

      logger = logging.getLogger()

  # Annotation only: nothing in this file assigns ``traits``.
  # NOTE(review): presumably injected by the engine loader -- confirm.
  traits: EngineTraits
  # non-cache use only has 100 searches per day!
  use_cache: bool = True

  # Base URL of the Leta search page; also reported as the engine's website.
  search_url = "https://leta.mullvad.net"

  # about
  about = dict(
      website=search_url,
      wikidata_id='Q47008412',  # the Mullvad id - not leta, but related
      official_api_documentation='https://leta.mullvad.net/faq',
      use_official_api=False,
      require_api_key=False,
      results='HTML',
  )

  # engine dependent config
  categories = ['general', 'web']
  paging = True
  max_page = 50
  time_range_support = True

  # Maps the time-range names used in ``params['time_range']`` to Leta's codes.
  time_range_dict = dict(
      day="d1",
      week="w1",
      month="m1",
      year="y1",
  )
  def is_vpn_connected(dom: html.HtmlElement) -> bool:
      """Tell whether the Leta page reports an active Mullvad VPN connection.

      The text of ``//main/div/p[1]`` is compared with the "not connected"
      notice; only an exact match yields ``False``, any other text ``True``.
      """
      not_connected_notice = 'You are not connected to Mullvad VPN.'
      status_nodes = eval_xpath(dom, '//main/div/p[1]')
      return extract_text(status_nodes) != not_connected_notice
  def assign_headers(headers: dict) -> dict:
      """Add the HTTP headers used for requests to Mullvad Leta.

      The given dict is updated in place (existing keys of the same name are
      overwritten) and the very same object is returned.
      """
      headers.update(
          {
              'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
              'Content-Type': "application/x-www-form-urlencoded",
              'Host': "leta.mullvad.net",
              'Origin': "https://leta.mullvad.net",
          }
      )
      return headers
  def request(query: str, params: dict):
      """Fill *params* with the POST request for a Mullvad Leta search.

      Sets ``url``, ``method``, ``data`` and ``headers`` on *params* and returns
      the same dict.

      :param query: search terms, sent as form field ``q``.
      :param params: request parameters; ``pageno`` is required, while
          ``searxng_locale``, ``time_range`` and ``headers`` are optional.
      :return: the updated *params* dict.
      """
      country = traits.get_region(params.get('searxng_locale', 'all'), traits.all_locale)  # type: ignore

      params['url'] = search_url
      params['method'] = 'POST'
      params['data'] = {
          "q": query,
          # ``country is str`` compared the value with the *type object* and
          # was therefore always False, so the region was never sent.
          "gl": country if isinstance(country, str) else '',
      }
      # pylint: disable=undefined-variable
      if use_cache:
          params['data']['oc'] = "on"
      # pylint: enable=undefined-variable

      date_restrict = time_range_dict.get(params.get('time_range'), '')
      # Top-level key kept as before so existing readers of it still work.
      params['dateRestrict'] = date_restrict
      if date_restrict:
          # The value has to be part of the POSTed form, otherwise the selected
          # time range never reaches the server.
          # NOTE(review): assumes the form field is named ``dateRestrict`` -- confirm.
          params['data']['dateRestrict'] = date_restrict

      pageno = params['pageno']
      if pageno > 1:
          # Page 1 is n/a, Page 2 is 11, page 3 is 21, ...
          params['data']['start'] = f"{pageno - 1}1"

      if params.get('headers') is None:
          params['headers'] = {}
      assign_headers(params['headers'])
      return params
  def extract_result(dom_result: html.HtmlElement):
      """Convert one result node into a dict with ``url``, ``title``, ``content``.

      The node must have exactly three elements under ``div/div/*`` (link,
      heading, snippet, in that order); any other count makes the unpacking
      raise ``ValueError``.
      """
      link, heading, snippet = eval_xpath_list(dom_result, 'div/div/*')
      # The URL is taken from the link's own text, not from its attributes.
      url = extract_text(link.text)
      title = extract_text(heading)
      content = extract_text(snippet)
      return {'url': url, 'title': title, 'content': content}
  def response(resp: Response):
      """Parse a Leta result page into a list of result dicts.

      Raises :py:obj:`SearxEngineResponseException` when the page reports that
      the request did not come through the Mullvad VPN.
      """
      dom = html.fromstring(resp.text)

      if is_vpn_connected(dom):
          results = []
          for node in eval_xpath(dom.body, '//main/div[2]/div'):
              results.append(extract_result(node))
          return results

      raise SearxEngineResponseException('Not connected to Mullvad VPN')
  def fetch_traits(engine_traits: EngineTraits):
      """Fetch languages and regions from Mullvad-Leta

      .. warning::

          Fetching the engine traits also requires a Mullvad VPN connection. If
          not connected, then an error message will print and no traits will be
          updated.

      :param engine_traits: traits object whose ``regions`` mapping is filled
          with ``region tag -> Leta country code`` entries.
      """
      # pylint: disable=import-outside-toplevel
      # see https://github.com/searxng/searxng/issues/762
      from searx.network import post as http_post

      # pylint: enable=import-outside-toplevel
      resp = http_post(search_url, headers=assign_headers({}))
      if not isinstance(resp, Response):
          print("ERROR: failed to get response from mullvad-leta. Are you connected to the VPN?")
          return
      if not resp.ok:
          print("ERROR: response from mullvad-leta is not OK. Are you connected to the VPN?")
          return

      dom = html.fromstring(resp.text)
      if not is_vpn_connected(dom):
          print('ERROR: Not connected to Mullvad VPN')
          return

      # supported region codes
      options = eval_xpath_list(dom.body, '//main/div/form/div[2]/div/select[1]/option')
      if not options:
          print('ERROR: could not find any results. Are you connected to the VPN?')
          # Stop here like every other error branch instead of falling through.
          return

      for option in options:
          eng_country = option.get("value")

          sxng_locales = get_official_locales(eng_country, engine_traits.languages.keys(), regional=True)
          if not sxng_locales:
              print(
                  f"ERROR: can't map from Mullvad-Leta country {option.get('data-name')} ({eng_country})"
                  " to a babel region."
              )
              continue

          # Several locales can share one country; each gets its own region tag.
          for sxng_locale in sxng_locales:
              engine_traits.regions[region_tag(sxng_locale)] = eng_country