proxy.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Implementations for a favicon proxy"""
  3. from __future__ import annotations
  4. from typing import Callable
  5. import importlib
  6. import base64
  7. import pathlib
  8. import urllib.parse
  9. import flask
  10. from httpx import HTTPError
  11. from pydantic import BaseModel
  12. from searx import get_setting
  13. from searx.webutils import new_hmac, is_hmac_of
  14. from searx.exceptions import SearxEngineResponseException
  15. from .resolvers import DEFAULT_RESOLVER_MAP
  16. from . import cache
# Module-wide memo of default-favicon data URLs.  It is filled lazily by
# FaviconProxyConfig.favicon_data_url; the key is the string built from the
# sorted ``name:value`` replacement pairs, the value is the finished data URL.
DEFAULT_FAVICON_URL: dict[str, str] = {}
# The active proxy configuration.  It is None until init() has been called, so
# the functions of this module that read CFG must only run after init().
CFG: FaviconProxyConfig = None  # type: ignore
  19. def init(cfg: FaviconProxyConfig):
  20. global CFG # pylint: disable=global-statement
  21. CFG = cfg
  22. def _initial_resolver_map():
  23. d = {}
  24. name: str = get_setting("search.favicon_resolver", None) # type: ignore
  25. if name:
  26. func = DEFAULT_RESOLVER_MAP.get(name)
  27. if func:
  28. d = {name: f"searx.favicons.resolvers.{func.__name__}"}
  29. return d
  30. class FaviconProxyConfig(BaseModel):
  31. """Configuration of the favicon proxy."""
  32. max_age: int = 60 * 60 * 24 * 7 # seven days
  33. """HTTP header Cache-Control_ ``max-age``
  34. .. _Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
  35. """
  36. secret_key: str = get_setting("server.secret_key") # type: ignore
  37. """By default, the value from :ref:`server.secret_key <settings server>`
  38. setting is used."""
  39. resolver_timeout: int = get_setting("outgoing.request_timeout") # type: ignore
  40. """Timeout which the resolvers should not exceed, is usually passed to the
  41. outgoing request of the resolver. By default, the value from
  42. :ref:`outgoing.request_timeout <settings outgoing>` setting is used."""
  43. resolver_map: dict[str, str] = _initial_resolver_map()
  44. """The resolver_map is a key / value dictionary where the key is the name of
  45. the resolver and the value is the fully qualifying name (fqn) of resolver's
  46. function (the callable). The resolvers from the python module
  47. :py:obj:`searx.favicons.resolver` are available by default."""
  48. def get_resolver(self, name: str) -> Callable | None:
  49. """Returns the callable object (function) of the resolver with the
  50. ``name``. If no resolver is registered for the ``name``, ``None`` is
  51. returned.
  52. """
  53. fqn = self.resolver_map.get(name)
  54. if fqn is None:
  55. return None
  56. mod_name, _, func_name = fqn.rpartition('.')
  57. mod = importlib.import_module(mod_name)
  58. func = getattr(mod, func_name)
  59. if func is None:
  60. raise ValueError(f"resolver {fqn} is not implemented")
  61. return func
  62. favicon_path: str = get_setting("ui.static_path") + "/themes/{theme}/img/empty_favicon.svg" # type: ignore
  63. favicon_mime_type: str = "image/svg+xml"
  64. def favicon(self, **replacements):
  65. """Returns pathname and mimetype of the default favicon."""
  66. return (
  67. pathlib.Path(self.favicon_path.format(**replacements)),
  68. self.favicon_mime_type,
  69. )
  70. def favicon_data_url(self, **replacements):
  71. """Returns data image URL of the default favicon."""
  72. cache_key = ", ".join(f"{x}:{replacements[x]}" for x in sorted(list(replacements.keys()), key=str))
  73. data_url = DEFAULT_FAVICON_URL.get(cache_key)
  74. if data_url is not None:
  75. return data_url
  76. fav, mimetype = CFG.favicon(**replacements)
  77. # hint: encoding utf-8 limits favicons to be a SVG image
  78. with fav.open("r", encoding="utf-8") as f:
  79. data_url = f.read()
  80. data_url = urllib.parse.quote(data_url)
  81. data_url = f"data:{mimetype};utf8,{data_url}"
  82. DEFAULT_FAVICON_URL[cache_key] = data_url
  83. return data_url
  84. def favicon_proxy():
  85. """REST API of SearXNG's favicon proxy service
  86. ::
  87. /favicon_proxy?authority=<...>&h=<...>
  88. ``authority``:
  89. Domain name :rfc:`3986` / see :py:obj:`favicon_url`
  90. ``h``:
  91. HMAC :rfc:`2104`, build up from the :ref:`server.secret_key <settings
  92. server>` setting.
  93. """
  94. authority = flask.request.args.get('authority')
  95. # malformed request or RFC 3986 authority
  96. if not authority or "/" in authority:
  97. return '', 400
  98. # malformed request / does not have authorisation
  99. if not is_hmac_of(
  100. CFG.secret_key,
  101. authority.encode(),
  102. flask.request.args.get('h', ''),
  103. ):
  104. return '', 400
  105. resolver = flask.request.preferences.get_value('favicon_resolver') # type: ignore
  106. # if resolver is empty or not valid, just return HTTP 400.
  107. if not resolver or resolver not in CFG.resolver_map.keys():
  108. return "", 400
  109. data, mime = search_favicon(resolver, authority)
  110. if data is not None and mime is not None:
  111. resp = flask.Response(data, mimetype=mime) # type: ignore
  112. resp.headers['Cache-Control'] = f"max-age={CFG.max_age}"
  113. return resp
  114. # return default favicon from static path
  115. theme = flask.request.preferences.get_value("theme") # type: ignore
  116. fav, mimetype = CFG.favicon(theme=theme)
  117. return flask.send_from_directory(fav.parent, fav.name, mimetype=mimetype)
  118. def search_favicon(resolver: str, authority: str) -> tuple[None | bytes, None | str]:
  119. """Sends the request to the favicon resolver and returns a tuple for the
  120. favicon. The tuple consists of ``(data, mime)``, if the resolver has not
  121. determined a favicon, both values are ``None``.
  122. ``data``:
  123. Binary data of the favicon.
  124. ``mime``:
  125. Mime type of the favicon.
  126. """
  127. data, mime = (None, None)
  128. func = CFG.get_resolver(resolver)
  129. if func is None:
  130. return data, mime
  131. # to avoid superfluous requests to the resolver, first look in the cache
  132. data_mime = cache.CACHE(resolver, authority)
  133. if data_mime is not None:
  134. return data_mime
  135. try:
  136. data, mime = func(authority, timeout=CFG.resolver_timeout)
  137. if data is None or mime is None:
  138. data, mime = (None, None)
  139. except (HTTPError, SearxEngineResponseException):
  140. pass
  141. cache.CACHE.set(resolver, authority, mime, data)
  142. return data, mime
  143. def favicon_url(authority: str) -> str:
  144. """Function to generate the image URL used for favicons in SearXNG's result
  145. lists. The ``authority`` argument (aka netloc / :rfc:`3986`) is usually a
  146. (sub-) domain name. This function is used in the HTML (jinja) templates.
  147. .. code:: html
  148. <div class="favicon">
  149. <img src="{{ favicon_url(result.parsed_url.netloc) }}">
  150. </div>
  151. The returned URL is a route to :py:obj:`favicon_proxy` REST API.
  152. If the favicon is already in the cache, the returned URL is a `data URL`_
  153. (something like ``data:image/png;base64,...``). By generating a data url from
  154. the :py:obj:`.cache.FaviconCache`, additional HTTP roundtripps via the
  155. :py:obj:`favicon_proxy` are saved. However, it must also be borne in mind
  156. that data urls are not cached in the client (web browser).
  157. .. _data URL: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs
  158. """
  159. resolver = flask.request.preferences.get_value('favicon_resolver') # type: ignore
  160. # if resolver is empty or not valid, just return nothing.
  161. if not resolver or resolver not in CFG.resolver_map.keys():
  162. return ""
  163. data_mime = cache.CACHE(resolver, authority)
  164. if data_mime == (None, None):
  165. # we have already checked, the resolver does not have a favicon
  166. theme = flask.request.preferences.get_value("theme") # type: ignore
  167. return CFG.favicon_data_url(theme=theme)
  168. if data_mime is not None:
  169. data, mime = data_mime
  170. return f"data:{mime};base64,{str(base64.b64encode(data), 'utf-8')}" # type: ignore
  171. h = new_hmac(CFG.secret_key, authority.encode())
  172. proxy_url = flask.url_for('favicon_proxy')
  173. query = urllib.parse.urlencode({"authority": authority, "h": h})
  174. return f"{proxy_url}?{query}"