oa_doi_rewrite.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring
  3. from __future__ import annotations
  4. import typing
  5. import re
  6. from urllib.parse import parse_qsl
  7. from flask_babel import gettext
  8. from searx import get_setting
  9. from searx.plugins import Plugin, PluginInfo
  10. from searx.extended_types import sxng_request
  11. from ._core import log
  12. if typing.TYPE_CHECKING:
  13. from searx.search import SearchWithPlugins
  14. from searx.extended_types import SXNG_Request
  15. from searx.result_types import Result, LegacyResult
  16. from searx.plugins import PluginCfg
  # NOTE(review): nothing in the visible part of this module reads or writes
  # this list; the name suggests a leftover from another plugin -- confirm
  # with callers/importers before removing it.
  ahmia_blacklist: list = []
  18. def filter_url_field(result: "Result|LegacyResult", field_name: str, url_src: str) -> bool | str:
  19. """Returns bool ``True`` to use URL unchanged (``False`` to ignore URL).
  20. If URL should be modified, the returned string is the new URL to use."""
  21. if field_name != "url":
  22. return True # use it unchanged
  23. doi = extract_doi(result.parsed_url)
  24. if doi and len(doi) < 50:
  25. for suffix in ("/", ".pdf", ".xml", "/full", "/meta", "/abstract"):
  26. doi = doi.removesuffix(suffix)
  27. new_url = get_doi_resolver() + doi
  28. if "doi" not in result:
  29. result["doi"] = doi
  30. log.debug("oa_doi_rewrite: [URL field: %s] %s -> %s", field_name, url_src, new_url)
  31. return new_url # use new url
  32. return True # use it unchanged
  class SXNGPlugin(Plugin):
      """Avoid paywalls by redirecting to open-access.

      For every result that has a parsed URL, the result's URL fields are run
      through :py:func:`filter_url_field`.
      """

      id = "oa_doi_rewrite"

      def __init__(self, plg_cfg: "PluginCfg") -> None:
          """Initialise the base plugin and build the plugin's info record."""
          super().__init__(plg_cfg)

          title = gettext("Open Access DOI rewrite")
          summary = gettext("Avoid paywalls by redirecting to open-access versions of publications when available")
          self.info = PluginInfo(
              id=self.id,
              name=title,
              description=summary,
              preference_section="general",
          )

      def on_result(
          self,
          request: "SXNG_Request",
          search: "SearchWithPlugins",
          result: "Result",
      ) -> bool:  # pylint: disable=unused-argument
          """Apply the DOI URL filter to *result* and always return ``True``.

          Results without a parsed URL are passed through untouched.
          """
          has_url = result.parsed_url
          if has_url:
              result.filter_urls(filter_url_field)
          return True
  # DOI-like token: "10.", 4 to 9 digits, a slash, then a run of non-whitespace.
  regex = re.compile(r'10\.\d{4,9}/[^\s]+')


  def extract_doi(url):
      """Return the first DOI-like string found in *url*, or ``None``.

      *url* is an object with ``path`` and ``query`` attributes.  The path is
      searched first; the query string is only parsed, and its values searched
      in order, when the path contains no match.
      """
      hit = regex.search(url.path)
      if hit is None:
          for _name, value in parse_qsl(url.query):
              hit = regex.search(value)
              if hit is not None:
                  break
      return None if hit is None else hit.group(0)
  def get_doi_resolver() -> str:
      """Return the ``doi_resolvers`` entry to use for the current request.

      The key is the first element of the request's ``doi_resolver``
      preference; when that key is not among the configured resolvers, the
      ``default_doi_resolver`` setting is used instead.
      """
      resolvers = get_setting("doi_resolvers")
      preferred = sxng_request.preferences.get_value('doi_resolver')[0]
      key = preferred if preferred in resolvers else get_setting("default_doi_resolver")
      return resolvers[key]
  66. if selected_resolver not in doi_resolvers:
  67. selected_resolver = get_setting("default_doi_resolver")
  68. return doi_resolvers[selected_resolver]