  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """Anna's Archive
  4. """
  5. from typing import List, Dict, Any, Optional
  6. from urllib.parse import quote
  7. from lxml import html
  8. from searx.utils import extract_text, eval_xpath
# about
about: Dict[str, Any] = {
    "website": "https://annas-archive.org/",
    "wikidata_id": "Q115288326",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",  # results are scraped from the HTML search page
}

# engine dependent config
categories: List[str] = ["files"]
paging: bool = False  # the scraped search page exposes no page parameter here

# search-url
base_url: str = "https://annas-archive.org"

# xpath queries
# Each search hit is an <a href="/md5..."> card inside <main>; the
# remaining expressions are evaluated relative to that card element.
xpath_results: str = '//main//a[starts-with(@href,"/md5")]'
xpath_url: str = ".//@href"
xpath_title: str = ".//h3/text()[1]"
xpath_authors: str = './/div[contains(@class, "italic")]'
xpath_publisher: str = './/div[contains(@class, "text-sm")]'
xpath_file_info: str = './/div[contains(@class, "text-xs")]'
  30. def request(query, params: Dict[str, Any]) -> Dict[str, Any]:
  31. search_url: str = base_url + "/search?q={search_query}&lang={lang}"
  32. lang: str = ""
  33. if params["language"] != "all":
  34. lang = params["language"]
  35. params["url"] = search_url.format(search_query=quote(query), lang=lang)
  36. print(params)
  37. return params
  38. def response(resp) -> List[Dict[str, Optional[str]]]:
  39. results: List[Dict[str, Optional[str]]] = []
  40. dom = html.fromstring(resp.text)
  41. for item in dom.xpath(xpath_results):
  42. result: Dict[str, Optional[str]] = {}
  43. result["url"] = base_url + item.xpath(xpath_url)[0]
  44. result["title"] = extract_text(eval_xpath(item, xpath_title))
  45. result["content"] = "{publisher}. {authors}. {file_info}".format(
  46. authors=extract_text(eval_xpath(item, xpath_authors)),
  47. publisher=extract_text(eval_xpath(item, xpath_publisher)),
  48. file_info=extract_text(eval_xpath(item, xpath_file_info)),
  49. )
  50. results.append(result)
  51. return results