  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """Presearch (general, images, videos, news)
  4. """
  5. from urllib.parse import urlencode
  6. from searx.network import get
  7. from searx.utils import gen_useragent, html_to_text
  8. about = {
  9. "website": "https://presearch.io",
  10. "wikidiata_id": "Q7240905",
  11. "official_api_documentation": "https://docs.presearch.io/nodes/api",
  12. "use_official_api": False,
  13. "require_api_key": False,
  14. "results": "JSON",
  15. }
  16. paging = True
  17. time_range_support = True
  18. categories = ["general", "web"] # general, images, videos, news
  19. search_type = "search" # must be any of "search", "images", "videos", "news"
  20. base_url = "https://presearch.com"
  21. safesearch_map = {0: 'false', 1: 'true', 2: 'true'}
  22. def _get_request_id(query, page, time_range, safesearch):
  23. args = {
  24. "q": query,
  25. "page": page,
  26. }
  27. if time_range:
  28. args["time_range"] = time_range
  29. url = f"{base_url}/{search_type}?{urlencode(args)}"
  30. headers = {
  31. 'User-Agent': gen_useragent(),
  32. 'Cookie': f"b=1;presearch_session=;use_safe_search={safesearch_map[safesearch]}",
  33. }
  34. resp_text = get(url, headers=headers).text
  35. for line in resp_text.split("\n"):
  36. if "window.searchId = " in line:
  37. return line.split("= ")[1][:-1].replace('"', "")
  38. return None
  39. def _is_valid_img_src(url):
  40. # in some cases, the image url is a base64 encoded string, which has to be skipped
  41. return "https://" in url
  42. def request(query, params):
  43. request_id = _get_request_id(query, params["pageno"], params["time_range"], params["safesearch"])
  44. params["headers"]["Accept"] = "application/json"
  45. params["url"] = f"{base_url}/results?id={request_id}"
  46. return params
  47. def response(resp):
  48. results = []
  49. json = resp.json()
  50. json_results = []
  51. if search_type == "search":
  52. json_results = json['results'].get('standardResults', [])
  53. else:
  54. json_results = json.get(search_type, [])
  55. for json_result in json_results:
  56. result = {
  57. 'url': json_result['link'],
  58. 'title': json_result['title'],
  59. 'content': html_to_text(json_result.get('description', '')),
  60. }
  61. if search_type == "images":
  62. result['template'] = 'images.html'
  63. if not _is_valid_img_src(json_result['image']):
  64. continue
  65. result['img_src'] = json_result['image']
  66. if _is_valid_img_src(json_result['thumbnail']):
  67. result['thumbnail'] = json_result['thumbnail']
  68. elif search_type == "videos":
  69. result['template'] = 'videos.html'
  70. if _is_valid_img_src(json_result['image']):
  71. result['thumbnail'] = json_result['image']
  72. result['duration'] = json_result['duration']
  73. result['length'] = json_result['duration']
  74. results.append(result)
  75. return results