  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Baidu_
  3. .. _Baidu: https://www.baidu.com
  4. """
  5. # There exits a https://github.com/ohblue/baidu-serp-api/
  6. # but we don't use it here (may we can learn from).
  7. from urllib.parse import urlencode
  8. from datetime import datetime
  9. from searx.exceptions import SearxEngineAPIException
  10. about = {
  11. "website": "https://www.baidu.com",
  12. "wikidata_id": "Q14772",
  13. "official_api_documentation": None,
  14. "use_official_api": False,
  15. "require_api_key": False,
  16. "results": "JSON",
  17. }
  18. paging = True
  19. categories = ["general"]
  20. base_url = "https://www.baidu.com/s"
  21. results_per_page = 10
  22. def request(query, params):
  23. keyword = query.strip()
  24. query_params = {
  25. "wd": keyword,
  26. "rn": results_per_page,
  27. "pn": (params["pageno"] - 1) * results_per_page,
  28. "tn": "json",
  29. }
  30. params["url"] = f"{base_url}?{urlencode(query_params)}"
  31. return params
  32. def response(resp):
  33. try:
  34. data = resp.json()
  35. except Exception as e:
  36. raise SearxEngineAPIException(f"Invalid response: {e}") from e
  37. results = []
  38. if "feed" not in data or "entry" not in data["feed"]:
  39. raise SearxEngineAPIException("Invalid response")
  40. for entry in data["feed"]["entry"]:
  41. if not entry.get("title") or not entry.get("url"):
  42. continue
  43. published_date = None
  44. if entry.get("time"):
  45. try:
  46. published_date = datetime.fromtimestamp(entry["time"])
  47. except (ValueError, TypeError):
  48. published_date = None
  49. results.append(
  50. {
  51. "title": entry["title"],
  52. "url": entry["url"],
  53. "content": entry.get("abs", ""),
  54. "publishedDate": published_date,
  55. # "source": entry.get('source')
  56. }
  57. )
  58. return results