baidu.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Baidu_

.. _Baidu: https://www.baidu.com
"""

# There exists a https://github.com/ohblue/baidu-serp-api/
# but we don't use it here (maybe we can learn from it).

from urllib.parse import urlencode
from datetime import datetime

from searx.exceptions import SearxEngineAPIException

about = {
    "website": "https://www.baidu.com",
    "wikidata_id": "Q14772",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
    "language": "zh",
}

paging = True
categories = ["general"]

base_url = "https://www.baidu.com/s"
results_per_page = 10  # sent to Baidu as the ``rn`` parameter


def request(query, params):
    """Build the Baidu request URL for the given query and page number."""
    keyword = query.strip()

    query_params = {
        "wd": keyword,  # the search terms
        "rn": results_per_page,  # number of results to return
        "pn": (params["pageno"] - 1) * results_per_page,  # zero-based result offset
        "tn": "json",  # ask for the JSON front end instead of HTML
    }

    params["url"] = f"{base_url}?{urlencode(query_params)}"
    return params
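
# A quick sketch of what ``request`` produces (illustrative values, not part
# of the engine): for query "hello" and pageno 2 the generated URL is
#
#   https://www.baidu.com/s?wd=hello&rn=10&pn=10&tn=json
#
# i.e. ``pn`` is the offset of the first result, not a page index.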


def response(resp):
    """Parse Baidu's JSON feed into SearXNG result dicts."""
    try:
        data = resp.json()
    except Exception as e:
        raise SearxEngineAPIException(f"Invalid response: {e}") from e
    results = []

    if "feed" not in data or "entry" not in data["feed"]:
        raise SearxEngineAPIException("Invalid response")
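
    # Sketch of the payload shape the checks above assume (inferred from the
    # fields read below; not an official Baidu schema):
    #   {"feed": {"entry": [{"title": ..., "url": ..., "abs": ..., "time": ...}]}}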
    for entry in data["feed"]["entry"]:
        # Skip entries that lack the mandatory fields.
        if not entry.get("title") or not entry.get("url"):
            continue

        published_date = None
        if entry.get("time"):
            try:
                # ``time`` is a Unix timestamp in seconds.
                published_date = datetime.fromtimestamp(entry["time"])
            except (ValueError, TypeError):
                published_date = None

        results.append(
            {
                "title": entry["title"],
                "url": entry["url"],
                "content": entry.get("abs", ""),
                "publishedDate": published_date,
                # "source": entry.get('source')
            }
        )

    return results
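

# Hypothetical smoke test (assumes the ``searx`` package is importable and
# that Baidu still serves the ``tn=json`` front end; illustration only):
#
#   import requests
#   params = request("hello", {"pageno": 1})
#   resp = requests.get(params["url"], headers={"User-Agent": "Mozilla/5.0"})
#   print(response(resp))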