- # SPDX-License-Identifier: AGPL-3.0-or-later
- """Baidu_
- .. _Baidu: https://www.baidu.com
- """
- # There exits a https://github.com/ohblue/baidu-serp-api/
- # but we don't use it here (may we can learn from).
- from urllib.parse import urlencode
- from datetime import datetime
- from searx.exceptions import SearxEngineAPIException
# Engine metadata shown on the searx "about"/engine-info pages.
about = {
    "website": "https://www.baidu.com",
    "wikidata_id": "Q14772",
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
    "language": "zh",
}

# Engine capabilities / classification used by the searx core.
paging = True
categories = ["general"]

# Baidu web-search endpoint; requests below ask for JSON via "tn=json".
base_url = "https://www.baidu.com/s"
# Page size requested per query ("rn" parameter) and used to compute the
# "pn" offset in request().
results_per_page = 10
def request(query, params):
    """Assemble the Baidu search URL for *query* and store it in ``params``.

    Uses the undocumented ``tn=json`` switch so Baidu answers with JSON
    instead of HTML.  Paging is expressed as an absolute result offset
    (``pn``) derived from ``params["pageno"]``.
    """
    offset = (params["pageno"] - 1) * results_per_page
    args = {
        "wd": query.strip(),
        "rn": results_per_page,
        "pn": offset,
        "tn": "json",
    }
    params["url"] = base_url + "?" + urlencode(args)
    return params
def response(resp):
    """Parse a Baidu ``tn=json`` answer into a list of searx result dicts.

    :param resp: HTTP response object whose ``.json()`` yields the payload;
        results are expected under ``data["feed"]["entry"]``.
    :returns: list of dicts with ``title``, ``url``, ``content`` and
        ``publishedDate`` keys; entries lacking a title or URL are skipped.
    :raises SearxEngineAPIException: when the body is not JSON or the
        expected ``feed``/``entry`` structure is missing.
    """
    try:
        data = resp.json()
    except Exception as e:
        raise SearxEngineAPIException(f"Invalid response: {e}") from e

    if "feed" not in data or "entry" not in data["feed"]:
        raise SearxEngineAPIException("Invalid response")

    results = []
    for entry in data["feed"]["entry"]:
        # A result without title or URL cannot be rendered — drop it.
        if not entry.get("title") or not entry.get("url"):
            continue

        published_date = None
        if entry.get("time"):
            try:
                # NOTE: naive local-time datetime; the upstream "time" field
                # is presumably a Unix timestamp — the except below keeps
                # garbage values from aborting the whole page of results.
                published_date = datetime.fromtimestamp(entry["time"])
            except (ValueError, TypeError, OverflowError, OSError):
                # fromtimestamp() raises OverflowError/OSError (not just
                # ValueError) for timestamps outside the platform range.
                published_date = None

        results.append(
            {
                "title": entry["title"],
                "url": entry["url"],
                "content": entry.get("abs", ""),
                "publishedDate": published_date,
            }
        )
    return results