reuters.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Reuters_ (news) is an international news agency.

.. _Reuters: https://www.reuters.com

Configuration
=============

The engine has the following additional settings:

- :py:obj:`sort_order`

.. code:: yaml

  - name: reuters
    engine: reuters
    shortcut: reu
    sort_order: "relevance"

Implementations
===============

"""
  16. from json import dumps
  17. from urllib.parse import quote_plus
  18. from datetime import datetime, timedelta
  19. from searx.result_types import EngineResults
  20. about = {
  21. "website": "https://www.reuters.com",
  22. "wikidata_id": "Q130879",
  23. "official_api_documentation": None,
  24. "use_official_api": False,
  25. "require_api_key": False,
  26. "results": "JSON",
  27. }
  28. categories = ["news"]
  29. time_range_support = True
  30. paging = True
  31. base_url = "https://www.reuters.com"
  32. results_per_page = 20
  33. sort_order = "relevance"
  34. """Sort order, one of ``relevance``, ``display_date:desc`` or ``display_data:asc``."""
  35. time_range_duration_map = {
  36. "day": 1,
  37. "week": 7,
  38. "month": 30,
  39. "year": 365,
  40. }
  41. def request(query, params):
  42. args = {
  43. "keyword": query,
  44. "offset": (params["pageno"] - 1) * results_per_page,
  45. "orderby": sort_order,
  46. "size": results_per_page,
  47. "website": "reuters",
  48. }
  49. if params["time_range"]:
  50. time_diff_days = time_range_duration_map[params["time_range"]]
  51. start_date = datetime.now() - timedelta(days=time_diff_days)
  52. args["start_date"] = start_date.isoformat()
  53. params["url"] = f"{base_url}/pf/api/v3/content/fetch/articles-by-search-v2?query={quote_plus(dumps(args))}"
  54. return params
  55. def response(resp) -> EngineResults:
  56. res = EngineResults()
  57. for result in resp.json().get("result", {}).get("articles", []):
  58. res.add(
  59. res.types.MainResult(
  60. url=base_url + result["canonical_url"],
  61. title=result["web"],
  62. content=result["description"],
  63. thumbnail=result.get("thumbnail", {}).get("url", ""),
  64. metadata=result.get("kicker", {}).get("name"),
  65. publishedDate=datetime.strptime(result["display_time"], "%Y-%m-%dT%H:%M:%SZ"),
  66. )
  67. )
  68. return res