# SPDX-License-Identifier: AGPL-3.0-or-later
"""Engine for Ansa, Italy's oldest news agency.

To use this engine add the following entry to your engines
list in ``settings.yml``:

.. code:: yaml

  - name: ansa
    engine: ansa
    shortcut: ans
    disabled: false

"""
  11. from urllib.parse import urlencode
  12. from lxml import html
  13. from searx.result_types import EngineResults, MainResult
  14. from searx.utils import eval_xpath, eval_xpath_list, extract_text
  15. engine_type = 'online'
  16. language_support = False
  17. categories = ['news']
  18. paging = True
  19. page_size = 12
  20. base_url = 'https://www.ansa.it'
  21. time_range_support = True
  22. time_range_args = {
  23. 'day': 1,
  24. 'week': 7,
  25. 'month': 31,
  26. 'year': 365,
  27. }
  28. # https://www.ansa.it/ricerca/ansait/search.shtml?start=0&any=houthi&periodo=&sort=data%3Adesc
  29. search_api = 'https://www.ansa.it/ricerca/ansait/search.shtml?'
  30. about = {
  31. 'website': 'https://www.ansa.it',
  32. 'wikidata_id': 'Q392934',
  33. 'official_api_documentation': None,
  34. 'use_official_api': False,
  35. 'require_api_key': False,
  36. 'results': 'HTML',
  37. 'language': 'it',
  38. }
  39. def request(query, params):
  40. query_params = {
  41. 'any': query,
  42. 'start': (params['pageno'] - 1) * page_size,
  43. 'sort': "data:desc",
  44. }
  45. if params['time_range']:
  46. query_params['periodo'] = time_range_args.get(params['time_range'])
  47. params['url'] = search_api + urlencode(query_params)
  48. return params
  49. def response(resp) -> EngineResults:
  50. res = EngineResults()
  51. doc = html.fromstring(resp.text)
  52. for result in eval_xpath_list(doc, "//div[@class='article']"):
  53. res_obj = MainResult(
  54. title=extract_text(eval_xpath(result, "./div[@class='content']/h2[@class='title']/a")),
  55. content=extract_text(eval_xpath(result, "./div[@class='content']/div[@class='text']")),
  56. url=base_url + extract_text(eval_xpath(result, "./div[@class='content']/h2[@class='title']/a/@href")),
  57. )
  58. thumbnail = extract_text(eval_xpath(result, "./div[@class='image']/a/img/@src"))
  59. if thumbnail:
  60. res_obj.thumbnail = base_url + thumbnail
  61. res.append(res_obj)
  62. return res