# SPDX-License-Identifier: AGPL-3.0-or-later
"""Yahoo (News)

Yahoo News is "English only" and does not offer localized or
language-specific queries.

"""

# pylint: disable=invalid-name

import re
from urllib.parse import urlencode
from datetime import datetime, timedelta
from dateutil import parser
from lxml import html

from searx.utils import (
    eval_xpath_list,
    eval_xpath_getindex,
    extract_text,
)

from searx.engines.yahoo import parse_url

# about
about = {
    "website": 'https://news.yahoo.com',
    "wikidata_id": 'Q3044717',
    "official_api_documentation": 'https://developer.yahoo.com/api/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

language_support = False
time_range_support = False
safesearch = False
paging = True
categories = ['news']

# search-url
search_url = (
    # fmt: off
    'https://news.search.yahoo.com/search'
    '?{query}&b={offset}'
    # fmt: on
)

# Matches relative timestamps such as "3 hours ago": group 1 is the count,
# group 2 the unit (the unit is matched as a prefix, so "hours" yields "hour").
AGO_RE = re.compile(r'([0-9]+)\s*(year|month|week|day|minute|hour)')

# Length of one unit captured by AGO_RE; months and years are approximated
# as 30 and 365 days respectively.
AGO_TIMEDELTA = {
    'minute': timedelta(minutes=1),
    'hour': timedelta(hours=1),
    'day': timedelta(days=1),
    'week': timedelta(days=7),
    'month': timedelta(days=30),
    'year': timedelta(days=365),
}


def request(query, params):
    """Build the Yahoo News search URL and store it in ``params['url']``.

    :param query: search terms; URL-encoded into the ``p`` query parameter.
    :param params: request parameters; ``params['pageno']`` (1-based) is read
        and ``params['url']`` is written.
    :returns: the same ``params`` mapping, updated in place.
    """
    # The ``b`` parameter receives a 1-based offset: page 1 -> 1, page 2 -> 11.
    # (Presumably Yahoo serves 10 results per page -- TODO confirm.)
    offset = (params['pageno'] - 1) * 10 + 1

    params['url'] = search_url.format(offset=offset, query=urlencode({'p': query}))

    # NOTE(review): ``logger`` is not defined or imported in this module;
    # presumably it is injected by the engine loader -- confirm.
    logger.debug("query_url --> %s", params['url'])
    return params


def _parse_pub_date(text):
    """Convert a publication-time string into a ``datetime``, or ``None``.

    Relative forms matched by ``AGO_RE`` (e.g. "2 days ago") are subtracted
    from the current local time; anything else is handed to
    ``dateutil.parser.parse``.  Empty or unparsable text yields ``None``.
    """
    if not text:
        return None

    ago = AGO_RE.search(text)
    if ago:
        number = int(ago.group(1))
        return datetime.now() - AGO_TIMEDELTA[ago.group(2)] * number

    try:
        return parser.parse(text)
    except (parser.ParserError, OverflowError):
        # Not a recognisable date, or out of the representable range.
        return None


def response(resp):
    """Parse a Yahoo News result page into a list of result dicts.

    :param resp: HTTP response whose ``text`` attribute holds the HTML page.
    :returns: a list containing one dict per news item (keys ``url``,
        ``title``, ``content``, ``thumbnail`` and, when the date could be
        parsed, ``publishedDate``) followed by one ``{'suggestion': ...}``
        dict per "AlsoTry" suggestion found on the page.
    """
    results = []
    dom = html.fromstring(resp.text)

    # parse results
    for result in eval_xpath_list(dom, '//ol[contains(@class,"searchCenterMiddle")]//li'):

        url = eval_xpath_getindex(result, './/h4/a/@href', 0, None)
        if url is None:
            # List entries without a headline link are not news items.
            continue

        item = {
            'url': parse_url(url),
            'title': extract_text(result.xpath('.//h4/a')),
            'content': extract_text(result.xpath('.//p')),
            'thumbnail': eval_xpath_getindex(result, './/img/@data-src', 0, None),
        }

        pub_date = _parse_pub_date(extract_text(result.xpath('.//span[contains(@class,"s-time")]')))
        if pub_date is not None:
            item['publishedDate'] = pub_date
        results.append(item)

    # Suggestions are looked up on the whole document, not on a single result,
    # so they are collected exactly once, after the result loop.  Keeping this
    # loop inside the result loop would append every suggestion once per
    # result and drop them entirely when the page has no results.
    for suggestion in eval_xpath_list(dom, '//div[contains(@class,"AlsoTry")]//td'):
        results.append({'suggestion': extract_text(suggestion)})

    return results