# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CORE (science)

Search engine module for CORE (https://core.ac.uk).  ``request`` builds the
search URL for the CORE API and ``response`` converts the JSON reply into a
list of result dictionaries.
"""
# pylint: disable=missing-function-docstring

from json import loads
from datetime import datetime
from urllib.parse import urlencode

from searx import logger
from searx.exceptions import SearxEngineAPIException

# Derive the child logger exactly once.  Calling ``getChild`` a second time on
# the already-derived logger would nest the name ('CORE engine.CORE engine').
logger = logger.getChild('CORE engine')

about = {
    "website": 'https://core.ac.uk',
    "wikidata_id": 'Q22661180',
    "official_api_documentation": 'https://core.ac.uk/documentation/api/',
    "use_official_api": True,
    "require_api_key": True,
    "results": 'JSON',
}

categories = ['science']
paging = True
nb_per_page = 10

# Placeholder value; ``request`` refuses to run while it is still 'unset'.
# NOTE(review): presumably overridden from the engine settings -- confirm.
api_key = 'unset'

base_url = 'https://core.ac.uk:443/api-v2/search/'
search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'


def request(query, params):
    """Fill ``params['url']`` with the CORE search URL for ``query``.

    :param query: search terms entered by the user.
    :param params: request parameter dict; ``params['pageno']`` is read and
        ``params['url']`` is written.
    :returns: the same ``params`` dict, updated in place.
    :raises SearxEngineAPIException: if ``api_key`` is still ``'unset'``.
    """
    if api_key == 'unset':
        raise SearxEngineAPIException('missing CORE API key')

    search_path = search_string.format(
        query=urlencode({'q': query}),
        nb_per_page=nb_per_page,
        page=params['pageno'],
        apikey=api_key,
    )
    params['url'] = base_url + search_path

    logger.debug("query_url --> %s", params['url'])
    return params


def response(resp):
    """Convert a CORE API reply into a list of result dictionaries.

    :param resp: response object whose ``text`` attribute holds the JSON body.
    :returns: list of dicts with the keys ``url``, ``title``, ``content``,
        ``publishedDate`` and ``metadata``.  Records without any URL are
        skipped because they cannot be linked to.
    """
    results = []
    json_data = loads(resp.text)

    # A reply without a 'data' list (or with ``null``) yields no results
    # instead of raising.
    for result in json_data.get('data') or []:
        source = result['_source']

        urls = source.get('urls')
        if not urls:
            # Nothing to link to; skip this record rather than failing the
            # whole page with an IndexError/KeyError.
            continue

        # Prefer the publication date and fall back to the deposit date.
        # NOTE(review): the division by 1000 suggests the API reports
        # milliseconds since the epoch -- confirm against the API docs.
        timestamp = source.get('publishedDate') or source.get('depositedDate')
        date = datetime.fromtimestamp(timestamp / 1000) if timestamp else None

        publisher = source.get('publisher')
        topics = source.get('topics')
        doi = source.get('doi')

        metadata = []
        # Publisher values of three characters or fewer are left out.
        if publisher and len(publisher) > 3:
            metadata.append(publisher)
        if topics:
            metadata.append(topics[0])
        if doi:
            metadata.append(doi)

        results.append({
            # Rewrite only the leading scheme so the link uses https.
            'url': urls[0].replace('http://', 'https://', 1),
            'title': source['title'],
            'content': source['description'],
            'publishedDate': date,
            'metadata': ' / '.join(metadata),
        })

    return results