# SPDX-License-Identifier: AGPL-3.0-or-later
"""Internet Archive scholar(science)"""

from datetime import datetime
from urllib.parse import urlencode

from searx.utils import html_to_text

# Engine metadata.
about = {
    "website": "https://scholar.archive.org/",
    "wikidata_id": "Q115667709",
    "official_api_documentation": "https://scholar.archive.org/api/redoc",
    "use_official_api": True,
    "require_api_key": False,
    "results": "JSON",
}
categories = ['science', 'scientific publications']
paging = True

base_url = "https://scholar.archive.org"
# Number of hits requested per page; also used to derive the paging offset.
results_per_page = 15


def request(query, params):
    """Fill in the outgoing search request for *query*.

    Sets ``params["url"]`` to the ``/search`` endpoint of ``base_url`` with
    the query, the page size and the offset of the requested page, and asks
    for a JSON answer through the ``Accept`` header.

    :param query: search terms, sent as the ``q`` argument.
    :param params: request parameters; ``params["pageno"]`` (1-based) is read,
        ``params["url"]`` and ``params["headers"]["Accept"]`` are written.
    :return: the same ``params`` mapping, updated in place.
    """
    args = {
        "q": query,
        "limit": results_per_page,
        # pageno starts at 1, the offset at 0
        "offset": (params["pageno"] - 1) * results_per_page,
    }
    params["url"] = f"{base_url}/search?{urlencode(args)}"
    params["headers"]["Accept"] = "application/json"
    return params


def _parse_release_date(value):
    """Return *value* parsed as ``%Y-%m-%d``, or ``None`` if it is empty or malformed."""
    if not value:
        return None
    try:
        return datetime.strptime(value, "%Y-%m-%d")
    except ValueError:
        # One unparsable date must not discard the whole page of results.
        return None


def _pick_content(result):
    """Return the first abstract body, else the first highlight, else ``''``.

    Always returns a string so the value can be handed to ``html_to_text``.
    """
    abstracts = result.get('abstracts') or []
    if abstracts:
        # 'body' may be absent; fall back to '' rather than passing None on.
        return abstracts[0].get('body') or ''
    highlights = result.get('_highlights') or []
    if highlights:
        return highlights[0] or ''
    return ''


def response(resp):
    """Convert the JSON answer in *resp* into a list of ``paper.html`` results.

    Each entry of ``resp.json()["results"]`` becomes one result dict.  Entries
    without a ``fulltext`` access URL are skipped, since there is nothing to
    link to.

    :param resp: response object exposing ``json()``.
    :return: list of result dicts using the ``paper.html`` template.
    """
    results = []
    data = resp.json()

    for result in data["results"]:
        biblio = result['biblio']

        access_url = (result.get('fulltext') or {}).get('access_url')
        if not access_url:
            # Previously a missing URL raised KeyError and dropped every hit.
            continue

        releases = result.get('releases') or []
        doi = releases[0].get('doi') if releases else None

        results.append(
            {
                'template': 'paper.html',
                'url': access_url,
                'title': biblio.get('title') or biblio.get('container_name'),
                'content': html_to_text(_pick_content(result)),
                'publisher': biblio.get('publisher'),
                'doi': doi,
                'journal': biblio.get('container_name'),
                'authors': biblio.get('contrib_names'),
                'tags': result['tags'],
                'publishedDate': _parse_release_date(biblio.get('release_date')),
                'issns': biblio.get('issns'),
                'pdf_url': access_url,
            }
        )

    return results