# SPDX-License-Identifier: AGPL-3.0-or-later
"""ScanR Structures (Science)"""

from json import loads, dumps
from urllib.parse import urljoin

from searx.utils import html_to_text

# about
about = {
    "website": 'https://scanr.enseignementsup-recherche.gouv.fr',
    "wikidata_id": 'Q44105684',
    "official_api_documentation": 'https://scanr.enseignementsup-recherche.gouv.fr/opendata',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
categories = ['science']
paging = True
page_size = 20

# search-url
url = 'https://scanr.enseignementsup-recherche.gouv.fr/'
search_url = url + 'api/structures/search'


# do search-request
def request(query, params):
    """Fill ``params`` with the POST request for a structure search.

    Sets the URL, HTTP method, JSON content-type header and a JSON-encoded
    body carrying ``query``, the page number taken from ``params['pageno']``
    and the module-level ``page_size``.

    Returns the same ``params`` dict, mutated in place.
    """
    params['url'] = search_url
    params['method'] = 'POST'
    params['headers']['Content-type'] = "application/json"
    params['data'] = dumps(
        {
            "query": query,
            "searchField": "ALL",
            "sortDirection": "ASC",
            "sortOrder": "RELEVANCY",
            "page": params['pageno'],
            "pageSize": page_size,
        }
    )

    return params


# get response from search-request
def response(resp):
    """Turn the JSON body of ``resp`` into a list of result dicts.

    Each result dict has the keys ``url``, ``title``, ``thumbnail`` and
    ``content``.  Entries without an ``id`` are skipped.  An empty list is
    returned when the reported ``total`` is below 1.
    """
    search_res = loads(resp.text)

    # return empty array if there are no results
    if search_res.get('total', 0) < 1:
        return []

    results = []

    # parse results
    for result in search_res.get('results', []):
        if 'id' not in result:
            continue

        # is it thumbnail or img_src??
        # An empty or missing logo is normalised to None.
        thumbnail = result.get('logo') or None
        if thumbnail and thumbnail.startswith('/'):
            # `url` already ends with '/', so plain concatenation would yield
            # a double slash; urljoin resolves the path against the host.
            thumbnail = urljoin(url, thumbnail)

        # Take the first highlight if there is one; tolerate a missing key,
        # an empty list, or an entry without 'value'.
        content = ''
        highlights = result.get('highlights')
        if highlights:
            content = highlights[0].get('value') or ''

        # append result
        results.append(
            {
                'url': url + 'structure/' + result['id'],
                'title': result.get('label', ''),
                'thumbnail': thumbnail,
                # html_to_text is only ever handed a non-empty string.
                'content': html_to_text(content) if content else '',
            }
        )

    # return results
    return results