# SPDX-License-Identifier: AGPL-3.0-or-later
"""Microsoft Academic (Science)

Search engine module that POSTs a query to the Microsoft Academic
``GetEntityResults`` endpoint and converts the JSON answer into a list of
``url`` / ``title`` / ``content`` result dicts.
"""

from datetime import datetime
from json import loads
from uuid import uuid4
from urllib.parse import urlencode

from searx.utils import html_to_text

# about
about = {
    "website": 'https://academic.microsoft.com',
    "wikidata_id": 'Q28136779',
    "official_api_documentation": 'http://ma-graph.org/',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}

# NOTE(review): the module docstring says "Science" but the category is
# 'images' -- confirm this is intentional before changing it.
categories = ['images']
paging = True
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'


def request(query, params):
    """Fill ``params`` with the URL, cookies, method and body of the search request.

    :param query: search terms entered by the user.
    :param params: request parameter dict; ``params['pageno']`` is read and
        ``params['cookies']`` must already be a dict. The keys ``url``,
        ``method`` and ``data`` are overwritten.
    :returns: the same ``params`` object, modified in place.
    """
    correlation_id = uuid4()
    msacademic = uuid4()
    time_now = datetime.now()

    # A fresh random correlation id and cookie value are generated per request.
    params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
    params['cookies']['msacademic'] = str(msacademic)
    params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
    params['method'] = 'POST'
    params['data'] = {
        'Query': '@{query}@'.format(query=query),
        'Limit': 10,
        # NOTE(review): this sends the zero-based page index, not
        # ``(pageno - 1) * Limit`` -- confirm which one the endpoint expects.
        'Offset': params['pageno'] - 1,
        'Filters': '',
        'OrderBy': '',
        'SortAscending': False,
    }

    return params


def response(resp):
    """Convert the JSON body of ``resp`` into a list of result dicts.

    :param resp: response object; only ``resp.text`` is read and it must
        contain a JSON document.
    :returns: list of dicts with the keys ``url``, ``title`` and ``content``.
        An empty list is returned when the decoded JSON is falsy.
    :raises KeyError: if the decoded document has no ``results`` key or an
        entry lacks the expected fields.
    """
    results = []

    response_data = loads(resp.text)
    if not response_data:
        return results

    for result in response_data['results']:
        url = _get_url(result)
        # 'dn' is presumably the entry's display name -- TODO confirm.
        title = result['e']['dn']
        content = _get_content(result)
        results.append({
            'url': url,
            'title': html_to_text(title),
            'content': html_to_text(content),
        })

    return results


def _get_url(result):
    """Return the URL for one result entry.

    The ``u`` field of the first item in ``result['e']['s']`` is used when
    that list is present and non-empty; otherwise the URL of the entry's
    detail page is built from ``result['id']``.
    """
    # Truthiness check (rather than ``'s' in ...``) so that an empty or null
    # source list falls back to the detail page instead of raising.
    sources = result['e'].get('s')
    if sources:
        return sources[0]['u']

    return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])


def _get_content(result):
    """Return the text in ``result['e']['d']``, cut to 300 characters.

    Text longer than 300 characters is truncated and suffixed with ``...``.
    An empty string is returned when the ``d`` key is absent.
    """
    if 'd' in result['e']:
        content = result['e']['d']
        if len(content) > 300:
            return content[:300] + '...'
        return content

    return ''