# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""Semantic Scholar (Science)"""

from json import dumps, loads
from datetime import datetime

from flask_babel import gettext

about = {
    "website": 'https://www.semanticscholar.org/',
    "wikidata_id": 'Q22908627',
    "official_api_documentation": 'https://api.semanticscholar.org/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

categories = ['science', 'scientific publications']
paging = True
search_url = 'https://www.semanticscholar.org/api/1/search'
paper_url = 'https://www.semanticscholar.org/paper'


def request(query, params):
    params['url'] = search_url
    params['method'] = 'POST'
    params['headers']['content-type'] = 'application/json'
    params['data'] = dumps(
        {
            "queryString": query,
            "page": params['pageno'],
            "pageSize": 10,
            "sort": "relevance",
            "useFallbackRankerService": False,
            "useFallbackSearchCluster": False,
            "getQuerySuggestions": False,
            "authors": [],
            "coAuthors": [],
            "venues": [],
            "performTitleMatch": True,
        }
    )
    return params


def response(resp):
    res = loads(resp.text)
    results = []

    for result in res['results']:
        # URL: prefer the primary paper link, then the first plain link,
        # then the first alternate link, and finally the canonical paper page
        url = result.get('primaryPaperLink', {}).get('url')
        if not url and result.get('links'):
            url = result.get('links')[0]
        if not url:
            alternatePaperLinks = result.get('alternatePaperLinks')
            if alternatePaperLinks:
                url = alternatePaperLinks[0].get('url')
        if not url:
            url = paper_url + '/%s' % result['id']

        # publishedDate
        if 'pubDate' in result:
            publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
        else:
            publishedDate = None

        # authors
        authors = [author[0]['name'] for author in result.get('authors', [])]

        # pdf_url: take the first alternate link, skipping crawler and DOI links
        pdf_url = None
        for doc in result.get('alternatePaperLinks', []):
            if doc['linkType'] not in ('crawler', 'doi'):
                pdf_url = doc['url']
                break

        # comments
        comments = None
        if 'citationStats' in result:
            comments = gettext(
                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
            ).format(
                numCitations=result['citationStats']['numCitations'],
                firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
                lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
            )

        results.append(
            {
                'template': 'paper.html',
                'url': url,
                'title': result['title']['text'],
                'content': result['paperAbstract']['text'],
                'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
                'doi': result.get('doiInfo', {}).get('doi'),
                'tags': result.get('fieldsOfStudy'),
                'authors': authors,
                'pdf_url': pdf_url,
                'publishedDate': publishedDate,
                'comments': comments,
            }
        )

    return results
 |