semantic_scholar.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """Semantic Scholar (Science)
  4. """
  5. from json import dumps, loads
  6. from datetime import datetime
  7. about = {
  8. "website": 'https://www.semanticscholar.org/',
  9. "wikidata_id": 'Q22908627',
  10. "official_api_documentation": 'https://api.semanticscholar.org/',
  11. "use_official_api": True,
  12. "require_api_key": False,
  13. "results": 'JSON',
  14. }
  15. paging = True
  16. search_url = 'https://www.semanticscholar.org/api/1/search'
  17. paper_url = 'https://www.semanticscholar.org/paper'
  18. def request(query, params):
  19. params['url'] = search_url
  20. params['method'] = 'POST'
  21. params['headers']['content-type'] = 'application/json'
  22. params['data'] = dumps(
  23. {
  24. "queryString": query,
  25. "page": params['pageno'],
  26. "pageSize": 10,
  27. "sort": "relevance",
  28. "useFallbackRankerService": False,
  29. "useFallbackSearchCluster": False,
  30. "getQuerySuggestions": False,
  31. "authors": [],
  32. "coAuthors": [],
  33. "venues": [],
  34. "performTitleMatch": True,
  35. }
  36. )
  37. return params
  38. def response(resp):
  39. res = loads(resp.text)
  40. results = []
  41. for result in res['results']:
  42. item = {}
  43. metadata = []
  44. url = result.get('primaryPaperLink', {}).get('url')
  45. if not url and result.get('links'):
  46. url = result.get('links')[0]
  47. if not url:
  48. alternatePaperLinks = result.get('alternatePaperLinks')
  49. if alternatePaperLinks:
  50. url = alternatePaperLinks[0].get('url')
  51. if not url:
  52. url = paper_url + '/%s' % result['id']
  53. item['url'] = url
  54. item['title'] = result['title']['text']
  55. item['content'] = result['paperAbstract']['text']
  56. metadata = result.get('fieldsOfStudy') or []
  57. venue = result.get('venue', {}).get('text')
  58. if venue:
  59. metadata.append(venue)
  60. if metadata:
  61. item['metadata'] = ', '.join(metadata)
  62. pubDate = result.get('pubDate')
  63. if pubDate:
  64. item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
  65. results.append(item)
  66. return results