core.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """CORE (science)
  4. """
  5. from datetime import datetime
  6. from urllib.parse import urlencode
  7. from searx.exceptions import SearxEngineAPIException
  8. about = {
  9. "website": 'https://core.ac.uk',
  10. "wikidata_id": 'Q22661180',
  11. "official_api_documentation": 'https://core.ac.uk/documentation/api/',
  12. "use_official_api": True,
  13. "require_api_key": True,
  14. "results": 'JSON',
  15. }
  16. categories = ['science']
  17. paging = True
  18. nb_per_page = 10
  19. api_key = 'unset'
  20. base_url = 'https://core.ac.uk:443/api-v2/search/'
  21. search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'
  22. def request(query, params):
  23. if api_key == 'unset':
  24. raise SearxEngineAPIException('missing CORE API key')
  25. search_path = search_string.format(
  26. query=urlencode({'q': query}),
  27. nb_per_page=nb_per_page,
  28. page=params['pageno'],
  29. apikey=api_key,
  30. )
  31. params['url'] = base_url + search_path
  32. logger.debug("query_url --> %s", params['url'])
  33. return params
  34. def response(resp):
  35. results = []
  36. json_data = resp.json()
  37. for result in json_data['data']:
  38. source = result['_source']
  39. if not source['urls']:
  40. continue
  41. time = source['publishedDate'] or source['depositedDate']
  42. if time:
  43. publishedDate = datetime.fromtimestamp(time / 1000)
  44. journals = []
  45. if source['journals']:
  46. for j in source['journals']:
  47. journals.append(j['title'])
  48. publisher = source['publisher']
  49. if publisher:
  50. publisher = source['publisher'].strip("'")
  51. results.append(
  52. {
  53. 'template': 'paper.html',
  54. 'title': source['title'],
  55. 'url': source['urls'][0].replace('http://', 'https://', 1),
  56. 'content': source['description'],
  57. # 'comments': '',
  58. 'tags': source['topics'],
  59. 'publishedDate': publishedDate,
  60. 'type': (source['types'] or [None])[0],
  61. 'authors': source['authors'],
  62. 'editor': ', '.join(source['contributors'] or []),
  63. 'publisher': publisher,
  64. 'journal': ', '.join(journals),
  65. # 'volume': '',
  66. # 'pages' : '',
  67. # 'number': '',
  68. 'doi': source['doi'],
  69. 'issn': source['issn'],
  70. 'isbn': source.get('isbn'), # exists in the rawRecordXml
  71. 'pdf_url': source.get('repositoryDocument', {}).get('pdfOrigin'),
  72. }
  73. )
  74. return results