# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""CORE (science)
"""
# pylint: disable=missing-function-docstring
from json import loads
from datetime import datetime
from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException
  10. about = {
  11. "website": 'https://core.ac.uk',
  12. "wikidata_id": 'Q22661180',
  13. "official_api_documentation": 'https://core.ac.uk/documentation/api/',
  14. "use_official_api": True,
  15. "require_api_key": True,
  16. "results": 'JSON',
  17. }
  18. categories = ['science']
  19. paging = True
  20. nb_per_page = 10
  21. api_key = 'unset'
  22. base_url = 'https://core.ac.uk:443/api-v2/search/'
  23. search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}'
  24. def request(query, params):
  25. if api_key == 'unset':
  26. raise SearxEngineAPIException('missing CORE API key')
  27. search_path = search_string.format(
  28. query = urlencode({'q': query}),
  29. nb_per_page = nb_per_page,
  30. page = params['pageno'],
  31. apikey = api_key,
  32. )
  33. params['url'] = base_url + search_path
  34. logger.debug("query_url --> %s", params['url'])
  35. return params
  36. def response(resp):
  37. results = []
  38. json_data = loads(resp.text)
  39. for result in json_data['data']:
  40. source = result['_source']
  41. time = source['publishedDate'] or source['depositedDate']
  42. if time :
  43. date = datetime.fromtimestamp(time / 1000)
  44. else:
  45. date = None
  46. metadata = []
  47. if source['publisher'] and len(source['publisher']) > 3:
  48. metadata.append(source['publisher'])
  49. if source['topics']:
  50. metadata.append(source['topics'][0])
  51. if source['doi']:
  52. metadata.append(source['doi'])
  53. metadata = ' / '.join(metadata)
  54. results.append({
  55. 'url': source['urls'][0].replace('http://', 'https://', 1),
  56. 'title': source['title'],
  57. 'content': source['description'],
  58. 'publishedDate': date,
  59. 'metadata' : metadata,
  60. })
  61. return results