| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 | 
							- # SPDX-License-Identifier: AGPL-3.0-or-later
 
- """CORE_ (COnnecting REpositories) provides a comprehensive bibliographic
 
- database of the world’s scholarly literature, collecting and indexing
 
- research from repositories and journals.
 
- .. _CORE: https://core.ac.uk/about
 
- .. _core engine config:
 
- Configuration
 
- =============
 
- The engine has the following additional settings:
 
- - :py:obj:`api_key`
 
- .. code:: yaml
 
-   - name: core.ac.uk
 
-     engine: core
 
-     categories: science
 
-     shortcut: cor
 
-     api_key: "..."
 
-     timeout: 5
 
- Implementations
 
- ===============
 
- """
 
- # pylint: disable=too-many-branches
 
- from datetime import datetime
 
- from urllib.parse import urlencode
 
- from searx.exceptions import SearxEngineAPIException
 
# Engine metadata shown on the SearXNG preferences / about pages.
about = {
    "website": "https://core.ac.uk",
    "wikidata_id": "Q22661180",
    "official_api_documentation": "https://api.core.ac.uk/docs/v3",
    "use_official_api": True,
    "require_api_key": True,
    "results": "JSON",
}

api_key = "unset"
"""For an API key register at https://core.ac.uk/services/api and insert
the API key in the engine :ref:`core engine config`."""

# Engine behaviour / endpoint configuration.
categories = ["science", "scientific publications"]
paging = True
nb_per_page = 10
base_url = "https://api.core.ac.uk/v3/search/works/"
 
def request(query, params):
    """Assemble the CORE API v3 query URL and the Bearer-auth header.

    Raises :py:obj:`searx.exceptions.SearxEngineAPIException` when no API
    key has been configured (see :py:obj:`api_key`).
    """
    if api_key == 'unset':
        raise SearxEngineAPIException('missing CORE API key')

    # API v3 uses different parameters than the older v2 endpoint.
    query_args = urlencode(
        {
            'q': query,
            'offset': (params['pageno'] - 1) * nb_per_page,
            'limit': nb_per_page,
            'sort': 'relevance',
        }
    )

    params['url'] = base_url + '?' + query_args
    params['headers'] = {'Authorization': f'Bearer {api_key}'}
    return params
 
def response(resp):
    """Parse the CORE API v3 JSON response into ``paper.html`` results.

    Entries without a title are skipped, as are entries for which no URL
    can be derived.  URL preference order: DOI, then CORE work id, then
    download URL, then source fulltext URL(s).
    """
    results = []
    json_data = resp.json()

    for result in json_data.get('results', []):
        if not result.get('title'):
            continue

        # Pick a landing URL.  The DOI (when present) must win: the old
        # if/elif chain re-tested after the DOI branch and overwrote the
        # DOI URL with the CORE-id URL whenever an ``id`` was present.
        doi = result.get('doi')
        if doi:
            url = 'https://doi.org/' + str(doi)
        elif result.get('id'):
            url = 'https://core.ac.uk/works/' + str(result['id'])
        elif result.get('downloadUrl'):
            url = result['downloadUrl']
        elif result.get('sourceFulltextUrls'):
            url = result['sourceFulltextUrls']
        else:
            continue

        # Published date, falling back to the deposit date.  Parse the
        # fallback value itself: the old code read result['publishedDate']
        # inside the try block, raising an uncaught KeyError whenever only
        # ``depositedDate`` was present.
        published_date = None
        raw_date = result.get('publishedDate') or result.get('depositedDate')
        if raw_date:
            try:
                published_date = datetime.fromisoformat(raw_date.replace('Z', '+00:00'))
            except (ValueError, AttributeError):
                pass

        # ``or []`` guards against an explicit JSON null for these keys.
        journals = [j['title'] for j in (result.get('journals') or []) if j.get('title')]

        # result.get('publisher', '') still returns None when the key is
        # present with a null value, which made .strip() raise.
        publisher = (result.get('publisher') or '').strip("'")

        authors = {a['name'] for a in (result.get('authors') or []) if a.get('name')}

        results.append(
            {
                'template': 'paper.html',
                'title': result.get('title'),
                'url': url,
                'content': result.get('fullText') or '',
                # 'comments': '',
                'tags': result.get('fieldOfStudy', []),
                'publishedDate': published_date,
                'type': result.get('documentType') or '',
                'authors': authors,
                'editor': ', '.join(result.get('contributors', [])),
                'publisher': publisher,
                'journal': ', '.join(journals),
                'doi': doi,
                # 'issn' : ''
                # 'isbn' : ''
                'pdf_url': result.get('downloadUrl') or result.get('sourceFulltextUrls'),
            }
        )

    return results
 
 
  |