# SPDX-License-Identifier: AGPL-3.0-or-later
"""CORE_ (COnnecting REpositories) provides a comprehensive bibliographic
database of the world’s scholarly literature, collecting and indexing
research from repositories and journals.

.. _CORE: https://core.ac.uk/about
.. _core engine config:

Configuration
=============

The engine has the following additional settings:

- :py:obj:`api_key`

.. code:: yaml

  - name: core.ac.uk
    engine: core
    categories: science
    shortcut: cor
    api_key: "..."
    timeout: 5

Implementations
===============

"""
# pylint: disable=too-many-branches

from datetime import datetime
from urllib.parse import urlencode

from searx.exceptions import SearxEngineAPIException

about = {
    "website": 'https://core.ac.uk',
    "wikidata_id": 'Q22661180',
    "official_api_documentation": 'https://api.core.ac.uk/docs/v3',
    "use_official_api": True,
    "require_api_key": True,
    "results": 'JSON',
}

api_key = 'unset'
"""For an API key register at https://core.ac.uk/services/api and insert
the API key in the engine :ref:`core engine config`."""

categories = ['science', 'scientific publications']
paging = True
nb_per_page = 10
base_url = 'https://api.core.ac.uk/v3/search/works/'


def request(query, params):

    if api_key == 'unset':
        raise SearxEngineAPIException('missing CORE API key')

    # the v3 works endpoint takes the query and the paging values as URL
    # parameters
    search_params = {
        'q': query,
        'offset': (params['pageno'] - 1) * nb_per_page,
        'limit': nb_per_page,
        'sort': 'relevance',
    }

    params['url'] = base_url + '?' + urlencode(search_params)
    params['headers'] = {'Authorization': f'Bearer {api_key}'}
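
    # For example, page 2 of a query for "machine learning" is requested as
    # (illustrative sketch, not captured from a live call):
    #
    #   https://api.core.ac.uk/v3/search/works/?q=machine+learning&offset=10&limit=10&sort=relevance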

    return params


def response(resp):
    results = []
    json_data = resp.json()
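
    # Sketch of the fields read from the v3 "works" payload below (an assumed,
    # trimmed shape - not the full response schema):
    #
    #   {"results": [{"title": "...", "doi": "...", "id": ...,
    #                 "downloadUrl": "...", "sourceFulltextUrls": "...",
    #                 "publishedDate": "...", "authors": [{"name": "..."}],
    #                 "journals": [{"title": "..."}], ...}]}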

    for result in json_data.get('results', []):

        # skip results without a title
        if not result.get('title'):
            continue

        # pick a landing URL: prefer the DOI, then fall back to the CORE work
        # page, the download URL and the source fulltext URLs
        url = None
        doi = result.get('doi')
        if doi:
            url = f'https://doi.org/{doi}'
        elif result.get('id'):
            url = 'https://core.ac.uk/works/' + str(result['id'])
        elif result.get('downloadUrl'):
            url = result['downloadUrl']
        elif result.get('sourceFulltextUrls'):
            url = result['sourceFulltextUrls']
        else:
            continue

        # published date: fall back to the deposit date when no publication
        # date is given
        published_date = None
        raw_date = result.get('publishedDate') or result.get('depositedDate')
        if raw_date:
            try:
                published_date = datetime.fromisoformat(raw_date.replace('Z', '+00:00'))
            except (ValueError, AttributeError):
                pass
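        # e.g. (assumed input format) '2021-03-04T00:00:00Z' parses to an
        # aware datetime in UTC; malformed dates are silently dropped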

        # journal titles
        journals = []
        if result.get('journals'):
            journals = [j.get('title') for j in result['journals'] if j.get('title')]

        # publisher names are sometimes wrapped in single quotes
        publisher = (result.get('publisher') or '').strip("'")

        # collect author names in a set to drop duplicate entries
        authors = set()
        for i in result.get('authors', []):
            name = i.get('name')
            if name:
                authors.add(name)

        results.append(
            {
                'template': 'paper.html',
                'title': result.get('title'),
                'url': url,
                'content': result.get('fullText', '') or '',
                # 'comments': '',
                'tags': result.get('fieldOfStudy', []),
                'publishedDate': published_date,
                'type': result.get('documentType', '') or '',
                'authors': authors,
                'editor': ', '.join(result.get('contributors', [])),
                'publisher': publisher,
                'journal': ', '.join(journals),
                'doi': result.get('doi'),
                # 'issn' : ''
                # 'isbn' : ''
                'pdf_url': result.get('downloadUrl') or result.get('sourceFulltextUrls') or '',
            }
        )

    return results