2 years ago · fc389f009d
--- a/docs/dev/engine_overview.rst
+++ b/docs/dev/engine_overview.rst
@@ -311,3 +311,88 @@ the parameter ``template`` must be set to the desired type.
 
				    address.postcode          postcode of object
			
 
				    address.country           country of object
			
 
				    ========================= =====================================================
			
 
				+
			
 
				+.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/
			
 
				+.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types
			
 
				+
			
 
				+.. list-table:: Parameter of the **paper** media type /
			
 
				+                see `BibTeX field types`_ and `BibTeX format`_
			
 
				+   :header-rows: 2
			
 
				+   :width: 100%
			
 
				+
			
 
				+   * - result-parameter
			
 
				+     - Python type
			
 
				+     - information
			
 
				+
			
 
				+   * - template
			
 
				+     - :py:class:`str`
			
 
				+     - is set to ``paper.html``
			
 
				+
			
 
				+   * - title
			
 
				+     - :py:class:`str`
			
 
				+     - title of the result
			
 
				+
			
 
				+   * - content
			
 
				+     - :py:class:`str`
			
 
				+     - abstract
			
 
				+
			
 
				+   * - comments
			
 
				+     - :py:class:`str`
			
 
				+     - free text displayed in italics below the content
			
 
				+
			
 
				+   * - tags
			
 
				+     - :py:class:`List <list>`\ [\ :py:class:`str`\ ]
			
 
				+     - free tag list
			
 
				+
			
 
				+   * - publishedDate
			
 
				+     - :py:class:`datetime <datetime.datetime>`
			
 
				+     - last publication date
			
 
				+
			
 
				+   * - authors
			
 
				+     - :py:class:`List <list>`\ [\ :py:class:`str`\ ]
			
 
				+     - list of authors of the work (note the plural: authors with an "s")
			
 
				+
			
 
				+   * - editor
			
 
				+     - :py:class:`str`
			
 
				+     - list of editors of a book
			
 
				+
			
 
				+   * - publisher
			
 
				+     - :py:class:`str`
			
 
				+     - name of the publisher
			
 
				+
			
 
				+   * - journal
			
 
				+     - :py:class:`str`
			
 
				+     - name of the journal or magazine the article was
			
 
				+       published in
			
 
				+
			
 
				+   * - volume
			
 
				+     - :py:class:`str`
			
 
				+     - volume number
			
 
				+
			
 
				+   * - pages
			
 
				+     - :py:class:`str`
			
 
				+     - page range where the article is
			
 
				+
			
 
				+   * - number
			
 
				+     - :py:class:`str`
			
 
				+     - number of the report or the issue number for a journal article
			
 
				+
			
 
				+   * - doi
			
 
				+     - :py:class:`str`
			
 
				+     - DOI number (like ``10.1038/d41586-018-07848-2``)
			
 
				+
			
 
				+   * - issn
			
 
				+     - :py:class:`str`
			
 
				+     - ISSN number like ``1476-4687``
			
 
				+
			
 
				+   * - isbn
			
 
				+     - :py:class:`str`
			
 
				+     - ISBN number like ``9780201896831``
			
 
				+
			
 
				+   * - pdf_url
			
 
				+     - :py:class:`str`
			
 
				+     - URL to the full article, the PDF version
			
 
				+
			
 
				+   * - html_url
			
 
				+     - :py:class:`str`
			
 
				+     - URL to full article, HTML version
			
--- a/searx/engines/arxiv.py
+++ b/searx/engines/arxiv.py
@@ -3,9 +3,10 @@
 
				  ArXiV (Scientific preprints)
			
 
				 """
			
 
				 
			
 
				-from lxml import html
			
 
				+from lxml import etree
			
 
				+from lxml.etree import XPath
			
 
				 from datetime import datetime
			
 
				-from searx.utils import eval_xpath_list, eval_xpath_getindex
			
 
				+from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
			
 
				 
			
 
				 # about
			
 
				 about = {
			
@@ -17,7 +18,7 @@ about = {
 
				     "results": 'XML-RSS',
			
 
				 }
			
 
				 
			
 
				-categories = ['science']
			
 
				+categories = ['science', 'scientific publications']
			
 
				 paging = True
			
 
				 
			
 
				 base_url = (
			
@@ -27,6 +28,23 @@ base_url = (
 
				 # engine dependent config
			
 
				 number_of_results = 10
			
 
				 
			
 
				+# xpaths
			
 
				+arxiv_namespaces = {
			
 
				+    "atom": "http://www.w3.org/2005/Atom",
			
 
				+    "arxiv": "http://arxiv.org/schemas/atom",
			
 
				+}
			
 
				+xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
			
 
				+xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
			
 
				+xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
			
 
				+xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
			
 
				+xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
			
 
				+xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
			
 
				+xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
			
 
				+xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
			
 
				+xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
			
 
				+xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
			
 
				+xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)
			
 
				+
			
 
				 
			
 
				 def request(query, params):
			
 
				     # basic search
			
@@ -41,30 +59,50 @@ def request(query, params):
 
				 
			
 
				 def response(resp):
			
 
				     results = []
			
 
				-
			
 
				-    dom = html.fromstring(resp.content)
			
 
				-
			
 
				-    for entry in eval_xpath_list(dom, '//entry'):
			
 
				-        title = eval_xpath_getindex(entry, './/title', 0).text
			
 
				-
			
 
				-        url = eval_xpath_getindex(entry, './/id', 0).text
			
 
				-
			
 
				-        content_string = '{doi_content}{abstract_content}'
			
 
				-
			
 
				-        abstract = eval_xpath_getindex(entry, './/summary', 0).text
			
 
				-
			
 
				-        #  If a doi is available, add it to the snipppet
			
 
				-        doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None)
			
 
				-        doi_content = doi_element.text if doi_element is not None else ''
			
 
				-        content = content_string.format(doi_content=doi_content, abstract_content=abstract)
			
 
				-
			
 
				-        if len(content) > 300:
			
 
				-            content = content[0:300] + "..."
			
 
				-        # TODO: center snippet on query term
			
 
				-
			
 
				-        publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ')
			
 
				-
			
 
				-        res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
			
 
				+    dom = etree.fromstring(resp.content)
			
 
				+    for entry in eval_xpath_list(dom, xpath_entry):
			
 
				+        title = eval_xpath_getindex(entry, xpath_title, 0).text
			
 
				+
			
 
				+        url = eval_xpath_getindex(entry, xpath_id, 0).text
			
 
				+        abstract = eval_xpath_getindex(entry, xpath_summary, 0).text
			
 
				+
			
 
				+        authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]
			
 
				+
			
 
				+        #  doi
			
 
				+        doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
			
 
				+        doi = None if doi_element is None else doi_element.text
			
 
				+
			
 
				+        # pdf
			
 
				+        pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
			
 
				+        pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')
			
 
				+
			
 
				+        # journal
			
 
				+        journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
			
 
				+        journal = None if journal_element is None else journal_element.text
			
 
				+
			
 
				+        # tags
			
 
				+        tag_elements = eval_xpath(entry, xpath_category)
			
 
				+        tags = [str(tag) for tag in tag_elements]
			
 
				+
			
 
				+        # comments
			
 
				+        comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
			
 
				+        comments = None if comments_elements is None else comments_elements.text
			
 
				+
			
 
				+        publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')
			
 
				+
			
 
				+        res_dict = {
			
 
				+            'template': 'paper.html',
			
 
				+            'url': url,
			
 
				+            'title': title,
			
 
				+            'publishedDate': publishedDate,
			
 
				+            'content': abstract,
			
 
				+            'doi': doi,
			
 
				+            'authors': authors,
			
 
				+            'journal': journal,
			
 
				+            'tags': tags,
			
 
				+            'comments': comments,
			
 
				+            'pdf_url': pdf_url,
			
 
				+        }
			
 
				 
			
 
				         results.append(res_dict)
			
 
				 
			
--- a/searx/engines/crossref.py
+++ b/searx/engines/crossref.py
@@ -0,0 +1,59 @@
 
				+# SPDX-License-Identifier: AGPL-3.0-or-later
			
 
				+# lint: pylint
			
 
				+"""CrossRef (Science)
			
 
				+"""
			
 
				+
			
 
				+from urllib.parse import urlencode
			
 
				+from searx.utils import html_to_text
			
 
				+
			
 
				+about = {
			
 
				+    "website": 'https://www.crossref.org/',
			
 
				+    "wikidata_id": 'Q5188229',
			
 
				+    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
			
 
				+    "use_official_api": False,
			
 
				+    "require_api_key": False,
			
 
				+    "results": 'JSON',
			
 
				+}
			
 
				+
			
 
				+categories = ['science', 'scientific publications']
			
 
				+paging = True
			
 
				+search_url = 'https://api.crossref.org/works'
			
 
				+
			
 
				+
			
 
				+def request(query, params):
				+    """Store the Crossref works search URL for *query* in ``params['url']``.
				+
				+    The ``offset`` query argument advances in steps of 20 per page: page 1
				+    maps to offset 0, page 2 to offset 20, and so on.  Returns ``params``.
				+    """
				+    offset = 20 * (params['pageno'] - 1)
				+    query_string = urlencode({'query': query, 'offset': offset})
				+    params['url'] = search_url + '?' + query_string
				+    return params
			
 
				+
			
 
				+
			
 
				+def _first(values):
				+    """Return the first element of *values*, or ``None`` if it is missing or empty."""
				+    return values[0] if values else None
				+
				+
				+def response(resp):
				+    """Convert a Crossref works response into a list of ``paper.html`` results.
				+
				+    Reads ``resp.json()['message']['items']`` and emits one result dict per
				+    record.  ``title`` and ``container-title`` are treated as optional lists,
				+    so a single record lacking one of them does not raise ``KeyError`` or
				+    ``IndexError`` and discard the whole page of results.
				+    """
				+    results = []
				+    for record in resp.json()['message']['items']:
				+        record_type = record['type']
				+        record_title = _first(record.get('title'))
				+        container_title = _first(record.get('container-title'))
				+
				+        if record_type == 'book-chapter' and container_title:
				+            # book chapter: lead with the book title and append the chapter
				+            # title only when it differs from the book title
				+            title = html_to_text(container_title)
				+            if record_title and record_title.lower().strip() != container_title.lower().strip():
				+                title += ' (' + html_to_text(record_title) + ')'
				+            journal = None
				+        else:
				+            # records without a title fall back to the container title
				+            title = html_to_text(record_title or container_title or '')
				+            journal = container_title
				+
				+        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
				+
				+        # join only the name parts that are present: no stray leading or
				+        # trailing space, and contributors without either part are skipped
				+        authors = []
				+        for author in record.get('author', []):
				+            name = ' '.join(part for part in (author.get('given'), author.get('family')) if part)
				+            if name:
				+                authors.append(name)
				+
				+        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
				+        results.append(
				+            {
				+                'template': 'paper.html',
				+                'url': url,
				+                'title': title,
				+                'journal': journal,
				+                'volume': record.get('volume'),
				+                'type': record_type,
				+                'content': html_to_text(record.get('abstract', '')),
				+                'publisher': record.get('publisher'),
				+                'authors': authors,
				+                'doi': record['DOI'],
				+                'isbn': isbn,
				+            }
				+        )
				+    return results
			
--- a/searx/engines/google_scholar.py
+++ b/searx/engines/google_scholar.py
@@ -13,10 +13,12 @@ Definitions`_.
 
				 
			
 
				 from urllib.parse import urlencode
			
 
				 from datetime import datetime
			
 
				+from typing import Optional
			
 
				 from lxml import html
			
 
				 
			
 
				 from searx.utils import (
			
 
				     eval_xpath,
			
 
				+    eval_xpath_getindex,
			
 
				     eval_xpath_list,
			
 
				     extract_text,
			
 
				 )
			
@@ -46,7 +48,7 @@ about = {
 
				 }
			
 
				 
			
 
				 # engine dependent config
			
 
				-categories = ['science']
			
 
				+categories = ['science', 'scientific publications']
			
 
				 paging = True
			
 
				 language_support = True
			
 
				 use_locale_domain = True
			
@@ -99,7 +101,43 @@ def request(query, params):
 
				     return params
			
 
				 
			
 
				 
			
 
				-def response(resp):
			
 
				+def parse_gs_a(text: Optional[str]):
				+    """Split the green byline of a result into its components.
				+
				+    Recognised layouts:
				+
				+    * "{authors} - {journal}, {year} - {publisher}"
				+    * "{authors} - {year} - {publisher}"
				+    * "{authors} - {publisher}"
				+
				+    Returns the tuple ``(authors, journal, publisher, publishedDate)``.  All
				+    four items are ``None`` for an empty or missing text; ``journal`` and
				+    ``publishedDate`` are ``None`` whenever they cannot be extracted.
				+    """
				+    if not text:
				+        return None, None, None, None
				+
				+    sections = text.split(' - ')
				+    authors = sections[0].split(', ')
				+    publisher = sections[-1]
				+    if len(sections) != 3:
				+        # no middle section: neither journal nor year is available
				+        return authors, None, publisher, None
				+
				+    # middle section is "{journal}, {year}" or just "{year}"; the journal is
				+    # optional and may itself contain commas, so only the last item is the year
				+    *journal_parts, year = sections[1].split(', ')
				+    journal = ', '.join(journal_parts) if journal_parts else None
				+    if journal == '…':
				+        # a lone ellipsis is a truncated journal name, not a usable value
				+        journal = None
				+
				+    try:
				+        published = datetime.strptime(year.strip(), '%Y')
				+    except ValueError:
				+        published = None
				+    return authors, journal, publisher, published
			
 
				+
			
 
				+
			
 
				+def response(resp):  # pylint: disable=too-many-locals
			
 
				     """Get response from google's search request"""
			
 
				     results = []
			
 
				 
			
@@ -112,30 +150,53 @@ def response(resp):
 
				     dom = html.fromstring(resp.text)
			
 
				 
			
 
				     # parse results
			
 
				-    for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'):
			
 
				+    for result in eval_xpath_list(dom, '//div[@data-cid]'):
			
 
				 
			
 
				-        title = extract_text(eval_xpath(result, './h3[1]//a'))
			
 
				+        title = extract_text(eval_xpath(result, './/h3[1]//a'))
			
 
				 
			
 
				         if not title:
			
 
				             # this is a [ZITATION] block
			
 
				             continue
			
 
				 
			
 
				-        url = eval_xpath(result, './h3[1]//a/@href')[0]
			
 
				-        content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or ''
			
 
				-
			
 
				-        pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]'))
			
 
				-        if pub_info:
			
 
				-            content += "[%s]" % pub_info
			
 
				-
			
 
				         pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
			
 
				         if pub_type:
			
 
				-            title = title + " " + pub_type
			
 
				+            pub_type = pub_type[1:-1].lower()
			
 
				+
			
 
				+        url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0)
			
 
				+        content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]'))
			
 
				+        authors, journal, publisher, publishedDate = parse_gs_a(
			
 
				+            extract_text(eval_xpath(result, './/div[@class="gs_a"]'))
			
 
				+        )
			
 
				+        if publisher in url:
			
 
				+            publisher = None
			
 
				+
			
 
				+        # cited by
			
 
				+        comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'))
			
 
				+
			
 
				+        # link to the html or pdf document
			
 
				+        html_url = None
			
 
				+        pdf_url = None
			
 
				+        doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None)
			
 
				+        doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
			
 
				+        if doc_type == "[PDF]":
			
 
				+            pdf_url = doc_url
			
 
				+        else:
			
 
				+            html_url = doc_url
			
 
				 
			
 
				         results.append(
			
 
				             {
			
 
				+                'template': 'paper.html',
			
 
				+                'type': pub_type,
			
 
				                 'url': url,
			
 
				                 'title': title,
			
 
				+                'authors': authors,
			
 
				+                'publisher': publisher,
			
 
				+                'journal': journal,
			
 
				+                'publishedDate': publishedDate,
			
 
				                 'content': content,
			
 
				+                'comments': comments,
			
 
				+                'html_url': html_url,
			
 
				+                'pdf_url': pdf_url,
			
 
				             }
			
 
				         )
			
 
				 
			
--- a/searx/engines/pubmed.py
+++ b/searx/engines/pubmed.py
@@ -3,11 +3,15 @@
 
				  PubMed (Scholar publications)
			
 
				 """
			
 
				 
			
 
				-from flask_babel import gettext
			
 
				 from lxml import etree
			
 
				 from datetime import datetime
			
 
				 from urllib.parse import urlencode
			
 
				 from searx.network import get
			
 
				+from searx.utils import (
			
 
				+    eval_xpath_getindex,
			
 
				+    eval_xpath_list,
			
 
				+    extract_text,
			
 
				+)
			
 
				 
			
 
				 # about
			
 
				 about = {
			
@@ -22,7 +26,7 @@ about = {
 
				     "results": 'XML',
			
 
				 }
			
 
				 
			
 
				-categories = ['science']
			
 
				+categories = ['science', 'scientific publications']
			
 
				 
			
 
				 base_url = (
			
 
				     'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}'
			
@@ -63,46 +67,61 @@ def response(resp):
 
				 
			
 
				     retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args)
			
 
				 
			
 
				-    search_results_xml = get(retrieve_url_encoded).content
			
 
				-    search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation')
			
 
				-
			
 
				-    for entry in search_results:
			
 
				-        title = entry.xpath('.//Article/ArticleTitle')[0].text
			
 
				+    search_results_response = get(retrieve_url_encoded).content
			
 
				+    search_results = etree.XML(search_results_response)
			
 
				+    for entry in eval_xpath_list(search_results, '//PubmedArticle'):
			
 
				+        medline = eval_xpath_getindex(entry, './MedlineCitation', 0)
			
 
				 
			
 
				-        pmid = entry.xpath('.//PMID')[0].text
			
 
				+        title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text
			
 
				+        pmid = eval_xpath_getindex(medline, './/PMID', 0).text
			
 
				         url = pubmed_url + pmid
			
 
				-
			
 
				-        try:
			
 
				-            content = entry.xpath('.//Abstract/AbstractText')[0].text
			
 
				-        except:
			
 
				-            content = gettext('No abstract is available for this publication.')
			
 
				-
			
 
				-        #  If a doi is available, add it to the snipppet
			
 
				-        try:
			
 
				-            doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text
			
 
				-            content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content)
			
 
				-        except:
			
 
				-            pass
			
 
				-
			
 
				-        if len(content) > 300:
			
 
				-            content = content[0:300] + "..."
			
 
				-        # TODO: center snippet on query term
			
 
				-
			
 
				-        res_dict = {'url': url, 'title': title, 'content': content}
			
 
				-
			
 
				-        try:
			
 
				-            publishedDate = datetime.strptime(
			
 
				-                entry.xpath('.//DateCreated/Year')[0].text
			
 
				-                + '-'
			
 
				-                + entry.xpath('.//DateCreated/Month')[0].text
			
 
				-                + '-'
			
 
				-                + entry.xpath('.//DateCreated/Day')[0].text,
			
 
				-                '%Y-%m-%d',
			
 
				-            )
			
 
				-            res_dict['publishedDate'] = publishedDate
			
 
				-        except:
			
 
				-            pass
			
 
				+        content = extract_text(
			
 
				+            eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True
			
 
				+        )
			
 
				+        doi = extract_text(
			
 
				+            eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True
			
 
				+        )
			
 
				+        journal = extract_text(
			
 
				+            eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True
			
 
				+        )
			
 
				+        issn = extract_text(
			
 
				+            eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True
			
 
				+        )
			
 
				+        authors = []
			
 
				+        for author in eval_xpath_list(medline, './Article/AuthorList/Author'):
			
 
				+            f = eval_xpath_getindex(author, './ForeName', 0, default=None)
			
 
				+            l = eval_xpath_getindex(author, './LastName', 0, default=None)
			
 
				+            f = '' if f is None else f.text
			
 
				+            l = '' if l is None else l.text
			
 
				+            authors.append((f + ' ' + l).strip())
			
 
				+
			
 
				+        res_dict = {
			
 
				+            'template': 'paper.html',
			
 
				+            'url': url,
			
 
				+            'title': title,
			
 
				+            'content': content,
			
 
				+            'journal': journal,
			
 
				+            'issn': [issn],
			
 
				+            'authors': authors,
			
 
				+            'doi': doi,
			
 
				+        }
			
 
				+
			
 
				+        accepted_date = eval_xpath_getindex(
			
 
				+            entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None
			
 
				+        )
			
 
				+        if accepted_date is not None:
			
 
				+            year = eval_xpath_getindex(accepted_date, './Year', 0)
			
 
				+            month = eval_xpath_getindex(accepted_date, './Month', 0)
			
 
				+            day = eval_xpath_getindex(accepted_date, './Day', 0)
			
 
				+            try:
			
 
				+                publishedDate = datetime.strptime(
			
 
				+                    year.text + '-' + month.text + '-' + day.text,
			
 
				+                    '%Y-%m-%d',
			
 
				+                )
			
 
				+                res_dict['publishedDate'] = publishedDate
			
 
				+            except Exception as e:
			
 
				+                print(e)
			
 
				 
			
 
				         results.append(res_dict)
			
 
				 
			
 
				-        return results
			
 
				+    return results
			
--- a/searx/engines/semantic_scholar.py
+++ b/searx/engines/semantic_scholar.py
@@ -6,6 +6,8 @@
 
				 from json import dumps, loads
			
 
				 from datetime import datetime
			
 
				 
			
 
				+from flask_babel import gettext
			
 
				+
			
 
				 about = {
			
 
				     "website": 'https://www.semanticscholar.org/',
			
 
				     "wikidata_id": 'Q22908627',
			
@@ -15,6 +17,7 @@ about = {
 
				     "results": 'JSON',
			
 
				 }
			
 
				 
			
 
				+categories = ['science', 'scientific publications']
			
 
				 paging = True
			
 
				 search_url = 'https://www.semanticscholar.org/api/1/search'
			
 
				 paper_url = 'https://www.semanticscholar.org/paper'
			
@@ -45,11 +48,7 @@ def request(query, params):
 
				 def response(resp):
			
 
				     res = loads(resp.text)
			
 
				     results = []
			
 
				-
			
 
				     for result in res['results']:
			
 
				-        item = {}
			
 
				-        metadata = []
			
 
				-
			
 
				         url = result.get('primaryPaperLink', {}).get('url')
			
 
				         if not url and result.get('links'):
			
 
				             url = result.get('links')[0]
			
@@ -60,22 +59,47 @@ def response(resp):
 
				         if not url:
			
 
				             url = paper_url + '/%s' % result['id']
			
 
				 
			
 
				-        item['url'] = url
			
 
				+        # publishedDate
			
 
				+        if 'pubDate' in result:
			
 
				+            publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
			
 
				+        else:
			
 
				+            publishedDate = None
			
 
				 
			
 
				-        item['title'] = result['title']['text']
			
 
				-        item['content'] = result['paperAbstract']['text']
			
 
				+        # authors
			
 
				+        authors = [author[0]['name'] for author in result.get('authors', [])]
			
 
				 
			
 
				-        metadata = result.get('fieldsOfStudy') or []
			
 
				-        venue = result.get('venue', {}).get('text')
			
 
				-        if venue:
			
 
				-            metadata.append(venue)
			
 
				-        if metadata:
			
 
				-            item['metadata'] = ', '.join(metadata)
			
 
				+        # pick for the first alternate link, but not from the crawler
			
 
				+        pdf_url = None
			
 
				+        for doc in result.get('alternatePaperLinks', []):
			
 
				+            if doc['linkType'] not in ('crawler', 'doi'):
			
 
				+                pdf_url = doc['url']
			
 
				+                break
			
 
				 
			
 
				-        pubDate = result.get('pubDate')
			
 
				-        if pubDate:
			
 
				-            item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
			
 
				+        # comments
			
 
				+        comments = None
			
 
				+        if 'citationStats' in result:
			
 
				+            comments = gettext(
			
 
				+                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
			
 
				+            ).format(
			
 
				+                numCitations=result['citationStats']['numCitations'],
			
 
				+                firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
			
 
				+                lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
			
 
				+            )
			
 
				 
			
 
				-        results.append(item)
			
 
				+        results.append(
			
 
				+            {
			
 
				+                'template': 'paper.html',
			
 
				+                'url': url,
			
 
				+                'title': result['title']['text'],
			
 
				+                'content': result['paperAbstract']['text'],
			
 
				+                'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
			
 
				+                'doi': result.get('doiInfo', {}).get('doi'),
			
 
				+                'tags': result.get('fieldsOfStudy'),
			
 
				+                'authors': authors,
			
 
				+                'pdf_url': pdf_url,
			
 
				+                'publishedDate': publishedDate,
			
 
				+                'comments': comments,
			
 
				+            }
			
 
				+        )
			
 
				 
			
 
				     return results
			
--- a/searx/engines/springer.py
+++ b/searx/engines/springer.py
@@ -19,7 +19,7 @@ about = {
 
				     "results": 'JSON',
			
 
				 }
			
 
				 
			
 
				-categories = ['science']
			
 
				+categories = ['science', 'scientific publications']
			
 
				 paging = True
			
 
				 nb_per_page = 10
			
 
				 api_key = 'unset'
			
@@ -41,32 +41,29 @@ def response(resp):
 
				     json_data = loads(resp.text)
			
 
				 
			
 
				     for record in json_data['records']:
			
 
				-        content = record['abstract'][0:500]
			
 
				-        if len(record['abstract']) > len(content):
			
 
				-            content += "..."
			
 
				+        content = record['abstract']
			
 
				         published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
			
 
				-
			
 
				-        metadata = [
			
 
				-            record[x]
			
 
				-            for x in [
			
 
				-                'publicationName',
			
 
				-                'identifier',
			
 
				-                'contentType',
			
 
				-            ]
			
 
				-            if record.get(x) is not None
			
 
				-        ]
			
 
				-
			
 
				-        metadata = ' / '.join(metadata)
			
 
				-        if record.get('startingPage') and record.get('endingPage') is not None:
			
 
				-            metadata += " (%(startingPage)s-%(endingPage)s)" % record
			
 
				-
			
 
				+        authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
			
 
				+        tags = record.get('genre')
			
 
				+        if isinstance(tags, str):
			
 
				+            tags = [tags]
			
 
				         results.append(
			
 
				             {
			
 
				+                'template': 'paper.html',
			
 
				                 'title': record['title'],
			
 
				                 'url': record['url'][0]['value'].replace('http://', 'https://', 1),
			
 
				+                'type': record.get('contentType'),
			
 
				                 'content': content,
			
 
				                 'publishedDate': published,
			
 
				-                'metadata': metadata,
			
 
				+                'authors': authors,
			
 
				+                'doi': record.get('doi'),
			
 
				+                'journal': record.get('publicationName'),
			
 
				+                'pages': record.get('start_page') + '-' + record.get('end_page'),
			
 
				+                'tags': tags,
			
 
				+                'issn': [record.get('issn')],
			
 
				+                'isbn': [record.get('isbn')],
			
 
				+                'volume': record.get('volume') or None,
			
 
				+                'number': record.get('number') or None,
			
 
				             }
			
 
				         )
			
 
				     return results
			
--- a/searx/plugins/oa_doi_rewrite.py
+++ b/searx/plugins/oa_doi_rewrite.py
@@ -42,4 +42,6 @@ def on_result(request, search, result):
 
				                 doi = doi[: -len(suffix)]
			
 
				         result['url'] = get_doi_resolver(request.preferences) + doi
			
 
				         result['parsed_url'] = urlparse(result['url'])
			
 
				+        if 'doi' not in result:
			
 
				+            result['doi'] = doi
			
 
				     return True
			
--- a/searx/searxng.msg
+++ b/searx/searxng.msg
@@ -43,6 +43,7 @@ CATEGORY_GROUPS = {
 
				     'REPOS': 'repos',
			
 
				     'SOFTWARE_WIKIS': 'software wikis',
			
 
				     'WEB': 'web',
			
 
				+    'SCIENTIFIC PUBLICATIONS': 'scientific publications',
			
 
				 }
			
 
				 
			
 
				 STYLE_NAMES = {
			
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -319,7 +319,6 @@ engines:
 
				   - name: arxiv
			
 
				     engine: arxiv
			
 
				     shortcut: arx
			
 
				-    categories: science
			
 
				     timeout: 4.0
			
 
				 
			
 
				   # tmp suspended:  dh key too small
			
@@ -411,23 +410,10 @@ engines:
 
				   #   api_key: 'unset'
			
 
				 
			
 
				   - name: crossref
			
 
				-    engine: json_engine
			
 
				-    paging: true
			
 
				-    search_url: https://search.crossref.org/dois?q={query}&page={pageno}
			
 
				-    url_query: doi
			
 
				-    title_query: title
			
 
				-    title_html_to_text: true
			
 
				-    content_query: fullCitation
			
 
				-    content_html_to_text: true
			
 
				-    categories: science
			
 
				+    engine: crossref
			
 
				     shortcut: cr
			
 
				-    about:
			
 
				-      website: https://www.crossref.org/
			
 
				-      wikidata_id: Q5188229
			
 
				-      official_api_documentation: https://github.com/CrossRef/rest-api-doc
			
 
				-      use_official_api: false
			
 
				-      require_api_key: false
			
 
				-      results: JSON
			
 
				+    timeout: 30
			
 
				+    disabled: true
			
 
				 
			
 
				   - name: yep
			
 
				     engine: json_engine
			
@@ -1068,7 +1054,7 @@ engines:
 
				     title_query: metadata/oaf:entity/oaf:result/title/$
			
 
				     content_query: metadata/oaf:entity/oaf:result/description/$
			
 
				     content_html_to_text: true
			
 
				-    categories: science
			
 
				+    categories: "science"
			
 
				     shortcut: oad
			
 
				     timeout: 5.0
			
 
				     about:
			
@@ -1198,7 +1184,6 @@ engines:
 
				   - name: pubmed
			
 
				     engine: pubmed
			
 
				     shortcut: pub
			
 
				-    categories: science
			
 
				     timeout: 3.0
			
 
				 
			
 
				   - name: pypi
			
@@ -1346,7 +1331,6 @@ engines:
 
				     engine: semantic_scholar
			
 
				     disabled: true
			
 
				     shortcut: se
			
 
				-    categories: science
			
 
				 
			
 
				   # Spotify needs API credentials
			
 
				   # - name: spotify
			
@@ -1372,8 +1356,7 @@ engines:
 
				   #   # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
			
 
				   #   api_key: 'unset'
			
 
				   #   shortcut: springer
			
 
				-  #   categories: science
			
 
				-  #   timeout: 6.0
			
 
				+  #   timeout: 15.0
			
 
				 
			
 
				   - name: startpage
			
 
				     engine: startpage
			
--- a/searx/static/themes/simple/css/searxng-rtl.min.css
+++ b/searx/static/themes/simple/css/searxng-rtl.min.css
--- a/searx/static/themes/simple/css/searxng-rtl.min.css.map
+++ b/searx/static/themes/simple/css/searxng-rtl.min.css.map
--- a/searx/static/themes/simple/css/searxng.min.css
+++ b/searx/static/themes/simple/css/searxng.min.css
--- a/searx/static/themes/simple/css/searxng.min.css.map
+++ b/searx/static/themes/simple/css/searxng.min.css.map
--- a/searx/static/themes/simple/src/less/style.less
+++ b/searx/static/themes/simple/src/less/style.less
@@ -302,6 +302,49 @@ article[data-vim-selected].category-social {
 
				   }
			
 
				 }
			
 
				 
			
 
				+.result-paper {
			
 
				+  .attributes {
			
 
				+    display: table;
			
 
				+    border-spacing: 0.125rem;
			
 
				+
			
 
				+    div {
			
 
				+      display: table-row;
			
 
				+
			
 
				+      span {
			
 
				+        font-size: 0.9rem;
			
 
				+        margin-top: 0.25rem;
			
 
				+        display: table-cell;
			
 
				+
			
 
				+        time {
			
 
				+          font-size: 0.9rem;
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      span:first-child {
			
 
				+        color: var(--color-base-font);
			
 
				+        min-width: 10rem;
			
 
				+      }
			
 
				+
			
 
				+      span:nth-child(2) {
			
 
				+        color: var(--color-result-publishdate-font);
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				+
			
 
				+  .content {
			
 
				+    margin-top: 0.25rem;
			
 
				+  }
			
 
				+
			
 
				+  .comments {
			
 
				+    font-size: 0.9rem;
			
 
				+    margin: 0.25rem 0 0 0;
			
 
				+    padding: 0;
			
 
				+    word-wrap: break-word;
			
 
				+    line-height: 1.24;
			
 
				+    font-style: italic;
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				 .template_group_images {
			
 
				   display: flex;
			
 
				   flex-wrap: wrap;
			
@@ -955,6 +998,28 @@ article[data-vim-selected].category-social {
 
				     border: none !important;
			
 
				     background-color: var(--color-sidebar-background);
			
 
				   }
			
 
				+
			
 
				+  .result-paper {
			
 
				+    .attributes {
			
 
				+      display: block;
			
 
				+
			
 
				+      div {
			
 
				+        display: block;
			
 
				+
			
 
				+        span {
			
 
				+          display: inline;
			
 
				+        }
			
 
				+
			
 
				+        span:first-child {
			
 
				+          font-weight: bold;
			
 
				+        }
			
 
				+
			
 
				+        span:nth-child(2) {
			
 
				+          .ltr-margin-left(0.5rem);
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				 }
			
 
				 
			
 
				 /*
			
--- a/searx/templates/simple/result_templates/paper.html
+++ b/searx/templates/simple/result_templates/paper.html
@@ -0,0 +1,44 @@
 
				+{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer with context %}
			
 
				+
			
 
				+{{ result_header(result, favicons, image_proxify) -}}
			
 
				+<div class="attributes">
			
 
				+  {%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%}
			
 
				+  {%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
			
 
				+  {%- if result.journal -%}
			
 
				+    <div class="result_journal">
			
 
				+      <span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
			
 
				+      {%- if result.volume -%}
			
 
				+        &nbsp;{{- result.volume -}}
			
 
				+        {%- if result.number -%}
			
 
				+          .{{- result.number -}}
			
 
				+        {%- endif -%}
			
 
				+      {%- endif -%}
			
 
				+      {%- if result.pages -%}
			
 
				+        &nbsp;{{- result.pages -}}
			
 
				+      {%- endif -%}
			
 
				+      </span>
			
 
				+    </div>
			
 
				+  {%- endif %}
			
 
				+  {%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
			
 
				+  {%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
			
 
				+  {%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
			
 
				+  {%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
			
 
				+  {%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{- result.doi -}}</span></div>{% endif -%}
			
 
				+  {%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
			
 
				+  {%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
			
 
				+</div>
			
 
				+{%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%}
			
 
				+{%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%}
			
 
				+<p class="altlink">
			
 
				+  {%- if result.pdf_url -%}
			
 
				+    <a href="{{ result.pdf_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('PDF') }}</a>
			
 
				+  {%- endif -%}
			
 
				+  {%- if result.html_url -%}
			
 
				+      <a href="{{ result.html_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('HTML') }}</a>
			
 
				+  {%- endif -%}
			
 
				+  {%- if result.doi %}
			
 
				+    <a href="https://www.altmetric.com/details/doi/{{result.doi}}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>Altmetric</a>
			
 
				+  {% endif -%}
			
 
				+</p>
			
 
				+{{- result_sub_footer(result, proxify) -}}
			
 
				+{{- result_footer(result) }}
			
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -12,7 +12,6 @@ import os
 
				 import sys
			
 
				 import base64
			
 
				 
			
 
				-from datetime import datetime, timedelta
			
 
				 from timeit import default_timer
			
 
				 from html import escape
			
 
				 from io import StringIO
			
@@ -45,7 +44,6 @@ from flask.json import jsonify
 
				 from flask_babel import (
			
 
				     Babel,
			
 
				     gettext,
			
 
				-    format_date,
			
 
				     format_decimal,
			
 
				 )
			
 
				 
			
@@ -79,6 +77,7 @@ from searx.webutils import (
 
				     is_hmac_of,
			
 
				     is_flask_run_cmdline,
			
 
				     group_engines_in_tab,
			
 
				+    searxng_l10n_timespan,
			
 
				 )
			
 
				 from searx.webadapter import (
			
 
				     get_search_query_from_webapp,
			
@@ -718,25 +717,13 @@ def search():
 
				         if 'url' in result:
			
 
				             result['pretty_url'] = prettify_url(result['url'])
			
 
				 
			
 
				-        # TODO, check if timezone is calculated right  # pylint: disable=fixme
			
 
				         if result.get('publishedDate'):  # do not try to get a date from an empty string or a None type
			
 
				             try:  # test if publishedDate >= 1900 (datetime module bug)
			
 
				                 result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z')
			
 
				             except ValueError:
			
 
				                 result['publishedDate'] = None
			
 
				             else:
			
 
				-                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
			
 
				-                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None)
			
 
				-                    minutes = int((timedifference.seconds / 60) % 60)
			
 
				-                    hours = int(timedifference.seconds / 60 / 60)
			
 
				-                    if hours == 0:
			
 
				-                        result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
			
 
				-                    else:
			
 
				-                        result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(
			
 
				-                            hours=hours, minutes=minutes
			
 
				-                        )
			
 
				-                else:
			
 
				-                    result['publishedDate'] = format_date(result['publishedDate'])
			
 
				+                result['publishedDate'] = searxng_l10n_timespan(result['publishedDate'])
			
 
				 
			
 
				         # set result['open_group'] = True when the template changes from the previous result
			
 
				         # set result['close_group'] = True when the template changes on the next result
			
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -7,11 +7,14 @@ import hmac
 
				 import re
			
 
				 import inspect
			
 
				 import itertools
			
 
				+from datetime import datetime, timedelta
			
 
				 from typing import Iterable, List, Tuple, Dict
			
 
				 
			
 
				 from io import StringIO
			
 
				 from codecs import getincrementalencoder
			
 
				 
			
 
				+from flask_babel import gettext, format_date
			
 
				+
			
 
				 from searx import logger, settings
			
 
				 from searx.engines import Engine, OTHER_CATEGORY
			
 
				 
			
@@ -138,6 +141,28 @@ def highlight_content(content, query):
 
				     return content
			
 
				 
			
 
				 
			
 
				+def searxng_l10n_timespan(dt: datetime) -> str:  # pylint: disable=invalid-name
			
 
				+    """Returns a human-readable and translated string: for a date within the
			
 
				+    last day the time span to the present, otherwise the localized date.
			
 
				+
			
 
				+    For a date that is exactly January 1st, midnight, the returned string is
			
 
				+    just the year of that date (e.g. ``"2021"``), not a time span.
			
 
				+    """
			
 
				+    # TODO, check if timezone is calculated right  # pylint: disable=fixme
			
 
				+    d = dt.date()
			
 
				+    t = dt.time()
			
 
				+    if d.month == 1 and d.day == 1 and t.hour == 0 and t.minute == 0 and t.second == 0:
			
 
				+        return str(d.year)
			
 
				+    if dt.replace(tzinfo=None) >= datetime.now() - timedelta(days=1):
			
 
				+        timedifference = datetime.now() - dt.replace(tzinfo=None)
			
 
				+        minutes = int((timedifference.seconds / 60) % 60)
			
 
				+        hours = int(timedifference.seconds / 60 / 60)
			
 
				+        if hours == 0:
			
 
				+            return gettext('{minutes} minute(s) ago').format(minutes=minutes)
			
 
				+        return gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)
			
 
				+    return format_date(dt)
			
 
				+
			
 
				+
			
 
				 def is_flask_run_cmdline():
			
 
				     """Check if the application was started using "flask run" command line