
Science category: update the engines

* use the paper.html template
* fetch more data from the engines
* add crossref.py
Alexandre FLAMENT 2 years ago
commit
e36f85b836
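
All updated engines now return results through the `paper.html` template. As a rough sketch of what that means in practice, a result dict carries structured bibliographic fields instead of a flattened content string. The snippet below is illustrative only; the field names are taken from the diffs that follow, and the sample values are invented:

```python
# Illustrative result dict an updated engine appends (values are made up).
from datetime import datetime

example_result = {
    'template': 'paper.html',
    'url': 'https://arxiv.org/abs/2101.00001',
    'title': 'An example preprint',
    'content': 'Abstract of the paper ...',
    'authors': ['A. Author', 'B. Author'],
    'doi': '10.48550/arXiv.2101.00001',
    'journal': None,
    'tags': ['cs.IR'],
    'comments': None,
    'pdf_url': 'https://arxiv.org/pdf/2101.00001',
    'publishedDate': datetime(2021, 1, 1),
}
```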

+ 65 - 27
searx/engines/arxiv.py

@@ -3,9 +3,10 @@
  ArXiV (Scientific preprints)
 """
 
-from lxml import html
+from lxml import etree
+from lxml.etree import XPath
 from datetime import datetime
-from searx.utils import eval_xpath_list, eval_xpath_getindex
+from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex
 
 # about
 about = {
@@ -17,7 +18,7 @@ about = {
     "results": 'XML-RSS',
 }
 
-categories = ['science']
+categories = ['science', 'scientific publications']
 paging = True
 
 base_url = (
@@ -27,6 +28,23 @@ base_url = (
 # engine dependent config
 number_of_results = 10
 
+# xpaths
+arxiv_namespaces = {
+    "atom": "http://www.w3.org/2005/Atom",
+    "arxiv": "http://arxiv.org/schemas/atom",
+}
+xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces)
+xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces)
+xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces)
+xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces)
+xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces)
+xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces)
+xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces)
+xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces)
+xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces)
+xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces)
+xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces)
+
 
 def request(query, params):
     # basic search
@@ -41,30 +59,50 @@ def request(query, params):
 
 def response(resp):
     results = []
-
-    dom = html.fromstring(resp.content)
-
-    for entry in eval_xpath_list(dom, '//entry'):
-        title = eval_xpath_getindex(entry, './/title', 0).text
-
-        url = eval_xpath_getindex(entry, './/id', 0).text
-
-        content_string = '{doi_content}{abstract_content}'
-
-        abstract = eval_xpath_getindex(entry, './/summary', 0).text
-
-        #  If a doi is available, add it to the snipppet
-        doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None)
-        doi_content = doi_element.text if doi_element is not None else ''
-        content = content_string.format(doi_content=doi_content, abstract_content=abstract)
-
-        if len(content) > 300:
-            content = content[0:300] + "..."
-        # TODO: center snippet on query term
-
-        publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ')
-
-        res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content}
+    dom = etree.fromstring(resp.content)
+    for entry in eval_xpath_list(dom, xpath_entry):
+        title = eval_xpath_getindex(entry, xpath_title, 0).text
+
+        url = eval_xpath_getindex(entry, xpath_id, 0).text
+        abstract = eval_xpath_getindex(entry, xpath_summary, 0).text
+
+        authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)]
+
+        #  doi
+        doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None)
+        doi = None if doi_element is None else doi_element.text
+
+        # pdf
+        pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None)
+        pdf_url = None if pdf_element is None else pdf_element.attrib.get('href')
+
+        # journal
+        journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None)
+        journal = None if journal_element is None else journal_element.text
+
+        # tags
+        tag_elements = eval_xpath(entry, xpath_category)
+        tags = [str(tag) for tag in tag_elements]
+
+        # comments
+        comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None)
+        comments = None if comments_elements is None else comments_elements.text
+
+        publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ')
+
+        res_dict = {
+            'template': 'paper.html',
+            'url': url,
+            'title': title,
+            'publishedDate': publishedDate,
+            'content': abstract,
+            'doi': doi,
+            'authors': authors,
+            'journal': journal,
+            'tags': tags,
+            'comments': comments,
+            'pdf_url': pdf_url,
+        }
 
 
         results.append(res_dict)
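
For anyone who wants to poke at the Atom feed outside SearXNG, here is a minimal standalone sketch of how the namespaced `XPath` objects above resolve against an arXiv API response. The query URL is assumed from the public arXiv API and is not shown in this hunk:

```python
# Standalone sketch (not part of the engine): parse a small arXiv Atom feed
# with the same namespace map and XPath objects as the diff above.
import urllib.request
from lxml import etree
from lxml.etree import XPath

ns = {
    "atom": "http://www.w3.org/2005/Atom",
    "arxiv": "http://arxiv.org/schemas/atom",
}
xpath_entry = XPath('//atom:entry', namespaces=ns)
xpath_title = XPath('.//atom:title', namespaces=ns)

# Assumed public endpoint; the engine's base_url is context not shown here.
url = 'http://export.arxiv.org/api/query?search_query=all:electron&max_results=2'
with urllib.request.urlopen(url) as response:
    dom = etree.fromstring(response.read())

for entry in xpath_entry(dom):
    print(xpath_title(entry)[0].text)
```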
 
 

+ 59 - 0
searx/engines/crossref.py

@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""CrossRef (Science)
+"""
+
+from urllib.parse import urlencode
+from searx.utils import html_to_text
+
+about = {
+    "website": 'https://www.crossref.org/',
+    "wikidata_id": 'Q5188229',
+    "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
+categories = ['science', 'scientific publications']
+paging = True
+search_url = 'https://api.crossref.org/works'
+
+
+def request(query, params):
+    params['url'] = search_url + '?' + urlencode(dict(query=query, offset=20 * (params['pageno'] - 1)))
+    return params
+
+
+def response(resp):
+    res = resp.json()
+    results = []
+    for record in res['message']['items']:
+        record_type = record['type']
+        if record_type == 'book-chapter':
+            title = record['container-title'][0]
+            if record['title'][0].lower().strip() != title.lower().strip():
+                title = title + ' (' + record['title'][0] + ')'
+            journal = None
+        else:
+            title = record['title'][0]
+            journal = record.get('container-title', [None])[0]
+        url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL']
+        authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])]
+        isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])]
+        results.append(
+            {
+                'template': 'paper.html',
+                'url': url,
+                'title': title,
+                'journal': journal,
+                'volume': record.get('volume'),
+                'type': record['type'],
+                'content': html_to_text(record.get('abstract', '')),
+                'publisher': record.get('publisher'),
+                'authors': authors,
+                'doi': record['DOI'],
+                'isbn': isbn,
+            }
+        )
+    return results
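
A quick way to sanity-check the new engine against the live endpoint is a standalone query built the same way `request()` builds its URL. This sketch uses `requests` (which the engine itself does not); the field names match the code above:

```python
# Standalone check of the Crossref works endpoint and the fields response() reads.
import requests

res = requests.get(
    'https://api.crossref.org/works',
    params={'query': 'searx', 'offset': 0},
    timeout=10,
).json()
for record in res['message']['items'][:5]:
    title = (record.get('title') or ['(untitled)'])[0]
    journal = (record.get('container-title') or [None])[0]
    print(record['DOI'], '-', title, '-', journal)
```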

+ 73 - 12
searx/engines/google_scholar.py

@@ -13,10 +13,12 @@ Definitions`_.
 
 from urllib.parse import urlencode
 from datetime import datetime
+from typing import Optional
 from lxml import html
 
 from searx.utils import (
     eval_xpath,
+    eval_xpath_getindex,
     eval_xpath_list,
     extract_text,
 )
@@ -46,7 +48,7 @@ about = {
 }
 
 # engine dependent config
-categories = ['science']
+categories = ['science', 'scientific publications']
 paging = True
 language_support = True
 use_locale_domain = True
@@ -99,7 +101,43 @@ def request(query, params):
     return params
 
 
-def response(resp):
+def parse_gs_a(text: Optional[str]):
+    """Parse the text written in green.
+
+    Possible formats:
+    * "{authors} - {journal}, {year} - {publisher}"
+    * "{authors} - {year} - {publisher}"
+    * "{authors} - {publisher}"
+    """
+    if text is None or text == "":
+        return None, None, None, None
+
+    s_text = text.split(' - ')
+    authors = s_text[0].split(', ')
+    publisher = s_text[-1]
+    if len(s_text) != 3:
+        return authors, None, publisher, None
+
+    # the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}"
+    # get journal and year
+    journal_year = s_text[1].split(', ')
+    # journal is optional and may contains some coma
+    if len(journal_year) > 1:
+        journal = ', '.join(journal_year[0:-1])
+        if journal == '…':
+            journal = None
+    else:
+        journal = None
+    # year
+    year = journal_year[-1]
+    try:
+        publishedDate = datetime.strptime(year.strip(), '%Y')
+    except ValueError:
+        publishedDate = None
+    return authors, journal, publisher, publishedDate
+
+
+def response(resp):  # pylint: disable=too-many-locals
     """Get response from google's search request"""
     results = []
 
@@ -112,30 +150,53 @@ def response(resp):
     dom = html.fromstring(resp.text)
 
     # parse results
-    for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'):
+    for result in eval_xpath_list(dom, '//div[@data-cid]'):
 
-        title = extract_text(eval_xpath(result, './h3[1]//a'))
+        title = extract_text(eval_xpath(result, './/h3[1]//a'))
 
         if not title:
             # this is a [ZITATION] block
             continue
 
-        url = eval_xpath(result, './h3[1]//a/@href')[0]
-        content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or ''
-
-        pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]'))
-        if pub_info:
-            content += "[%s]" % pub_info
-
         pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]'))
         if pub_type:
-            title = title + " " + pub_type
+            pub_type = pub_type[1:-1].lower()
+
+        url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0)
+        content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]'))
+        authors, journal, publisher, publishedDate = parse_gs_a(
+            extract_text(eval_xpath(result, './/div[@class="gs_a"]'))
+        )
+        if publisher in url:
+            publisher = None
+
+        # cited by
+        comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]'))
+
+        # link to the html or pdf document
+        html_url = None
+        pdf_url = None
+        doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None)
+        doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]'))
+        if doc_type == "[PDF]":
+            pdf_url = doc_url
+        else:
+            html_url = doc_url
 
         results.append(
             {
+                'template': 'paper.html',
+                'type': pub_type,
                 'url': url,
                 'title': title,
+                'authors': authors,
+                'publisher': publisher,
+                'journal': journal,
+                'publishedDate': publishedDate,
                 'content': content,
+                'comments': comments,
+                'html_url': html_url,
+                'pdf_url': pdf_url,
             }
         )
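
The docstring formats of `parse_gs_a()` translate into return values like these. The sample strings are made up, not scraped from a live Scholar page, and the sketch assumes `parse_gs_a` from the hunk above is in scope:

```python
# parse_gs_a() on the three formats listed in its docstring.
print(parse_gs_a('J Doe, A Smith - Nature, 2019 - nature.com'))
# (['J Doe', 'A Smith'], 'Nature', 'nature.com', datetime(2019, 1, 1))

print(parse_gs_a('J Doe - 2021 - example.org'))
# (['J Doe'], None, 'example.org', datetime(2021, 1, 1))

print(parse_gs_a('J Doe - example.org'))
# (['J Doe'], None, 'example.org', None)
```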
 
 

+ 59 - 40
searx/engines/pubmed.py

@@ -3,11 +3,15 @@
  PubMed (Scholar publications)
 """
 
-from flask_babel import gettext
 from lxml import etree
 from datetime import datetime
 from urllib.parse import urlencode
 from searx.network import get
+from searx.utils import (
+    eval_xpath_getindex,
+    eval_xpath_list,
+    extract_text,
+)
 
 # about
 about = {
@@ -22,7 +26,7 @@ about = {
     "results": 'XML',
 }
 
-categories = ['science']
+categories = ['science', 'scientific publications']
 
 base_url = (
     'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}'
@@ -63,46 +67,61 @@ def response(resp):
 
     retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args)
 
-    search_results_xml = get(retrieve_url_encoded).content
-    search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation')
-
-    for entry in search_results:
-        title = entry.xpath('.//Article/ArticleTitle')[0].text
+    search_results_response = get(retrieve_url_encoded).content
+    search_results = etree.XML(search_results_response)
+    for entry in eval_xpath_list(search_results, '//PubmedArticle'):
+        medline = eval_xpath_getindex(entry, './MedlineCitation', 0)
 
-        pmid = entry.xpath('.//PMID')[0].text
+        title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text
+        pmid = eval_xpath_getindex(medline, './/PMID', 0).text
         url = pubmed_url + pmid
-
-        try:
-            content = entry.xpath('.//Abstract/AbstractText')[0].text
-        except:
-            content = gettext('No abstract is available for this publication.')
-
-        #  If a doi is available, add it to the snipppet
-        try:
-            doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text
-            content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content)
-        except:
-            pass
-
-        if len(content) > 300:
-            content = content[0:300] + "..."
-        # TODO: center snippet on query term
-
-        res_dict = {'url': url, 'title': title, 'content': content}
-
-        try:
-            publishedDate = datetime.strptime(
-                entry.xpath('.//DateCreated/Year')[0].text
-                + '-'
-                + entry.xpath('.//DateCreated/Month')[0].text
-                + '-'
-                + entry.xpath('.//DateCreated/Day')[0].text,
-                '%Y-%m-%d',
-            )
-            res_dict['publishedDate'] = publishedDate
-        except:
-            pass
+        content = extract_text(
+            eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True
+        )
+        doi = extract_text(
+            eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True
+        )
+        journal = extract_text(
+            eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True
+        )
+        issn = extract_text(
+            eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True
+        )
+        authors = []
+        for author in eval_xpath_list(medline, './Article/AuthorList/Author'):
+            f = eval_xpath_getindex(author, './ForeName', 0, default=None)
+            l = eval_xpath_getindex(author, './LastName', 0, default=None)
+            f = '' if f is None else f.text
+            l = '' if l is None else l.text
+            authors.append((f + ' ' + l).strip())
+
+        res_dict = {
+            'template': 'paper.html',
+            'url': url,
+            'title': title,
+            'content': content,
+            'journal': journal,
+            'issn': [issn],
+            'authors': authors,
+            'doi': doi,
+        }
+
+        accepted_date = eval_xpath_getindex(
+            entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None
+        )
+        if accepted_date is not None:
+            year = eval_xpath_getindex(accepted_date, './Year', 0)
+            month = eval_xpath_getindex(accepted_date, './Month', 0)
+            day = eval_xpath_getindex(accepted_date, './Day', 0)
+            try:
+                publishedDate = datetime.strptime(
+                    year.text + '-' + month.text + '-' + day.text,
+                    '%Y-%m-%d',
+                )
+                res_dict['publishedDate'] = publishedDate
+            except Exception as e:
+                print(e)
 
 
         results.append(res_dict)
 
 
-        return results
+    return results
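
To see what the new XPath-based extraction does without calling the E-utilities, here is a small offline sketch against an abridged record. The XML structure is inferred from the XPaths above, not copied from a real efetch response:

```python
# Offline sketch of the author extraction added above, on an abridged record.
from lxml import etree

xml = b"""
<PubmedArticleSet>
  <PubmedArticle>
    <MedlineCitation>
      <PMID>12345</PMID>
      <Article>
        <ArticleTitle>Example title</ArticleTitle>
        <AuthorList>
          <Author><ForeName>Ada</ForeName><LastName>Lovelace</LastName></Author>
          <Author><LastName>Collective</LastName></Author>
        </AuthorList>
      </Article>
    </MedlineCitation>
  </PubmedArticle>
</PubmedArticleSet>
"""
root = etree.XML(xml)
for medline in root.xpath('//PubmedArticle/MedlineCitation'):
    authors = []
    for author in medline.xpath('./Article/AuthorList/Author'):
        fore = author.findtext('./ForeName') or ''
        last = author.findtext('./LastName') or ''
        authors.append((fore + ' ' + last).strip())
    print(medline.findtext('.//PMID'), authors)  # 12345 ['Ada Lovelace', 'Collective']
```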

+ 41 - 16
searx/engines/semantic_scholar.py

@@ -6,6 +6,8 @@
 from json import dumps, loads
 from datetime import datetime
 
+from flask_babel import gettext
+
 about = {
     "website": 'https://www.semanticscholar.org/',
     "wikidata_id": 'Q22908627',
@@ -15,6 +17,7 @@ about = {
     "results": 'JSON',
 }
 
+categories = ['science', 'scientific publications']
 paging = True
 search_url = 'https://www.semanticscholar.org/api/1/search'
 paper_url = 'https://www.semanticscholar.org/paper'
@@ -47,9 +50,6 @@ def response(resp):
     results = []
 
     for result in res['results']:
-        item = {}
-        metadata = []
-
         url = result.get('primaryPaperLink', {}).get('url')
         if not url and result.get('links'):
             url = result.get('links')[0]
@@ -60,22 +60,47 @@ def response(resp):
         if not url:
             url = paper_url + '/%s' % result['id']
 
-        item['url'] = url
+        # publishedDate
+        if 'pubDate' in result:
+            publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d")
+        else:
+            publishedDate = None
 
-        item['title'] = result['title']['text']
-        item['content'] = result['paperAbstract']['text']
+        # authors
+        authors = [author[0]['name'] for author in result.get('authors', [])]
 
-        metadata = result.get('fieldsOfStudy') or []
-        venue = result.get('venue', {}).get('text')
-        if venue:
-            metadata.append(venue)
-        if metadata:
-            item['metadata'] = ', '.join(metadata)
+        # pick the first alternate link that is not from the crawler
+        pdf_url = None
+        for doc in result.get('alternatePaperLinks', []):
+            if doc['linkType'] != 'crawler':
+                pdf_url = doc['url']
+                break
 
-        pubDate = result.get('pubDate')
-        if pubDate:
-            item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
+        # comments
+        comments = None
+        if 'citationStats' in result:
+            comments = gettext(
+                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
+            ).format(
+                numCitations=result['citationStats']['numCitations'],
+                firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'],
+                lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'],
+            )
 
 
-        results.append(item)
+        results.append(
+            {
+                'template': 'paper.html',
+                'url': url,
+                'title': result['title']['text'],
+                'content': result['paperAbstract']['text'],
+                'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'),
+                'doi': result.get('doiInfo', {}).get('doi'),
+                'tags': result.get('fieldsOfStudy'),
+                'authors': authors,
+                'pdf_url': pdf_url,
+                'publishedDate': publishedDate,
+                'comments': comments,
+            }
+        )
 
     return results
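
Two of the new fields in plainer terms, on an invented `result` fragment (the `gettext` wrapper is dropped here): `pdf_url` takes the first alternate link whose `linkType` is not `crawler`, and `comments` becomes a citation summary built from `citationStats`:

```python
# Invented fragment shaped like one entry of res['results'] above.
result = {
    'alternatePaperLinks': [
        {'linkType': 'crawler', 'url': 'https://example.org/cache.pdf'},
        {'linkType': 'open-access', 'url': 'https://example.org/paper.pdf'},
    ],
    'citationStats': {
        'numCitations': 42,
        'firstCitationVelocityYear': 2018,
        'lastCitationVelocityYear': 2022,
    },
}

pdf_url = None
for doc in result.get('alternatePaperLinks', []):
    if doc['linkType'] != 'crawler':
        pdf_url = doc['url']
        break
print(pdf_url)  # https://example.org/paper.pdf

comments = (
    '{numCitations} citations from the year '
    '{firstCitationVelocityYear} to {lastCitationVelocityYear}'
).format(**result['citationStats'])
print(comments)  # 42 citations from the year 2018 to 2022
```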

+ 18 - 20
searx/engines/springer.py

@@ -19,7 +19,7 @@ about = {
     "results": 'JSON',
 }
 
-categories = ['science']
+categories = ['science', 'scientific publications']
 paging = True
 nb_per_page = 10
 api_key = 'unset'
@@ -41,32 +41,30 @@ def response(resp):
     json_data = loads(resp.text)
 
     for record in json_data['records']:
-        content = record['abstract'][0:500]
-        if len(record['abstract']) > len(content):
-            content += "..."
+        content = record['abstract']
         published = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
-
-        metadata = [
-            record[x]
-            for x in [
-                'publicationName',
-                'identifier',
-                'contentType',
-            ]
-            if record.get(x) is not None
-        ]
-
-        metadata = ' / '.join(metadata)
-        if record.get('startingPage') and record.get('endingPage') is not None:
-            metadata += " (%(startingPage)s-%(endingPage)s)" % record
-
+        authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']]
+        tags = record.get('genre')
+        if isinstance(tags, str):
+            tags = [tags]
         results.append(
             {
+                'template': 'paper.html',
                 'title': record['title'],
                 'url': record['url'][0]['value'].replace('http://', 'https://', 1),
+                'type': record.get('contentType'),
                 'content': content,
                 'publishedDate': published,
-                'metadata': metadata,
+                'authors': authors,
+                'doi': record.get('doi'),
+                'journal': record.get('publicationName'),
+                'start_page': record.get('startingPage'),
+                'end_page': record.get('endingPage'),
+                'tags': tags,
+                'issn': [record.get('issn')],
+                'isbn': [record.get('isbn')],
+                'volume': record.get('volume') or None,
+                'number': record.get('number') or None,
             }
         )
     return results
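
The one-liner building `authors` reverses Springer's "Family, Given" creator strings. A tiny standalone check (the sample creators are made up):

```python
# "Family, Given" -> "Given Family", as in the list comprehension above.
creators = [{'creator': 'Curie, Marie'}, {'creator': 'Einstein, Albert'}]
authors = [" ".join(author['creator'].split(', ')[::-1]) for author in creators]
print(authors)  # ['Marie Curie', 'Albert Einstein']
```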

+ 1 - 0
searx/searxng.msg

@@ -43,6 +43,7 @@ CATEGORY_GROUPS = {
     'REPOS': 'repos',
     'SOFTWARE_WIKIS': 'software wikis',
     'WEB': 'web',
+    'SCIENTIFIC PUBLICATIONS': 'scientific publications',
 }
 
 STYLE_NAMES = {

+ 4 - 22
searx/settings.yml

@@ -319,7 +319,6 @@ engines:
   - name: arxiv
     engine: arxiv
     shortcut: arx
-    categories: science
     timeout: 4.0
 
   # tmp suspended:  dh key too small
@@ -411,23 +410,9 @@ engines:
   #   api_key: 'unset'
 
   - name: crossref
-    engine: json_engine
-    paging: true
-    search_url: https://search.crossref.org/dois?q={query}&page={pageno}
-    url_query: doi
-    title_query: title
-    title_html_to_text: true
-    content_query: fullCitation
-    content_html_to_text: true
-    categories: science
+    engine: crossref
     shortcut: cr
-    about:
-      website: https://www.crossref.org/
-      wikidata_id: Q5188229
-      official_api_documentation: https://github.com/CrossRef/rest-api-doc
-      use_official_api: false
-      require_api_key: false
-      results: JSON
+    timeout: 10
 
   - name: yep
     engine: json_engine
@@ -1068,7 +1053,7 @@ engines:
     title_query: metadata/oaf:entity/oaf:result/title/$
     content_query: metadata/oaf:entity/oaf:result/description/$
     content_html_to_text: true
-    categories: science
+    categories: "science"
     shortcut: oad
     timeout: 5.0
     about:
@@ -1198,7 +1183,6 @@ engines:
   - name: pubmed
     engine: pubmed
     shortcut: pub
-    categories: science
     timeout: 3.0
 
   - name: pypi
@@ -1346,7 +1330,6 @@ engines:
     engine: semantic_scholar
     disabled: true
     shortcut: se
-    categories: science
 
   # Spotify needs API credentials
   # - name: spotify
@@ -1372,8 +1355,7 @@ engines:
   #   # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601"
   #   api_key: 'unset'
   #   shortcut: springer
-  #   categories: science
-  #   timeout: 6.0
+  #   timeout: 15.0
 
   - name: startpage
     engine: startpage