Browse Source

Merge pull request #944 from return42/fix-939

[fix] engine: Semantic Scholar (Science) // rework & fix
Markus Heiser 3 years ago
parent
commit
0d86e7e4ea
2 changed files with 44 additions and 16 deletions
  1. 44 9
      searx/engines/semantic_scholar.py
  2. 0 7
      searx/settings.yml

+ 44 - 9
searx/engines/semantic_scholar.py

@@ -1,12 +1,23 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
-"""
- Semantic Scholar (Science)
+# lint: pylint
+"""Semantic Scholar (Science)
 """
 
 from json import dumps, loads
+from datetime import datetime
 
+about = {
+    "website": 'https://www.semanticscholar.org/',
+    "wikidata_id": 'Q22908627',
+    "official_api_documentation": 'https://api.semanticscholar.org/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
 
+paging = True
 search_url = 'https://www.semanticscholar.org/api/1/search'
+paper_url = 'https://www.semanticscholar.org/paper'
 
 
 def request(query, params):
@@ -34,13 +45,37 @@ def request(query, params):
 def response(resp):
     res = loads(resp.text)
     results = []
+
     for result in res['results']:
-        results.append(
-            {
-                'url': result['primaryPaperLink']['url'],
-                'title': result['title']['text'],
-                'content': result['paperAbstractTruncated'],
-            }
-        )
+        item = {}
+        metadata = []
+
+        url = result.get('primaryPaperLink', {}).get('url')
+        if not url and result.get('links'):
+            url = result.get('links')[0]
+        if not url:
+            alternatePaperLinks = result.get('alternatePaperLinks')
+            if alternatePaperLinks:
+                url = alternatePaperLinks[0].get('url')
+        if not url:
+            url = paper_url + '/%s' % result['id']
+
+        item['url'] = url
+
+        item['title'] = result['title']['text']
+        item['content'] = result['paperAbstract']['text']
+
+        metadata = result.get('fieldsOfStudy') or []
+        venue = result.get('venue', {}).get('text')
+        if venue:
+            metadata.append(venue)
+        if metadata:
+            item['metadata'] = ', '.join(metadata)
+
+        pubDate = result.get('pubDate')
+        if pubDate:
+            item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d")
+
+        results.append(item)
 
     return results

+ 0 - 7
searx/settings.yml

@@ -1205,13 +1205,6 @@ engines:
     disabled: true
     shortcut: se
     categories: science
-    about:
-      website: https://www.semanticscholar.org/
-      wikidata_id: Q22908627
-      official_api_documentation: https://api.semanticscholar.org/
-      use_official_api: false
-      require_api_key: false
-      results: JSON
 
   # Spotify needs API credentials
   # - name: spotify