Browse Source

wikipedia wikidata infobox + disable wikisource (#2806)

Co-authored-by: Markus Heiser <markus.heiser@darmarit.de>
Émilien (perso) 1 year ago
parent
commit
ad725ce7d7
3 changed files with 45 additions and 15 deletions
  1. 16 3
      searx/engines/wikidata.py
  2. 22 12
      searx/engines/wikipedia.py
  3. 7 0
      searx/settings.yml

+ 16 - 3
searx/engines/wikidata.py

@@ -41,6 +41,12 @@ about = {
     "results": 'JSON',
 }
 
+display_type = ["infobox"]
+"""A list of display types composed from ``infobox`` and ``list``.  The latter
+one will add a hit to the result list.  The former one will show a hit in the
+infobox.  Both values can be set, or one of the two can be set."""
+
+
 # SPARQL
 SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
 SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'
@@ -268,8 +274,9 @@ def get_results(attribute_result, attributes, language):
                 for url in value.split(', '):
                     infobox_urls.append({'title': attribute.get_label(language), 'url': url, **attribute.kwargs})
                     # "normal" results (not infobox) include official website and Wikipedia links.
-                    if attribute.kwargs.get('official') or attribute_type == WDArticle:
+                    if "list" in display_type and (attribute.kwargs.get('official') or attribute_type == WDArticle):
                         results.append({'title': infobox_title, 'url': url, "content": infobox_content})
+
                     # update the infobox_id with the wikipedia URL
                     # first the local wikipedia URL, and as fallback the english wikipedia URL
                     if attribute_type == WDArticle and (
@@ -305,9 +312,15 @@ def get_results(attribute_result, attributes, language):
     # add the wikidata URL at the end
     infobox_urls.append({'title': 'Wikidata', 'url': attribute_result['item']})
 
-    if img_src is None and len(infobox_attributes) == 0 and len(infobox_urls) == 1 and len(infobox_content) == 0:
+    if (
+        "list" in display_type
+        and img_src is None
+        and len(infobox_attributes) == 0
+        and len(infobox_urls) == 1
+        and len(infobox_content) == 0
+    ):
         results.append({'url': infobox_urls[0]['url'], 'title': infobox_title, 'content': infobox_content})
-    else:
+    elif "infobox" in display_type:
         results.append(
             {
                 'infobox': infobox_title,

+ 22 - 12
searx/engines/wikipedia.py

@@ -77,6 +77,11 @@ about = {
     "results": 'JSON',
 }
 
+display_type = ["infobox"]
+"""A list of display types composed from ``infobox`` and ``list``.  The latter
+one will add a hit to the result list.  The former one will show a hit in the
+infobox.  Both values can be set, or one of the two can be set."""
+
 send_accept_language_header = True
 """The HTTP ``Accept-Language`` header is needed for wikis where
 LanguageConverter_ is enabled."""
@@ -185,18 +190,23 @@ def response(resp):
     api_result = resp.json()
     title = utils.html_to_text(api_result.get('titles', {}).get('display') or api_result.get('title'))
     wikipedia_link = api_result['content_urls']['desktop']['page']
-    results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')})
-
-    if api_result.get('type') == 'standard':
-        results.append(
-            {
-                'infobox': title,
-                'id': wikipedia_link,
-                'content': api_result.get('extract', ''),
-                'img_src': api_result.get('thumbnail', {}).get('source'),
-                'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
-            }
-        )
+
+    if "list" in display_type or api_result.get('type') != 'standard':
+        # show item in the result list if 'list' is in the display options or it
+        # is an item that can't be displayed in an infobox.
+        results.append({'url': wikipedia_link, 'title': title, 'content': api_result.get('description', '')})
+
+    if "infobox" in display_type:
+        if api_result.get('type') == 'standard':
+            results.append(
+                {
+                    'infobox': title,
+                    'id': wikipedia_link,
+                    'content': api_result.get('extract', ''),
+                    'img_src': api_result.get('thumbnail', {}).get('source'),
+                    'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}],
+                }
+            )
 
     return results
 

+ 7 - 0
searx/settings.yml

@@ -371,7 +371,10 @@ engines:
   - name: wikipedia
     engine: wikipedia
     shortcut: wp
+    # add "list" to the array to get results in the results list
+    display_type: ["infobox"]
     base_url: 'https://{language}.wikipedia.org/'
+    categories: [general]
 
   - name: bilibili
     engine: bilibili
@@ -584,7 +587,10 @@ engines:
     shortcut: wd
     timeout: 3.0
     weight: 2
+    # add "list" to the array to get results in the results list
+    display_type: ["infobox"]
     tests: *tests_infobox
+    categories: [general]
 
   - name: duckduckgo
     engine: duckduckgo
@@ -1622,6 +1628,7 @@ engines:
     categories: [general, wikimedia]
     base_url: "https://{language}.wikisource.org/"
     search_type: text
+    disabled: true
     about:
       website: https://www.wikisource.org/
       wikidata_id: Q263