Browse Source

[enh] add settings option to enable/disable search formats

Access to formats can be denied by settings configuration::

    search:
        formats: [html, csv, json, rss]

Closes: https://github.com/searxng/searxng/issues/95
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
6ed4616da9

+ 1 - 0
searx/settings.yml

@@ -18,6 +18,7 @@ search:
     default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
     ban_time_on_fail : 5 # ban time in seconds after engine errors
     max_ban_time_on_fail : 120 # max ban time in seconds after engine errors
+    formats: [html, csv, json, rss]  # remove format to deny access, use lower case.
 
 server:
     port : 8888

+ 5 - 1
searx/templates/oscar/results.html

@@ -80,9 +80,10 @@
                             <input id="search_url" type="url" class="form-control select-all-on-click cursor-text" name="search_url" value="{{ search_url() }}" readonly>{{- "" -}}
                         </div>{{- "" -}}
                     </form>
+                    {% if search_formats %}
                     <label>{{ _('Download results') }}</label>
                     <div class="clearfix"></div>
-                    {% for output_type in ('csv', 'json', 'rss') %}
+                    {% for output_type in search_formats %}
                     <form method="{{ method or 'POST' }}" action="{{ url_for('search') }}" class="form-inline pull-{% if rtl %}right{% else %}left{% endif %} result_download">
                         {{- search_form_attrs(pageno) -}}
                         <input type="hidden" name="format" value="{{ output_type }}">{{- "" -}}
@@ -90,8 +91,11 @@
                     </form>
                     {% endfor %}
                     <div class="clearfix"></div>
+                    {% if 'rss' in search_formats %}
                     <br /><label><a href="{{ search_url() }}&amp;format=rss">{{ _('RSS subscription') }}</a></label>
+                    {% endif %}
                     <div class="clearfix"></div>
+                    {% endif %}
                 </div>
             </div>
         </div><!-- /#sidebar_results -->

+ 3 - 1
searx/templates/simple/results.html

@@ -85,8 +85,9 @@
             <div class="selectable_url"><pre>{{ url_for('search', _external=True) }}?q={{ q|urlencode }}&amp;language={{ current_language }}&amp;time_range={{ time_range }}&amp;safesearch={{ safesearch }}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if timeout_limit %}&amp;timeout_limit={{ timeout_limit|urlencode }}{% endif %}</pre></div>
         </div>
         <div id="apis">
+          {% if search_formats %}
           <h4 class="title">{{ _('Download results') }}</h4>
-          {% for output_type in ('csv', 'json', 'rss') %}
+          {% for output_type in search_formats %}
 	  <div class="left">
             <form method="{{ method or 'POST' }}" action="{{ url_for('search') }}">
               <input type="hidden" name="q" value="{{ q|e }}">
@@ -103,6 +104,7 @@
             </form>
 	  </div>
           {% endfor %}
+          {% endif %}
         </div>
     </div>
 

+ 57 - 0
searx/utils.py

@@ -8,6 +8,7 @@ from os.path import splitext, join
 from random import choice
 from html.parser import HTMLParser
 from urllib.parse import urljoin, urlparse
+from collections.abc import Mapping
 
 from lxml import html
 from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
@@ -500,6 +501,62 @@ def get_engine_from_settings(name):
     return {}
 
 
+NOT_EXISTS = object()
+"""Singleton used by :py:obj:`get_value` if a key does not exists."""
+
+
+def get_value(dictionary, keyword, *keys, default=NOT_EXISTS):
+    """Return the value from a *deep* mapping type (e.g. the ``settings`` object
+    from yaml).  If the path to the *key* does not exist, ``default`` is returned
+    (no ``KeyError``); a non-mapping ``dictionary`` raises a ``TypeError``.
+
+    .. code:: python
+
+       >>> from searx.utils import get_value, NOT_EXISTS
+       >>> get_value(settings, 'checker', 'additional_tests', 'rosebud', 'result_container')
+       ['not_empty', ['one_title_contains', 'citizen kane']]
+
+       >>> get_value(settings, 'search', 'xxx') is NOT_EXISTS
+       True
+       >>> get_value(settings, 'search', 'formats')
+       ['html', 'csv', 'json', 'rss']
+
+    The list returned from the ``search.formats`` key is not a mapping type, you
+    can't traverse along non-mapping types.  If you try it, you will get a
+    :py:obj:`NOT_EXISTS`:
+
+    .. code:: python
+
+       >>> get_value(settings, 'search', 'formats', 'csv') is NOT_EXISTS
+       True
+       >>> get_value(settings, 'search', 'formats')[0]
+       'html'
+
+    For convenience you can replace :py:obj:`NOT_EXISTS` by a default value of
+    your choice:
+
+    .. code:: python
+
+       if 'csv' in get_value(settings, 'search', 'formats', default=[]):
+           print("csv format is enabled")
+
+    """
+    if not isinstance(dictionary, Mapping):
+        raise TypeError("expected mapping type, got %s" % type(dictionary))
+
+    ret_val = dictionary.get(keyword, default)
+
+    if ret_val is default:  # nothing stored under keyword: stop descending
+        return ret_val
+
+    if len(keys):
+        if not isinstance(ret_val, Mapping):  # can't traverse a non-mapping
+            ret_val = default
+        else:
+            ret_val = get_value(ret_val, *keys, default=default)
+    return ret_val
+
+
 def get_xpath(xpath_spec):
     """Return cached compiled XPath
 

+ 14 - 1
searx/webapp.py

@@ -31,6 +31,8 @@ from pygments.formatters import HtmlFormatter  # pylint: disable=no-name-in-modu
 from werkzeug.middleware.proxy_fix import ProxyFix
 from werkzeug.serving import WSGIRequestHandler
 
+import flask
+
 from flask import (
     Flask,
     request,
@@ -86,6 +88,7 @@ from searx.utils import (
     gen_useragent,
     dict_subset,
     match_language,
+    get_value,
 )
 from searx.version import VERSION_STRING
 from searx.query import RawTextQuery
@@ -161,6 +164,8 @@ for indice, theme in enumerate(themes):
     for (dirpath, dirnames, filenames) in os.walk(theme_img_path):
         global_favicons[indice].extend(filenames)
 
+OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
+
 STATS_SORT_PARAMETERS = {
     'name': (False, 'name', ''),
     'score': (True, 'score', 0),
@@ -511,6 +516,11 @@ def render(template_name, override_theme=None, **kwargs):
 
     kwargs['preferences'] = request.preferences
 
+    kwargs['search_formats'] = [
+        x for x in get_value(
+            settings, 'search', 'formats', default=OUTPUT_FORMATS)
+        if x != 'html']
+
     kwargs['brand'] = brand
 
     kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':'))
@@ -683,9 +693,12 @@ def search():
 
     # output_format
     output_format = request.form.get('format', 'html')
-    if output_format not in ['html', 'csv', 'json', 'rss']:
+    if output_format not in OUTPUT_FORMATS:
         output_format = 'html'
 
+    if output_format not in get_value(settings, 'search', 'formats', default=OUTPUT_FORMATS):
+        flask.abort(403)
+
     # check if there is query (not None and not an empty string)
     if not request.form.get('q'):
         if output_format == 'html':

+ 1 - 0
utils/templates/etc/searx/use_default_settings.yml

@@ -8,6 +8,7 @@ search:
     safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
     autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "swisscows", "qwant", "wikipedia" - leave blank to turn it off by default
     default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
+    formats: [html, csv, json, rss]
 
 server:
     port : 8888