Browse Source

[enh] add settings option to enable/disable search formats

Access to formats can be denied by settings configuration::

    search:
        formats: [html, csv, json, rss]

Closes: https://github.com/searxng/searxng/issues/95
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 3 years ago
parent
commit
6ed4616da9

+ 1 - 0
searx/settings.yml

@@ -18,6 +18,7 @@ search:
     default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
     default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
     ban_time_on_fail : 5 # ban time in seconds after engine errors
     ban_time_on_fail : 5 # ban time in seconds after engine errors
     max_ban_time_on_fail : 120 # max ban time in seconds after engine errors
     max_ban_time_on_fail : 120 # max ban time in seconds after engine errors
+    formats: [html, csv, json, rss]  # remove format to deny access, use lower case.
 
 
 server:
 server:
     port : 8888
     port : 8888

+ 5 - 1
searx/templates/oscar/results.html

@@ -80,9 +80,10 @@
                             <input id="search_url" type="url" class="form-control select-all-on-click cursor-text" name="search_url" value="{{ search_url() }}" readonly>{{- "" -}}
                             <input id="search_url" type="url" class="form-control select-all-on-click cursor-text" name="search_url" value="{{ search_url() }}" readonly>{{- "" -}}
                         </div>{{- "" -}}
                         </div>{{- "" -}}
                     </form>
                     </form>
+                    {% if search_formats %}
                     <label>{{ _('Download results') }}</label>
                     <label>{{ _('Download results') }}</label>
                     <div class="clearfix"></div>
                     <div class="clearfix"></div>
-                    {% for output_type in ('csv', 'json', 'rss') %}
+                    {% for output_type in search_formats %}
                     <form method="{{ method or 'POST' }}" action="{{ url_for('search') }}" class="form-inline pull-{% if rtl %}right{% else %}left{% endif %} result_download">
                     <form method="{{ method or 'POST' }}" action="{{ url_for('search') }}" class="form-inline pull-{% if rtl %}right{% else %}left{% endif %} result_download">
                         {{- search_form_attrs(pageno) -}}
                         {{- search_form_attrs(pageno) -}}
                         <input type="hidden" name="format" value="{{ output_type }}">{{- "" -}}
                         <input type="hidden" name="format" value="{{ output_type }}">{{- "" -}}
@@ -90,8 +91,11 @@
                     </form>
                     </form>
                     {% endfor %}
                     {% endfor %}
                     <div class="clearfix"></div>
                     <div class="clearfix"></div>
+                    {% if 'rss' in search_formats %}
                     <br /><label><a href="{{ search_url() }}&amp;format=rss">{{ _('RSS subscription') }}</a></label>
                     <br /><label><a href="{{ search_url() }}&amp;format=rss">{{ _('RSS subscription') }}</a></label>
+                    {% endif %}
                     <div class="clearfix"></div>
                     <div class="clearfix"></div>
+                    {% endif %}
                 </div>
                 </div>
             </div>
             </div>
         </div><!-- /#sidebar_results -->
         </div><!-- /#sidebar_results -->

+ 3 - 1
searx/templates/simple/results.html

@@ -85,8 +85,9 @@
             <div class="selectable_url"><pre>{{ url_for('search', _external=True) }}?q={{ q|urlencode }}&amp;language={{ current_language }}&amp;time_range={{ time_range }}&amp;safesearch={{ safesearch }}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if timeout_limit %}&amp;timeout_limit={{ timeout_limit|urlencode }}{% endif %}</pre></div>
             <div class="selectable_url"><pre>{{ url_for('search', _external=True) }}?q={{ q|urlencode }}&amp;language={{ current_language }}&amp;time_range={{ time_range }}&amp;safesearch={{ safesearch }}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if timeout_limit %}&amp;timeout_limit={{ timeout_limit|urlencode }}{% endif %}</pre></div>
         </div>
         </div>
         <div id="apis">
         <div id="apis">
+          {% if search_formats %}
           <h4 class="title">{{ _('Download results') }}</h4>
           <h4 class="title">{{ _('Download results') }}</h4>
-          {% for output_type in ('csv', 'json', 'rss') %}
+          {% for output_type in search_formats %}
 	  <div class="left">
 	  <div class="left">
             <form method="{{ method or 'POST' }}" action="{{ url_for('search') }}">
             <form method="{{ method or 'POST' }}" action="{{ url_for('search') }}">
               <input type="hidden" name="q" value="{{ q|e }}">
               <input type="hidden" name="q" value="{{ q|e }}">
@@ -103,6 +104,7 @@
             </form>
             </form>
 	  </div>
 	  </div>
           {% endfor %}
           {% endfor %}
+          {% endif %}
         </div>
         </div>
     </div>
     </div>
 
 

+ 57 - 0
searx/utils.py

@@ -8,6 +8,7 @@ from os.path import splitext, join
 from random import choice
 from random import choice
 from html.parser import HTMLParser
 from html.parser import HTMLParser
 from urllib.parse import urljoin, urlparse
 from urllib.parse import urljoin, urlparse
+from collections.abc import Mapping
 
 
 from lxml import html
 from lxml import html
 from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
 from lxml.etree import ElementBase, XPath, XPathError, XPathSyntaxError, _ElementStringResult, _ElementUnicodeResult
@@ -500,6 +501,62 @@ def get_engine_from_settings(name):
     return {}
     return {}
 
 
 
 
+NOT_EXISTS = object()
+"""Singleton used by :py:obj:`get_value` if a key does not exist."""
+
+
+def get_value(dictionary, keyword, *keys, default=NOT_EXISTS):
+    """Return the value from a *deep* mapping type (e.g. the ``settings`` object
+    from yaml).  If the path to the *key* does not exist, ``default`` is
+    returned (:py:obj:`NOT_EXISTS` unless overridden); no ``KeyError``
+    exception is raised.
+
+    :param dictionary: mapping to look up; a ``TypeError`` is raised if it is
+        not a :py:class:`collections.abc.Mapping`.
+    :param keyword: first key of the path.
+    :param keys: further keys, each looked up in the mapping found by the
+        previous key.
+    :param default: value returned when the path can not be resolved.
+
+    .. code:: python
+
+       >>> from searx.utils import get_value, NOT_EXISTS
+       >>> get_value(settings, 'checker', 'additional_tests', 'rosebud', 'result_container')
+       ['not_empty', ['one_title_contains', 'citizen kane']]
+
+       >>> get_value(settings, 'search', 'xxx') is NOT_EXISTS
+       True
+       >>> get_value(settings, 'search', 'formats')
+       ['html', 'csv', 'json', 'rss']
+
+    The list returned from the ``search.formats`` key is not a mapping type, you
+    can't traverse along non-mapping types.  If you try it, you will get a
+    :py:obj:`NOT_EXISTS`:
+
+    .. code:: python
+
+       >>> get_value(settings, 'search', 'formats', 'csv') is NOT_EXISTS
+       True
+       >>> get_value(settings, 'search', 'formats')[0]
+       'html'
+
+    For convenience you can replace :py:obj:`NOT_EXISTS` by a default value of
+    your choice:
+
+    .. code:: python
+
+       if 'csv' not in get_value(settings, 'search', 'formats', default=[]):
+           print("csv format is denied")
+
+    """
+    if not isinstance(dictionary, Mapping):
+        raise TypeError("expected mapping type, got %s" % type(dictionary))
+
+    ret_val = dictionary.get(keyword, default)
+
+    # identity check: the key is missing at this level, stop descending
+    if ret_val is default:
+        return ret_val
+
+    if len(keys):
+        # more keys follow, but only a mapping can be traversed any further
+        if not isinstance(ret_val, Mapping):
+            ret_val = default
+        else:
+            ret_val = get_value(ret_val, *keys, default=default)
+    return ret_val
+
+
 def get_xpath(xpath_spec):
 def get_xpath(xpath_spec):
     """Return cached compiled XPath
     """Return cached compiled XPath
 
 

+ 14 - 1
searx/webapp.py

@@ -31,6 +31,8 @@ from pygments.formatters import HtmlFormatter  # pylint: disable=no-name-in-modu
 from werkzeug.middleware.proxy_fix import ProxyFix
 from werkzeug.middleware.proxy_fix import ProxyFix
 from werkzeug.serving import WSGIRequestHandler
 from werkzeug.serving import WSGIRequestHandler
 
 
+import flask
+
 from flask import (
 from flask import (
     Flask,
     Flask,
     request,
     request,
@@ -86,6 +88,7 @@ from searx.utils import (
     gen_useragent,
     gen_useragent,
     dict_subset,
     dict_subset,
     match_language,
     match_language,
+    get_value,
 )
 )
 from searx.version import VERSION_STRING
 from searx.version import VERSION_STRING
 from searx.query import RawTextQuery
 from searx.query import RawTextQuery
@@ -161,6 +164,8 @@ for indice, theme in enumerate(themes):
     for (dirpath, dirnames, filenames) in os.walk(theme_img_path):
     for (dirpath, dirnames, filenames) in os.walk(theme_img_path):
         global_favicons[indice].extend(filenames)
         global_favicons[indice].extend(filenames)
 
 
+OUTPUT_FORMATS = ['html', 'csv', 'json', 'rss']
+
 STATS_SORT_PARAMETERS = {
 STATS_SORT_PARAMETERS = {
     'name': (False, 'name', ''),
     'name': (False, 'name', ''),
     'score': (True, 'score', 0),
     'score': (True, 'score', 0),
@@ -511,6 +516,11 @@ def render(template_name, override_theme=None, **kwargs):
 
 
     kwargs['preferences'] = request.preferences
     kwargs['preferences'] = request.preferences
 
 
+    kwargs['search_formats'] = [
+        x for x in get_value(
+            settings, 'search', 'formats', default=OUTPUT_FORMATS)
+        if x != 'html']
+
     kwargs['brand'] = brand
     kwargs['brand'] = brand
 
 
     kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':'))
     kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':'))
@@ -683,9 +693,12 @@ def search():
 
 
     # output_format
     # output_format
     output_format = request.form.get('format', 'html')
     output_format = request.form.get('format', 'html')
-    if output_format not in ['html', 'csv', 'json', 'rss']:
+    if output_format not in OUTPUT_FORMATS:
         output_format = 'html'
         output_format = 'html'
 
 
+    if output_format not in get_value(settings, 'search', 'formats', default=OUTPUT_FORMATS):
+        flask.abort(403)
+
     # check if there is query (not None and not an empty string)
     # check if there is query (not None and not an empty string)
     if not request.form.get('q'):
     if not request.form.get('q'):
         if output_format == 'html':
         if output_format == 'html':

+ 1 - 0
utils/templates/etc/searx/use_default_settings.yml

@@ -8,6 +8,7 @@ search:
     safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
     safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
     autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "swisscows", "qwant", "wikipedia" - leave blank to turn it off by default
     autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "swisscows", "qwant", "wikipedia" - leave blank to turn it off by default
     default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
     default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py'
+    formats: [html, csv, json, rss]
 
 
 server:
 server:
     port : 8888
     port : 8888