Browse Source

[enh] validate input and raise an exception inside search.py. The exception message is output in json and rss format.

Alexandre Flament 8 years ago
parent
commit
15eef0ebdb
4 changed files with 133 additions and 38 deletions
  1. 32 0
      searx/exceptions.py
  2. 43 23
      searx/search.py
  3. 6 0
      searx/templates/__common__/opensearch_response_rss.xml
  4. 52 15
      searx/webapp.py

+ 32 - 0
searx/exceptions.py

@@ -0,0 +1,32 @@
+'''
+searx is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+searx is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
+
+(C) 2017- by Alexandre Flament, <alex@al-f.net>
+'''
+
+
+class SearxException(Exception):
+    """Base class for searx exceptions; SearxParameterException derives from it."""
+    pass
+
+
+class SearxParameterException(SearxException):
+
+    def __init__(self, name, value):
+        if value == '' or value is None:
+            message = 'Empty ' + name + ' parameter'
+        else:
+            message = 'Invalid value "' + value + '" for parameter ' + name
+        super(SearxParameterException, self).__init__(message)
+        self.parameter_name = name
+        self.parameter_value = value

+ 43 - 23
searx/search.py

@@ -31,11 +31,16 @@ from searx.query import RawTextQuery, SearchQuery
 from searx.results import ResultContainer
 from searx import logger
 from searx.plugins import plugins
+from searx.languages import language_codes
+from searx.exceptions import SearxParameterException
 
 logger = logger.getChild('search')
 
 number_of_searches = 0
 
+language_code_set = set(l[0].lower() for l in language_codes)
+language_code_set.add('all')
+
 
 def send_http_request(engine, request_params, start_time, timeout_limit):
     # for page_load_time stats
@@ -182,33 +187,13 @@ def default_request_params():
 
 
 def get_search_query_from_webapp(preferences, form):
-    query = None
-    query_engines = []
-    query_categories = []
-    query_pageno = 1
-    query_lang = 'all'
-    query_time_range = None
+    # no text for the query ?
+    if not form.get('q'):
+        raise SearxParameterException('q', '')
 
     # set blocked engines
     disabled_engines = preferences.engines.get_disabled()
 
-    # set specific language if set
-    query_lang = preferences.get_value('language')
-
-    # safesearch
-    query_safesearch = preferences.get_value('safesearch')
-
-    # TODO better exceptions
-    if not form.get('q'):
-        raise Exception('noquery')
-
-    # set pagenumber
-    pageno_param = form.get('pageno', '1')
-    if not pageno_param.isdigit() or int(pageno_param) < 1:
-        pageno_param = 1
-
-    query_pageno = int(pageno_param)
-
     # parse query, if tags are set, which change
     # the serch engine or search-language
     raw_text_query = RawTextQuery(form['q'], disabled_engines)
@@ -217,6 +202,13 @@ def get_search_query_from_webapp(preferences, form):
     # set query
     query = raw_text_query.getSearchQuery()
 
+    # get and check page number
+    pageno_param = form.get('pageno', '1')
+    if not pageno_param.isdigit() or int(pageno_param) < 1:
+        raise SearxParameterException('pageno', pageno_param)
+    query_pageno = int(pageno_param)
+
+    # get language
     # set specific language if set on request, query or preferences
     # TODO support search with multible languages
     if len(raw_text_query.languages):
@@ -226,10 +218,38 @@ def get_search_query_from_webapp(preferences, form):
     else:
         query_lang = preferences.get_value('language')
 
+    # check language
+    if query_lang not in language_code_set:
+        raise SearxParameterException('language', query_lang)
+
+    # get safesearch
+    if 'safesearch' in form:
+        query_safesearch = form.get('safesearch')
+        # first check safesearch
+        if not query_safesearch.isdigit():
+            raise SearxParameterException('safesearch', query_safesearch)
+        query_safesearch = int(query_safesearch)
+    else:
+        query_safesearch = preferences.get_value('safesearch')
+
+    # safesearch : second check
+    if query_safesearch < 0 or query_safesearch > 2:
+        raise SearxParameterException('safesearch', query_safesearch)
+
+    # get time_range
     query_time_range = form.get('time_range')
 
+    # check time_range
+    if not(query_time_range is None)\
+       and not (query_time_range in ['', 'day', 'week', 'month', 'year']):
+        raise SearxParameterException('time_range', query_time_range)
+
+    # query_engines
     query_engines = raw_text_query.engines
 
+    # query_categories
+    query_categories = []
+
     # if engines are calculated from query,
     # set categories by using that informations
     if query_engines and raw_text_query.specific:

+ 6 - 0
searx/templates/__common__/opensearch_response_rss.xml

@@ -11,6 +11,12 @@
     <opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
     <atom:link rel="search" type="application/opensearchdescription+xml" href="{{ base_url }}opensearch.xml"/>
     <opensearch:Query role="request" searchTerms="{{ q|e }}" startPage="1" />
+    {% if error_message %}
+    <item>
+      <title>Error</title>
+      <description>{{ error_message|e }}</description>
+    </item>
+    {% endif %}
     {% for r in results %}
     <item>
       <title>{{ r.title }}</title>

+ 52 - 15
searx/webapp.py

@@ -52,6 +52,7 @@ from flask import (
 from flask_babel import Babel, gettext, format_date, format_decimal
 from flask.json import jsonify
 from searx import settings, searx_dir, searx_debug
+from searx.exceptions import SearxException, SearxParameterException
 from searx.engines import (
     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
 )
@@ -400,6 +401,33 @@ def pre_request():
             request.user_plugins.append(plugin)
 
 
+def index_error(output_format, error_message):
+    if output_format == 'json':
+        return Response(json.dumps({'error': error_message}),
+                        mimetype='application/json')
+    elif output_format == 'csv':
+        response = Response('', mimetype='application/csv')
+        cont_disp = 'attachment;Filename=searx.csv'
+        response.headers.add('Content-Disposition', cont_disp)
+        return response
+    elif output_format == 'rss':
+        response_rss = render(
+            'opensearch_response_rss.xml',
+            results=[],
+            q=request.form['q'] if 'q' in request.form else '',
+            number_of_results=0,
+            base_url=get_base_url(),
+            error_message=error_message
+        )
+        return Response(response_rss, mimetype='text/xml')
+    else:
+        # html
+        request.errors.append(gettext('search error'))
+        return render(
+            'index.html',
+        )
+
+
 @app.route('/search', methods=['GET', 'POST'])
 @app.route('/', methods=['GET', 'POST'])
 def index():
@@ -408,10 +436,19 @@ def index():
     Supported outputs: html, json, csv, rss.
     """
 
+    # output_format
+    output_format = request.form.get('format', 'html')
+    if output_format not in ['html', 'csv', 'json', 'rss']:
+        output_format = 'html'
+
+    # check if there is query
     if request.form.get('q') is None:
-        return render(
-            'index.html',
-        )
+        if output_format == 'html':
+            return render(
+                'index.html',
+            )
+        else:
+            return index_error(output_format, 'No query'), 400
 
     # search
     search_query = None
@@ -421,20 +458,24 @@ def index():
         # search = Search(search_query) #  without plugins
         search = SearchWithPlugins(search_query, request)
         result_container = search.search()
-    except:
-        request.errors.append(gettext('search error'))
+    except Exception as e:
+        # log exception
         logger.exception('search error')
-        return render(
-            'index.html',
-        )
 
+        # is it an invalid input parameter or something else ?
+        if (issubclass(e.__class__, SearxParameterException)):
+            return index_error(output_format, e.message), 400
+        else:
+            return index_error(output_format, gettext('search error')), 500
+
+    # results
     results = result_container.get_ordered_results()
+    number_of_results = result_container.results_number()
+    if number_of_results < result_container.results_length():
+        number_of_results = 0
 
     # UI
     advanced_search = request.form.get('advanced_search', None)
-    output_format = request.form.get('format', 'html')
-    if output_format not in ['html', 'csv', 'json', 'rss']:
-        output_format = 'html'
 
     # output
     for result in results:
@@ -470,10 +511,6 @@ def index():
                 else:
                     result['publishedDate'] = format_date(result['publishedDate'])
 
-    number_of_results = result_container.results_number()
-    if number_of_results < result_container.results_length():
-        number_of_results = 0
-
     if output_format == 'json':
         return Response(json.dumps({'query': search_query.query,
                                     'number_of_results': number_of_results,