Browse Source

[enh] Add timeout limit per request (#1640)

The new URL parameter "timeout_limit" sets the timeout limit, expressed in seconds.
Example "timeout_limit=1.5" means the timeout limit is 1.5 seconds.

In addition, the query can start with <[number] to set the timeout limit.

For a number between 0 and 99, the unit is the second:
Example: "<3 searx" means the timeout limit is 3 seconds.

For a number of 100 or above, the unit is the millisecond:
Example: "<850 searx" means the timeout limit is 850 milliseconds.

In addition, there is a new optional setting: outgoing.max_request_timeout.
If it is not set, the user timeout can't go above the searx configuration (as before: the maximum timeout of the engines selected for a query).

If the value is set, the user can set a timeout between 0 and max_request_timeout using
the <[number] query prefix or the timeout_limit query parameter.

Related to #1077
Updated version of PR #1413 from @isj-privacore
Alexandre Flament 5 years ago
parent
commit
72029d27de

+ 18 - 1
searx/query.py

@@ -43,6 +43,7 @@ class RawTextQuery(object):
         self.query_parts = []
         self.engines = []
         self.languages = []
+        self.timeout_limit = None
         self.specific = False
 
     # parse query, if tags are set, which
@@ -69,6 +70,21 @@ class RawTextQuery(object):
                 self.query_parts.append(query_part)
                 continue
 
+            # this force the timeout
+            if query_part[0] == '<':
+                try:
+                    raw_timeout_limit = int(query_part[1:])
+                    if raw_timeout_limit < 100:
+                        # below 100, the unit is the second ( <3 = 3 seconds timeout )
+                        self.timeout_limit = float(raw_timeout_limit)
+                    else:
+                        # 100 or above, the unit is the millisecond ( <850 = 850 milliseconds timeout )
+                        self.timeout_limit = raw_timeout_limit / 1000.0
+                    parse_next = True
+                except ValueError:
+                    # error not reported to the user
+                    pass
+
             # this force a language
             if query_part[0] == ':':
                 lang = query_part[1:].lower().replace('_', '-')
@@ -161,7 +177,7 @@ class RawTextQuery(object):
 class SearchQuery(object):
     """container for all the search parameters (query, language, etc...)"""
 
-    def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
+    def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, timeout_limit=None):
         self.query = query.encode('utf-8')
         self.engines = engines
         self.categories = categories
@@ -169,6 +185,7 @@ class SearchQuery(object):
         self.safesearch = safesearch
         self.pageno = pageno
         self.time_range = time_range
+        self.timeout_limit = timeout_limit
 
     def __str__(self):
         return str(self.query) + ";" + str(self.engines)

+ 48 - 7
searx/search.py

@@ -45,6 +45,16 @@ if sys.version_info[0] == 3:
 logger = logger.getChild('search')
 
 number_of_searches = 0
+max_request_timeout = settings.get('outgoing', {}).get('max_request_timeout' or None)
+if max_request_timeout is None:
+    logger.info('max_request_timeout={0}'.format(max_request_timeout))
+else:
+    if isinstance(max_request_timeout, float):
+        logger.info('max_request_timeout={0} second(s)'.format(max_request_timeout))
+    else:
+        logger.critical('outgoing.max_request_timeout if defined has to be float')
+        from sys import exit
+        exit(1)
 
 
 def send_http_request(engine, request_params):
@@ -265,6 +275,15 @@ def get_search_query_from_webapp(preferences, form):
     # query_engines
     query_engines = raw_text_query.engines
 
+    # timeout_limit
+    query_timeout = raw_text_query.timeout_limit
+    if query_timeout is None and 'timeout_limit' in form:
+        raw_time_limit = form.get('timeout_limit')
+        try:
+            query_timeout = float(raw_time_limit)
+        except ValueError:
+            raise SearxParameterException('timeout_limit', raw_time_limit)
+
     # query_categories
     query_categories = []
 
@@ -338,7 +357,8 @@ def get_search_query_from_webapp(preferences, form):
     query_engines = deduplicate_query_engines(query_engines)
 
     return (SearchQuery(query, query_engines, query_categories,
-                        query_lang, query_safesearch, query_pageno, query_time_range),
+                        query_lang, query_safesearch, query_pageno,
+                        query_time_range, query_timeout),
             raw_text_query)
 
 
@@ -351,6 +371,7 @@ class Search(object):
         super(Search, self).__init__()
         self.search_query = search_query
         self.result_container = ResultContainer()
+        self.actual_timeout = None
 
     # do search-request
     def search(self):
@@ -380,7 +401,7 @@ class Search(object):
         search_query = self.search_query
 
         # max of all selected engine timeout
-        timeout_limit = 0
+        default_timeout = 0
 
         # start search-reqest for all selected engines
         for selected_engine in search_query.engines:
@@ -420,12 +441,32 @@ class Search(object):
             # append request to list
             requests.append((selected_engine['name'], search_query.query, request_params))
 
-            # update timeout_limit
-            timeout_limit = max(timeout_limit, engine.timeout)
-
+            # update default_timeout
+            default_timeout = max(default_timeout, engine.timeout)
+
+        # adjust timeout
+        self.actual_timeout = default_timeout
+        query_timeout = self.search_query.timeout_limit
+
+        if max_request_timeout is None and query_timeout is None:
+            # No max, no user query: default_timeout
+            pass
+        elif max_request_timeout is None and query_timeout is not None:
+            # No max, but user query: From user query except if above default
+            self.actual_timeout = min(default_timeout, query_timeout)
+        elif max_request_timeout is not None and query_timeout is None:
+            # Max, no user query: Default except if above max
+            self.actual_timeout = min(default_timeout, max_request_timeout)
+        elif max_request_timeout is not None and query_timeout is not None:
+            # Max & user query: From user query except if above max
+            self.actual_timeout = min(query_timeout, max_request_timeout)
+
+        logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})"
+                     .format(self.actual_timeout, default_timeout, query_timeout, max_request_timeout))
+
+        # send all search-request
         if requests:
-            # send all search-request
-            search_multiple_requests(requests, self.result_container, start_time, timeout_limit)
+            search_multiple_requests(requests, self.result_container, start_time, self.actual_timeout)
             start_new_thread(gc.collect, tuple())
 
         # return results, suggestions, answers and infoboxes

+ 2 - 1
searx/settings.yml

@@ -34,7 +34,8 @@ ui:
 #    key : !!binary "your_morty_proxy_key"
 
 outgoing: # communication with search engines
-    request_timeout : 2.0 # seconds
+    request_timeout : 2.0 # default timeout in seconds, can be override by engine
+    # max_request_timeout: 10.0 # the maximum timeout in seconds
     useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator
     pool_connections : 100 # Number of different hosts
     pool_maxsize : 10 # Number of simultaneous requests by host

+ 1 - 0
searx/templates/oscar/results.html

@@ -5,6 +5,7 @@
     <input type="hidden" name="pageno" value="{{ pageno }}" />
     <input type="hidden" name="time_range" value="{{ time_range }}" />
     <input type="hidden" name="language" value="{{ current_language }}" />
+    <input type="hidden" name="timeout_limit" value="{{ timeout_limit }}" />
 {%- endmacro %}
 {%- macro search_url() %}{{ base_url }}?q={{ q|urlencode }}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if time_range %}&amp;time_range={{ time_range }}{% endif %}{% if current_language != 'all' %}&amp;language={{ current_language }}{% endif %}{% endmacro -%}
 

+ 5 - 0
searx/templates/simple/infobox.html

@@ -36,6 +36,11 @@
       {% for suggestion in topic.suggestions %}
       <form method="{{ method or 'POST' }}" action="{{ url_for('index') }}">
         <input type="hidden" name="q" value="{{ suggestion }}">
+        <input type="hidden" name="time_range" value="{{ time_range }}">
+        <input type="hidden" name="language" value="{{ current_language }}">
+        <input type="hidden" name="safesearch" value="{{ safesearch }}">
+        <input type="hidden" name="theme" value="{{ theme }}">
+        {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
         <input type="submit" value="{{ suggestion }}" />
       </form>
       {% endfor %}

+ 9 - 3
searx/templates/simple/results.html

@@ -51,9 +51,11 @@
             {% for suggestion in suggestions %}
             <form method="{{ method or 'POST' }}" action="{{ url_for('index') }}">
               <input type="hidden" name="q" value="{{ suggestion.url }}">
+              <input type="hidden" name="time_range" value="{{ time_range }}">
               <input type="hidden" name="language" value="{{ current_language }}">
               <input type="hidden" name="safesearch" value="{{ safesearch }}">
               <input type="hidden" name="theme" value="{{ theme }}">
+              {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
               <input type="submit" class="suggestion" value="&bull; {{ suggestion.title }}">
             </form>
             {% endfor %}
@@ -63,7 +65,7 @@
 
         <div id="search_url">
             <h4 class="title">{{ _('Search URL') }} :</h4>
-            <div class="selectable_url"><pre>{{ base_url }}?q={{ q|urlencode }}&amp;language={{ current_language }}&amp;time_range={{ time_range }}&amp;safesearch={{ safesearch }}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}</pre></div>
+            <div class="selectable_url"><pre>{{ base_url }}?q={{ q|urlencode }}&amp;language={{ current_language }}&amp;time_range={{ time_range }}&amp;safesearch={{ safesearch }}{% if pageno > 1 %}&amp;pageno={{ pageno }}{% endif %}{% if selected_categories %}&amp;categories={{ selected_categories|join(",") | replace(' ','+') }}{% endif %}{% if timeout_limit %}&amp;timeout_limit={{ timeout_limit|urlencode }}{% endif %}</pre></div>
         </div>
         <div id="apis">
           <h4 class="title">{{ _('Download results') }}</h4>
@@ -79,6 +81,7 @@
               <input type="hidden" name="language" value="{{ current_language }}">
               <input type="hidden" name="safesearch" value="{{ safesearch }}">
               <input type="hidden" name="format" value="{{ output_type }}">
+              {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
               <input type="submit" value="{{ output_type }}">
             </form>
 	  </div>
@@ -97,6 +100,7 @@
           <input type="hidden" name="language" value="{{ current_language }}">
           <input type="hidden" name="safesearch" value="{{ safesearch }}">
           <input type="hidden" name="theme" value="{{ theme }}">
+          {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit }}" >{% endif %}
           <input type="submit" value="{{ correction }}">
 	</form>
       </div>
@@ -134,7 +138,8 @@
                   <input type="hidden" name="language" value="{{ current_language }}" >
                   <input type="hidden" name="safesearch" value="{{ safesearch }}" >
                   <input type="hidden" name="theme" value="{{ theme }}" >
-		  <button type="submit">{{ icon_small('chevron-left') }} {{ _('previous page') }}</button>
+                  {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
+                  <button type="submit">{{ icon_small('chevron-left') }} {{ _('previous page') }}</button>
                 </div>
             </form>
         {% endif %}
@@ -149,7 +154,8 @@
               <input type="hidden" name="language" value="{{ current_language }}" >
               <input type="hidden" name="safesearch" value="{{ safesearch }}" >
               <input type="hidden" name="theme" value="{{ theme }}" >
-	       <button type="submit">{{ _('next page') }} {{ icon_small('chevron-right') }}</button>
+              {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
+              <button type="submit">{{ _('next page') }} {{ icon_small('chevron-right') }}</button>
             </div>
         </form>
     </nav>

+ 1 - 0
searx/templates/simple/search.html

@@ -14,4 +14,5 @@
   {% include 'simple/categories.html' %}
   <input type="hidden" name="safesearch" value="{{ safesearch }}" >
   <input type="hidden" name="theme" value="{{ theme }}" >
+  {% if timeout_limit %}<input type="hidden" name="timeout_limit" value="{{ timeout_limit|e }}" >{% endif %}
 </form>

+ 2 - 1
searx/webapp.py

@@ -628,7 +628,8 @@ def index():
                                         fallback=settings['search']['language']),
         base_url=get_base_url(),
         theme=get_current_theme_name(),
-        favicons=global_favicons[themes.index(get_current_theme_name())]
+        favicons=global_favicons[themes.index(get_current_theme_name())],
+        timeout_limit=request.form.get('timeout_limit', None)
     )
 
 

+ 42 - 0
tests/unit/test_query.py

@@ -62,3 +62,45 @@ class TestQuery(SearxTestCase):
         self.assertEquals(len(query.query_parts), 1)
         self.assertEquals(len(query.languages), 0)
         self.assertFalse(query.specific)
+
+    def test_timeout_below100(self):
+        query_text = '<3 the query'
+        query = RawTextQuery(query_text, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), query_text)
+        self.assertEquals(len(query.query_parts), 3)
+        self.assertEquals(query.timeout_limit, 3)
+        self.assertFalse(query.specific)
+
+    def test_timeout_above100(self):
+        query_text = '<350 the query'
+        query = RawTextQuery(query_text, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), query_text)
+        self.assertEquals(len(query.query_parts), 3)
+        self.assertEquals(query.timeout_limit, 0.35)
+        self.assertFalse(query.specific)
+
+    def test_timeout_above1000(self):
+        query_text = '<3500 the query'
+        query = RawTextQuery(query_text, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), query_text)
+        self.assertEquals(len(query.query_parts), 3)
+        self.assertEquals(query.timeout_limit, 3.5)
+        self.assertFalse(query.specific)
+
+    def test_timeout_invalid(self):
+        # invalid number: it is not bang but it is part of the query
+        query_text = '<xxx the query'
+        query = RawTextQuery(query_text, [])
+        query.parse_query()
+
+        self.assertEquals(query.getFullQuery(), query_text)
+        self.assertEquals(len(query.query_parts), 1)
+        self.assertEquals(query.query_parts[0], query_text)
+        self.assertEquals(query.timeout_limit, None)
+        self.assertFalse(query.specific)

+ 53 - 3
tests/unit/test_search.py

@@ -2,9 +2,59 @@
 
 from searx.testing import SearxTestCase
 
+import searx.preferences
+import searx.search
+import searx.engines
+
 
-#  TODO
 class SearchTestCase(SearxTestCase):
 
-    def test_(self):
-        pass
+    @classmethod
+    def setUpClass(cls):
+        searx.engines.initialize_engines([{
+            'name': 'general dummy',
+            'engine': 'dummy',
+            'categories': 'general',
+            'shortcut': 'gd',
+            'timeout': 3.0
+        }])
+
+    def test_timeout_simple(self):
+        searx.search.max_request_timeout = None
+        search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
+                                               ['general'], 'en-US', 0, 1, None, None)
+        search = searx.search.Search(search_query)
+        search.search()
+        self.assertEquals(search.actual_timeout, 3.0)
+
+    def test_timeout_query_above_default_nomax(self):
+        searx.search.max_request_timeout = None
+        search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
+                                               ['general'], 'en-US', 0, 1, None, 5.0)
+        search = searx.search.Search(search_query)
+        search.search()
+        self.assertEquals(search.actual_timeout, 3.0)
+
+    def test_timeout_query_below_default_nomax(self):
+        searx.search.max_request_timeout = None
+        search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
+                                               ['general'], 'en-US', 0, 1, None, 1.0)
+        search = searx.search.Search(search_query)
+        search.search()
+        self.assertEquals(search.actual_timeout, 1.0)
+
+    def test_timeout_query_below_max(self):
+        searx.search.max_request_timeout = 10.0
+        search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
+                                               ['general'], 'en-US', 0, 1, None, 5.0)
+        search = searx.search.Search(search_query)
+        search.search()
+        self.assertEquals(search.actual_timeout, 5.0)
+
+    def test_timeout_query_above_max(self):
+        searx.search.max_request_timeout = 10.0
+        search_query = searx.query.SearchQuery('test', [{'category': 'general', 'name': 'general dummy'}],
+                                               ['general'], 'en-US', 0, 1, None, 15.0)
+        search = searx.search.Search(search_query)
+        search.search()
+        self.assertEquals(search.actual_timeout, 10.0)