Browse Source

Merge pull request #800 from dalf/searchpy5

search.py : Fix and some minor modifications
Adam Tauber 8 years ago
parent
commit
3d8c9bab96
2 changed files with 14 additions and 17 deletions
  1. 1 0
      searx/results.py
  2. 13 17
      searx/search.py

+ 1 - 0
searx/results.py

@@ -133,6 +133,7 @@ class ResultContainer(object):

     def extend(self, engine_name, results):
         for result in list(results):
+            result['engine'] = engine_name
             if 'suggestion' in result:
                 self.suggestions.add(result['suggestion'])
                 results.remove(result)

+ 13 - 17
searx/search.py

@@ -37,7 +37,7 @@ logger = logger.getChild('search')
 number_of_searches = 0


-def send_http_request(engine, request_params, timeout_limit):
+def send_http_request(engine, request_params, start_time, timeout_limit):
     # for page_load_time stats
     time_before_request = time()

@@ -62,9 +62,10 @@ def send_http_request(engine, request_params, timeout_limit):

     # is there a timeout (no parsing in this case)
     timeout_overhead = 0.2  # seconds
-    search_duration = time() - request_params['started']
+    time_after_request = time()
+    search_duration = time_after_request - start_time
     if search_duration > timeout_limit + timeout_overhead:
-        raise Timeout(response=response)
+        raise requests.exceptions.Timeout(response=response)

     with threading.RLock():
         # no error : reset the suspend variables
@@ -72,14 +73,14 @@ def send_http_request(engine, request_params, timeout_limit):
         engine.suspend_end_time = 0
         # update stats with current page-load-time
         # only the HTTP request
-        engine.stats['page_load_time'] += time() - time_before_request
+        engine.stats['page_load_time'] += time_after_request - time_before_request
         engine.stats['page_load_count'] += 1

     # everything is ok : return the response
     return response


-def search_one_request(engine, query, request_params, timeout_limit):
+def search_one_request(engine, query, request_params, start_time, timeout_limit):
     # update request parameters dependent on
     # search-engine (contained in engines folder)
     engine.request(query, request_params)
@@ -92,24 +93,21 @@ def search_one_request(engine, query, request_params, timeout_limit):
         return []

     # send request
-    response = send_http_request(engine, request_params, timeout_limit)
+    response = send_http_request(engine, request_params, start_time, timeout_limit)

     # parse the response
     response.search_params = request_params
     return engine.response(response)


-def search_one_request_safe(engine_name, query, request_params, result_container, timeout_limit):
-    start_time = time()
+def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
     engine = engines[engine_name]

     try:
         # send requests and parse the results
-        search_results = search_one_request(engine, query, request_params, timeout_limit)
+        search_results = search_one_request(engine, query, request_params, start_time, timeout_limit)

         # add results
-        for result in search_results:
-            result['engine'] = engine_name
         result_container.extend(engine_name, search_results)

         # update engine time when there is no exception
@@ -131,7 +129,7 @@ def search_one_request_safe(engine_name, query, request_params, result_container
                          "(search duration : {1} s, timeout: {2} s) : {3}"
                          .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
             requests_exception = True
-        if (issubclass(e.__class__, requests.exceptions.RequestException)):
+        elif (issubclass(e.__class__, requests.exceptions.RequestException)):
             # other requests exception
             logger.exception("engine {0} : requests exception"
                              "(search duration : {1} s, timeout: {2} s) : {3}"
@@ -151,14 +149,13 @@ def search_one_request_safe(engine_name, query, request_params, result_container
         return False


-def search_multiple_requests(requests, result_container, timeout_limit):
-    start_time = time()
+def search_multiple_requests(requests, result_container, start_time, timeout_limit):
     search_id = uuid4().__str__()

     for engine_name, query, request_params in requests:
         th = threading.Thread(
             target=search_one_request_safe,
-            args=(engine_name, query, request_params, result_container, timeout_limit),
+            args=(engine_name, query, request_params, result_container, start_time, timeout_limit),
             name=search_id,
         )
         th._engine_name = engine_name
@@ -368,7 +365,6 @@ class Search(object):
             request_params = default_request_params()
             request_params['headers']['User-Agent'] = user_agent
             request_params['category'] = selected_engine['category']
-            request_params['started'] = start_time
             request_params['pageno'] = search_query.pageno

             if hasattr(engine, 'language') and engine.language:
@@ -388,7 +384,7 @@ class Search(object):

         if requests:
             # send all search-request
-            search_multiple_requests(requests, self.result_container, timeout_limit - (time() - start_time))
+            search_multiple_requests(requests, self.result_container, start_time, timeout_limit)
             start_new_thread(gc.collect, tuple())

         # return results, suggestions, answers and infoboxes