
add time range search with yahoo

Noemi Vanyi, 8 years ago
commit 93c0c49e9a
4 changed files with 35 additions and 11 deletions

  1. searx/engines/__init__.py  (+2 -1)
  2. searx/engines/yahoo.py     (+25 -8)
  3. searx/search.py            (+7 -2)
  4. searx/webapp.py            (+1 -0)

searx/engines/__init__.py  (+2 -1)

@@ -42,7 +42,8 @@ engine_default_args = {'paging': False,
                        'shortcut': '-',
                        'disabled': False,
                        'suspend_end_time': 0,
-                       'continuous_errors': 0}
+                       'continuous_errors': 0,
+                       'time_range_support': False}


 def load_module(filename):
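
Note: time_range_support now has a default of False in engine_default_args, so any engine module that does not declare the flag is treated as unable to filter by time. A minimal sketch of how an engine might opt in (hypothetical module, not part of this commit):

    # hypothetical engine module, e.g. searx/engines/example.py
    categories = ['general']
    paging = True
    time_range_support = True  # engines that omit this keep the default False

    def request(query, params):
        # params['time_range'] is set by searx/search.py below; it is None
        # when no range was requested, otherwise a string such as 'day',
        # 'week' or 'month' (the values yahoo.py maps further down).
        if params['time_range']:
            pass  # narrow the upstream query to the requested range here
        return params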

searx/engines/yahoo.py  (+25 -8)

@@ -20,10 +20,12 @@ from searx.engines.xpath import extract_text, extract_url
 categories = ['general']
 paging = True
 language_support = True
+time_range_support = True

 # search-url
 base_url = 'https://search.yahoo.com/'
 search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}'
+search_url_with_time = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}&age={age}&btf={btf}&fr2=time'

 # specific xpath variables
 results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]"
@@ -32,6 +34,9 @@ title_xpath = './/h3/a'
 content_xpath = './/div[@class="compText aAbs"]'
 suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a"

+time_range_dict = {'day': ['1d', 'd'],
+                   'week': ['1w', 'w'],
+                   'month': ['1m', 'm']}

 # remove yahoo-specific tracking-url
 def parse_url(url_string):
@@ -51,18 +56,30 @@ def parse_url(url_string):
         return unquote(url_string[start:end])


+def _get_url(query, offset, language, time_range):
+    if time_range:
+        return base_url + search_url_with_time.format(offset=offset,
+                                                      query=urlencode({'p': query}),
+                                                      lang=language,
+                                                      age=time_range_dict[time_range][0],
+                                                      btf=time_range_dict[time_range][1])
+    return base_url + search_url.format(offset=offset,
+                                        query=urlencode({'p': query}),
+                                        lang=language)
+
+
+def _get_language(params):
+    if params['language'] == 'all':
+        return 'en'
+    return params['language'].split('_')[0]
+
+
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
+    language = _get_language(params)

-    if params['language'] == 'all':
-        language = 'en'
-    else:
-        language = params['language'].split('_')[0]
-
-    params['url'] = base_url + search_url.format(offset=offset,
-                                                 query=urlencode({'p': query}),
-                                                 lang=language)
+    params['url'] = _get_url(query, offset, language, params['time_range'])

     # TODO required?
     params['cookies']['sB'] = 'fl=1&vl=lang_{lang}&sh=1&rw=new&v=1'\
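
Note: a short usage sketch of the new helpers, illustrative only; it assumes the module is importable as searx.engines.yahoo, and the URLs in the comments follow directly from the format strings above:

    from searx.engines.yahoo import _get_url, _get_language

    params = {'language': 'de_DE', 'pageno': 1, 'time_range': 'week'}
    language = _get_language(params)            # 'de'
    offset = (params['pageno'] - 1) * 10 + 1    # 1

    # with a time range, the age/btf/fr2 parameters are appended:
    # https://search.yahoo.com/search?p=searx&b=1&fl=1&vl=lang_de&age=1w&btf=w&fr2=time
    print(_get_url('searx', offset, language, params['time_range']))

    # without one, the plain search_url is used:
    # https://search.yahoo.com/search?p=searx&b=1&fl=1&vl=lang_de
    print(_get_url('searx', offset, language, None))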

searx/search.py  (+7 -2)

@@ -138,6 +138,7 @@ class Search(object):
         self.paging = False
         self.pageno = 1
         self.lang = 'all'
+        self.time_range = None

         # set blocked engines
         self.disabled_engines = request.preferences.engines.get_disabled()
@@ -178,9 +179,9 @@
         if len(query_obj.languages):
             self.lang = query_obj.languages[-1]

-        self.engines = query_obj.engines
+        self.time_range = self.request_data.get('time_range')

-        self.categories = []
+        self.engines = query_obj.engines

         # if engines are calculated from query,
         # set categories by using that informations
@@ -279,6 +280,9 @@
             if self.lang != 'all' and not engine.language_support:
                 continue

+            if self.time_range and not engine.time_range_support:
+                continue
+
             # set default request parameters
             request_params = default_request_params()
             request_params['headers']['User-Agent'] = user_agent
@@ -293,6 +297,7 @@

             # 0 = None, 1 = Moderate, 2 = Strict
             request_params['safesearch'] = request.preferences.get_value('safesearch')
+            request_params['time_range'] = self.time_range

             # update request parameters dependent on
             # search-engine (contained in engines folder)
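
Note: these hunks wire the feature end to end: the range is read from the request data, engines without time_range_support are skipped whenever a range is set, and supporting engines receive it through their request parameters. A simplified sketch of that flow (not the actual Search class):

    # simplified, hypothetical sketch of the per-engine loop above
    def build_requests(request_data, selected_engines, engines):
        time_range = request_data.get('time_range')  # None when not requested

        for name in selected_engines:
            engine = engines[name]

            # a requested time range excludes engines that cannot honour it
            if time_range and not engine.time_range_support:
                continue

            request_params = {'time_range': time_range}
            yield name, request_params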

searx/webapp.py  (+1 -0)

@@ -459,6 +459,7 @@ def index():
         paging=search.paging,
         number_of_results=format_decimal(number_of_results),
         pageno=search.pageno,
+        time_range=search.time_range,
         base_url=get_base_url(),
         suggestions=search.result_container.suggestions,
         answers=search.result_container.answers,