Browse Source

[enh] engine types

asciimoo 11 years ago
parent
commit
d793c2733c

+ 1 - 1
examples/basic_engine.py

@@ -17,5 +17,5 @@ def response(resp):
     '''post-response callback
     resp: requests response object
     '''
-    return [resp.text]
+    return [{'url': '', 'title': '', 'content': ''}]
 

+ 0 - 7
searx/__init__.py

@@ -1,7 +0,0 @@
-
-base_result_template = """
-<div class="result">
-    <h3 class="result_title"><a href="{url}">{title}</a></h3>
-    <p class="content">{content}<br />{url}</p>
-</div>
-"""

+ 8 - 6
searx/engines/__init__.py

@@ -6,7 +6,7 @@ import grequests
 
 engine_dir = dirname(realpath(__file__))
 
-engines = []
+engines = {}
 
 for filename in listdir(engine_dir):
     modname = splitext(filename)[0]
@@ -16,14 +16,16 @@ for filename in listdir(engine_dir):
     engine = load_source(modname, filepath)
     if not hasattr(engine, 'request') or not hasattr(engine, 'response'):
         continue
-    engines.append(engine)
+    engines[modname] = engine
 
 def default_request_params():
     return {'method': 'GET', 'headers': {}, 'data': {}, 'url': ''}
 
-def make_callback(results, callback):
+def make_callback(engine_name, results, callback):
     def process_callback(response, **kwargs):
-        results.extend(callback(response))
+        for result in callback(response):
+            result['engine'] = engine_name
+            results.append(result)
     return process_callback
 
 def search(query, request):
@@ -31,11 +33,11 @@ def search(query, request):
     requests = []
     results = []
     user_agent = request.headers.get('User-Agent', '')
-    for engine in engines:
+    for ename, engine in engines.items():
         headers = default_request_params()
         headers['User-Agent'] = user_agent
         request_params = engine.request(query, headers)
-        callback = make_callback(results, engine.response)
+        callback = make_callback(ename, results, engine.response)
         if request_params['method'] == 'GET':
             req = grequests.get(request_params['url']
                                 ,headers=headers

+ 12 - 7
searx/engines/duckduckgo.py

@@ -1,14 +1,19 @@
-from lxml import html
+from json import loads
 
 
 def request(query, params):
-    params['method']    = 'POST'
-    params['url']       = 'https://duckduckgo.com/html'
-    params['data']['q'] = query
+    params['url'] = 'https://duckduckgo.com/d.js?q=%s&l=us-en&p=1&s=0' % query
     return params
 
 
 def response(resp):
-    dom = html.fromstring(resp.text)
-    results = dom.xpath('//div[@class="results_links results_links_deep web-result"]')
-    return [html.tostring(x) for x in results]
+    results = []
+    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1]
+    for r in search_res:
+        if not r.get('t'):
+            continue
+        results.append({'title': r['t']
+                       ,'content': r['a']
+                       ,'url': r['u']
+                       })
+    return results

+ 6 - 6
searx/engines/duckduckgo_definitions.py

@@ -1,5 +1,4 @@
 import json
-from searx import base_result_template
 
 def request(query, params):
     params['url'] = 'http://api.duckduckgo.com/?q=%s&format=json&pretty=0' % query
@@ -10,10 +9,11 @@ def response(resp):
     search_res = json.loads(resp.text)
     results = []
     if 'Definition' in search_res:
-        res = {'title'   : search_res.get('Heading', '')
-              ,'content' : search_res.get('Definition', '')
-              ,'url'     : search_res.get('AbstractURL', '')
-              }
-        results.append(base_result_template.format(**res))
+        if search_res.get('AbstractURL'):
+            res = {'title'   : search_res.get('Heading', '')
+                  ,'content' : search_res.get('Definition', '')
+                  ,'url'     : search_res.get('AbstractURL', '')
+                  }
+            results.append(res)
 
     return results

+ 3 - 1
searx/static/css/style.css

@@ -8,7 +8,9 @@ html {
 
 h1 { font-size: 5em; }
 
-input { border: 2px solid #8888FF; padding: 8px; background-color: #FFFFFF; font-size: 1.6em; }
+input { border: 2px solid #8888FF; padding: 8px; background-color: #FFFFFF; font-size: 1.3em; }
+
+a { text-decoration: none; }
 
 .result_title { margin-bottom: 0; }
 

+ 4 - 1
searx/templates/results.html

@@ -5,6 +5,9 @@
     <input type="submit" value="search" />
 </form>
 {% for result in results %}
-    <p>{{ result|safe }}</p>
+    <div class="result">
+        <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
+        <p class="content"><span class="engine">{{ result.engine }}</span><br />{% if result.content %}{{ result.content|safe }}<br />{% endif %}<span class="url">{{ result.url }}</span></p>
+    </div>
 {% endfor %}
 {% endblock %}