Browse Source

Merge branch 'master' into languages

Adam Tauber 8 years ago
parent
commit
8bff42f049

+ 1 - 0
AUTHORS.rst

@@ -59,3 +59,4 @@ generally made searx better:
 - Thomas Renard @threnard
 - Thomas Renard @threnard
 - Pydo `<https://github.com/pydo>`_
 - Pydo `<https://github.com/pydo>`_
 - Athemis `<https://github.com/Athemis>`_
 - Athemis `<https://github.com/Athemis>`_
+- Stefan Antoni `<http://stefan.antoni.io>`_

+ 5 - 10
searx/engines/__init__.py

@@ -34,7 +34,8 @@ engine_dir = dirname(realpath(__file__))
 engines = {}
 engines = {}
 
 
 categories = {'general': []}
 categories = {'general': []}
-_initialized = False
+
+languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
 
 
 engine_shortcuts = {}
 engine_shortcuts = {}
 engine_default_args = {'paging': False,
 engine_default_args = {'paging': False,
@@ -214,13 +215,7 @@ def get_engines_stats():
     ]
     ]
 
 
 
 
-if 'engines' not in settings or not settings['engines']:
-    logger.error('No engines found. Edit your settings.yml')
-    exit(2)
-
-languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
-
-for engine_data in settings['engines']:
-    engine = load_engine(engine_data)
-    if engine is not None:
+def initialize_engines(engine_list):
+    for engine_data in engine_list:
+        engine = load_engine(engine_data)
         engines[engine.name] = engine
         engines[engine.name] = engine

+ 3 - 3
searx/engines/google_news.py

@@ -72,9 +72,9 @@ def response(resp):
             'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
             'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
         }
         }
 
 
-        img = result.xpath('.//img/@src')[0]
-        if img and not img.startswith('data'):
-            r['img_src'] = img
+        imgs = result.xpath('.//img/@src')
+        if len(imgs) and not imgs[0].startswith('data'):
+            r['img_src'] = imgs[0]
 
 
         results.append(r)
         results.append(r)
 
 

+ 57 - 0
searx/engines/searx_engine.py

@@ -0,0 +1,57 @@
+"""
+ Searx (all)
+
+ @website     https://github.com/asciimoo/searx
+ @provide-api yes (https://asciimoo.github.io/searx/dev/search_api.html)
+
+ @using-api   yes
+ @results     JSON
+ @stable      yes (using api)
+ @parse       url, title, content
+"""
+
+from json import loads
+from searx.engines import categories as searx_categories
+
+
+categories = searx_categories.keys()
+
+# search-url
+instance_urls = []
+instance_index = 0
+
+
+# do search-request
+def request(query, params):
+    global instance_index
+    params['url'] = instance_urls[instance_index % len(instance_urls)]
+    params['method'] = 'POST'
+
+    instance_index += 1
+
+    params['data'] = {
+        'q': query,
+        'pageno': params['pageno'],
+        'language': params['language'],
+        'time_range': params['time_range'],
+        'category': params['category'],
+        'format': 'json'
+    }
+
+    return params
+
+
+# get response from search-request
+def response(resp):
+
+    response_json = loads(resp.text)
+    results = response_json['results']
+
+    for i in ('answers', 'infoboxes'):
+        results.extend(response_json[i])
+
+    results.extend({'suggestion': s} for s in response_json['suggestions'])
+
+    results.append({'number_of_results': response_json['number_of_results']})
+
+    return results

+ 32 - 0
searx/settings.yml

@@ -13,6 +13,7 @@ server:
     secret_key : "ultrasecretkey" # change this!
     secret_key : "ultrasecretkey" # change this!
     base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
     base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
     image_proxy : False # Proxying image results through searx
     image_proxy : False # Proxying image results through searx
+    http_protocol_version : "1.0"  # 1.0 and 1.1 are supported
 
 
 ui:
 ui:
     themes_path : "" # Custom ui themes path - leave it blank if you didn't change
     themes_path : "" # Custom ui themes path - leave it blank if you didn't change
@@ -91,6 +92,17 @@ engines:
     disabled : True
     disabled : True
     shortcut : bb
     shortcut : bb
 
 
+  - name : ccc-tv
+    engine : xpath
+    paging : False
+    search_url : https://media.ccc.de/search/?q={query}
+    url_xpath : //div[@class="caption"]/h3/a/@href
+    title_xpath : //div[@class="caption"]/h3/a/text()
+    content_xpath : //div[@class="caption"]/h4/@title
+    categories : videos
+    disabled : True
+    shortcut : c3tv
+
   - name : crossref
   - name : crossref
     engine : json_engine
     engine : json_engine
     paging : True
     paging : True
@@ -154,6 +166,18 @@ engines:
     shortcut : ddg
     shortcut : ddg
     disabled : True
     disabled : True
 
 
+  - name : etymonline
+    engine : xpath
+    paging : True
+    search_url : http://etymonline.com/?search={query}&p={pageno}
+    url_xpath : //dt/a[1]/@href
+    title_xpath : //dt
+    content_xpath : //dd
+    suggestion_xpath : //a[@class="crossreference"]
+    first_page_num : 0
+    shortcut : et
+    disabled : True
+
 # api-key required: http://www.faroo.com/hp/api/api.html#key
 # api-key required: http://www.faroo.com/hp/api/api.html#key
 #  - name : faroo
 #  - name : faroo
 #    engine : faroo
 #    engine : faroo
@@ -430,6 +454,14 @@ engines:
     shortcut : scc
     shortcut : scc
     disabled : True
     disabled : True
 
 
+#  - name : searx
+#    engine : searx_engine
+#    shortcut : se
+#    instance_urls :
+#        - http://127.0.0.1:8888/
+#        - ...
+#    disabled : True
+
   - name : spotify
   - name : spotify
     engine : spotify
     engine : spotify
     shortcut : stf
     shortcut : stf

+ 1 - 0
searx/settings_robot.yml

@@ -13,6 +13,7 @@ server:
     secret_key : "ultrasecretkey" # change this!
     secret_key : "ultrasecretkey" # change this!
     base_url : False
     base_url : False
     image_proxy : False
     image_proxy : False
+    http_protocol_version : "1.0"
 
 
 ui:
 ui:
     themes_path : ""
     themes_path : ""

+ 5 - 2
searx/webapp.py

@@ -53,7 +53,7 @@ from flask_babel import Babel, gettext, format_date, format_decimal
 from flask.json import jsonify
 from flask.json import jsonify
 from searx import settings, searx_dir, searx_debug
 from searx import settings, searx_dir, searx_debug
 from searx.engines import (
 from searx.engines import (
-    categories, engines, get_engines_stats, engine_shortcuts
+    categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
 )
 )
 from searx.utils import (
 from searx.utils import (
     UnicodeWriter, highlight_content, html_to_text, get_themes,
     UnicodeWriter, highlight_content, html_to_text, get_themes,
@@ -81,7 +81,7 @@ except ImportError:
 
 
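 # serve pages with the configured HTTP protocol version (defaults to HTTP/1.0)
 # serve pages with the configured HTTP protocol version (defaults to HTTP/1.0)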
 from werkzeug.serving import WSGIRequestHandler
 from werkzeug.serving import WSGIRequestHandler
-WSGIRequestHandler.protocol_version = "HTTP/1.1"
+WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
 
 
 static_path, templates_path, themes =\
 static_path, templates_path, themes =\
     get_themes(settings['ui']['themes_path']
     get_themes(settings['ui']['themes_path']
@@ -769,6 +769,9 @@ def page_not_found(e):
 
 
 
 
 def run():
 def run():
+    if not searx_debug or os.environ.get("WERKZEUG_RUN_MAIN") == "true":
+        initialize_engines(settings['engines'])
+
     app.run(
     app.run(
         debug=searx_debug,
         debug=searx_debug,
         use_debugger=searx_debug,
         use_debugger=searx_debug,