Browse Source

Merge branch 'master' of https://github.com/asciimoo/searx

pw3t 11 years ago
parent
commit
9e72ebe064
13 changed files with 154 additions and 150 deletions
  1. 2 0
      .gitignore
  2. 1 2
      README.md
  3. 0 99
      engines.cfg_sample
  4. 1 0
      requirements.txt
  5. 22 0
      searx/__init__.py
  6. 11 18
      searx/engines/__init__.py
  7. 0 16
      searx/settings.py
  8. 0 1
      searx/templates/about.html
  9. 1 1
      searx/utils.py
  10. 7 13
      searx/webapp.py
  11. 107 0
      settings.yml
  12. 1 0
      setup.py
  13. 1 0
      versions.cfg

+ 2 - 0
.gitignore

@@ -1,6 +1,8 @@
 env
 env
 engines.cfg
 engines.cfg
 .installed.cfg
 .installed.cfg
+.coverage
+coverage/
 setup.cfg
 setup.cfg
 
 
 *.pyc
 *.pyc

+ 1 - 2
README.md

@@ -25,8 +25,7 @@ List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instanc
 
 
 * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
 * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
 * install dependencies: `pip install -r requirements.txt`
 * install dependencies: `pip install -r requirements.txt`
-* edit your [searx/settings.py](https://github.com/asciimoo/searx/blob/master/searx/settings.py) (set your `secret_key`!)
-* rename `engines.cfg_sample` to `engines.cfg`
+* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
 * run `python searx/webapp.py` to start the application
 * run `python searx/webapp.py` to start the application
 
 
 For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)
 For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)

+ 0 - 99
engines.cfg_sample

@@ -1,99 +0,0 @@
-[wikipedia]
-engine = mediawiki
-url    = https://en.wikipedia.org/
-number_of_results = 1
-
-[bing]
-engine = bing
-locale = en-US
-
-[currency]
-engine=currency_convert
-categories = general
-
-[deviantart]
-engine = deviantart
-categories = images
-
-[ddg definitions]
-engine = duckduckgo_definitions
-
-[duckduckgo]
-engine = duckduckgo
-locale = en-us
-
-[filecrop]
-engine = filecrop
-categories = files
-
-[flickr]
-engine = flickr
-categories = images
-
-[github]
-engine = github
-categories = it
-
-[google]
-engine        = json_engine
-search_url    = https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
-categories    = general
-url_query     = /responseData/results/unescapedUrl
-content_query = /responseData/results/content
-title_query   = /responseData/results/titleNoFormatting
-
-[google images]
-engine = google_images
-categories = images
-
-[piratebay]
-engine = piratebay
-categories = videos, music, files
-
-[soundcloud]
-engine = soundcloud
-categories = music
-
-[stackoverflow]
-engine = stackoverflow
-categories = it
-
-[startpage]
-engine = startpage
-
-[twitter]
-engine = twitter
-categories = social media
-
-[urbandictionary]
-engine        = xpath
-search_url    = http://www.urbandictionary.com/define.php?term={query}
-url_xpath     = //div[@class="word"]//a/@href
-title_xpath   = //div[@class="word"]//a
-content_xpath = //div[@class="definition"]
-
-[yahoo]
-engine           = xpath
-search_url       = http://search.yahoo.com/search?p={query}
-results_xpath    = //div[@class="res"]
-url_xpath        = .//h3/a/@href
-title_xpath      = .//h3/a
-content_xpath    = .//div[@class="abstr"]
-suggestion_xpath = //div[@id="satat"]//a
-
-[youtube]
-engine = youtube
-categories = videos
-
-[dailymotion]
-engine = dailymotion
-locale = en_US
-categories = videos
-
-[vimeo]
-engine = vimeo
-categories = videos
-results_xpath = //div[@id="browse_content"]/ol/li
-url_xpath=./a/@href
-title_xpath=./a/div[@class="data"]/p[@class="title"]/text()
-content_xpath=./a/img/@src

+ 1 - 0
requirements.txt

@@ -1,3 +1,4 @@
 flask
 flask
 grequests
 grequests
 lxml
 lxml
+pyyaml

+ 22 - 0
searx/__init__.py

@@ -0,0 +1,22 @@
+from os import environ
+from os.path import realpath, dirname, join
+try:
+    from yaml import load
+except:
+    from sys import exit, stderr
+    stderr.write('[E] install pyyaml\n')
+    exit(2)
+
+
+searx_dir  = realpath(dirname(realpath(__file__))+'/../')
+engine_dir = dirname(realpath(__file__))
+
+if 'SEARX_SETTINGS_PATH' in environ:
+    settings_path = environ['SEARX_SETTINGS_PATH']
+else:
+    settings_path = join(searx_dir, 'settings.yml')
+
+
+with open(settings_path) as settings_yaml:
+    settings = load(settings_yaml)
+

+ 11 - 18
searx/engines/__init__.py

@@ -23,16 +23,12 @@ from itertools import izip_longest, chain
 from operator import itemgetter
 from operator import itemgetter
 from urlparse import urlparse
 from urlparse import urlparse
 from searx import settings
 from searx import settings
-from searx.utils import get_useragent
-import ConfigParser
+from searx.utils import gen_useragent
 import sys
 import sys
 from datetime import datetime
 from datetime import datetime
 
 
 engine_dir = dirname(realpath(__file__))
 engine_dir = dirname(realpath(__file__))
-searx_dir  = join(engine_dir, '../../')
 
 
-engines_config = ConfigParser.SafeConfigParser()
-engines_config.read(join(searx_dir, 'engines.cfg'))
 number_of_searches = 0
 number_of_searches = 0
 
 
 engines = {}
 engines = {}
@@ -48,24 +44,23 @@ def load_module(filename):
     module.name = modname
     module.name = modname
     return module
     return module
 
 
-if not engines_config.sections():
-    print '[E] Error no engines found. Edit your engines.cfg'
+if not 'engines' in settings or not settings['engines']:
+    print '[E] Error no engines found. Edit your settings.yml'
     exit(2)
     exit(2)
 
 
-for engine_config_name in engines_config.sections():
-    engine_data = engines_config.options(engine_config_name)
-    engine = load_module(engines_config.get(engine_config_name, 'engine')+'.py')
-    engine.name = engine_config_name
+for engine_data in settings['engines']:
+    engine_name = engine_data['engine']
+    engine = load_module(engine_name+'.py')
     for param_name in engine_data:
     for param_name in engine_data:
         if param_name == 'engine':
         if param_name == 'engine':
             continue
             continue
         if param_name == 'categories':
         if param_name == 'categories':
-            if engines_config.get(engine_config_name, param_name) == 'none':
+            if engine_data['categories'] == 'none':
                 engine.categories = []
                 engine.categories = []
             else:
             else:
-                engine.categories = map(str.strip, engines_config.get(engine_config_name, param_name).split(','))
+                engine.categories = map(str.strip, engine_data['categories'].split(','))
             continue
             continue
-        setattr(engine, param_name, engines_config.get(engine_config_name, param_name))
+        setattr(engine, param_name, engine_data[param_name])
     for engine_attr in dir(engine):
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
         if engine_attr.startswith('_'):
             continue
             continue
@@ -118,8 +113,6 @@ def score_results(results):
         weight = 1.0
         weight = 1.0
         if hasattr(engines[res['engine']], 'weight'):
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
             weight = float(engines[res['engine']].weight)
-        elif res['engine'] in settings.weights:
-            weight = float(settings.weights[res['engine']])
         score = int((flat_len - i)/engines_len)*weight+1
         score = int((flat_len - i)/engines_len)*weight+1
         duplicated = False
         duplicated = False
         for new_res in results:
         for new_res in results:
@@ -153,7 +146,7 @@ def search(query, request, selected_engines):
     suggestions = set()
     suggestions = set()
     number_of_searches += 1
     number_of_searches += 1
     #user_agent = request.headers.get('User-Agent', '')
     #user_agent = request.headers.get('User-Agent', '')
-    user_agent = get_useragent()
+    user_agent = gen_useragent()
 
 
     for selected_engine in selected_engines:
     for selected_engine in selected_engines:
         if selected_engine['name'] not in engines:
         if selected_engine['name'] not in engines:
@@ -172,7 +165,7 @@ def search(query, request, selected_engines):
         request_args = dict(headers = request_params['headers']
         request_args = dict(headers = request_params['headers']
                            ,hooks   = dict(response=callback)
                            ,hooks   = dict(response=callback)
                            ,cookies = request_params['cookies']
                            ,cookies = request_params['cookies']
-                           ,timeout = settings.request_timeout
+                           ,timeout = settings['server']['request_timeout']
                            )
                            )
 
 
         if request_params['method'] == 'GET':
         if request_params['method'] == 'GET':

+ 0 - 16
searx/settings.py

@@ -1,16 +0,0 @@
-
-port = 8888
-
-secret_key = "ultrasecretkey" # change this!
-
-debug = True
-
-request_timeout = 5.0 # seconds
-
-weights = {} # 'search_engine_name': float(weight) | default is 1.0
-
-blacklist = [] # search engine blacklist
-
-categories = {} # custom search engine categories
-
-base_url = None # "https://your.domain.tld/" or None (to use request parameters)

+ 0 - 1
searx/templates/about.html

@@ -10,7 +10,6 @@
     <ul>
     <ul>
         <li>Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you</li>
         <li>Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you</li>
         <li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
         <li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
-        <li>Searx doesn't make money on ads and it isn't customised based on your interests. You get the pure search results</li>
         <li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
         <li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
     </ul>
     </ul>
     <p>If you do care about privacy, want to be a conscious user, moreover believe
     <p>If you do care about privacy, want to be a conscious user, moreover believe

+ 1 - 1
searx/utils.py

@@ -5,7 +5,7 @@ import codecs
 import cStringIO
 import cStringIO
 import re
 import re
 
 
-def get_useragent():
+def gen_useragent():
     # TODO
     # TODO
     return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
     return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
 
 

+ 7 - 13
searx/webapp.py

@@ -22,13 +22,7 @@ import sys
 if __name__ == "__main__":
 if __name__ == "__main__":
     sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
     sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
 
 
-# first argument is for specifying settings module, used mostly by robot tests
-from sys import argv
-if len(argv) == 2:
-    from importlib import import_module
-    settings = import_module('searx.' + argv[1])
-else:
-    from searx import settings
+from searx import settings
 
 
 from flask import Flask, request, render_template, url_for, Response, make_response, redirect
 from flask import Flask, request, render_template, url_for, Response, make_response, redirect
 from searx.engines import search, categories, engines, get_engines_stats
 from searx.engines import search, categories, engines, get_engines_stats
@@ -41,7 +35,7 @@ from searx.utils import highlight_content, html_to_text
 
 
 
 
 app = Flask(__name__)
 app = Flask(__name__)
-app.secret_key = settings.secret_key
+app.secret_key = settings['server']['secret_key']
 
 
 
 
 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
@@ -58,8 +52,8 @@ opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
 
 
 
 
 def get_base_url():
 def get_base_url():
-    if settings.base_url:
-        hostname = settings.base_url
+    if settings['server']['base_url']:
+        hostname = settings['server']['base_url']
     else:
     else:
         scheme = 'http'
         scheme = 'http'
         if request.is_secure:
         if request.is_secure:
@@ -252,9 +246,9 @@ def run():
     from gevent import monkey
     from gevent import monkey
     monkey.patch_all()
     monkey.patch_all()
 
 
-    app.run(debug        = settings.debug
-           ,use_debugger = settings.debug
-           ,port         = settings.port
+    app.run(debug        = settings['server']['debug']
+           ,use_debugger = settings['server']['debug']
+           ,port         = settings['server']['port']
            )
            )
 
 
 
 

+ 107 - 0
settings.yml

@@ -0,0 +1,107 @@
+server:
+    port : 8888
+    secret_key : "ultrasecretkey" # change this!
+    debug : True
+    request_timeout : 3.0 # seconds
+    base_url: False
+
+engines:
+  - name : wikipedia
+    engine : mediawiki
+    url    : https://en.wikipedia.org/
+    number_of_results : 1
+
+  - name : bing
+    engine : bing
+    locale : en-US
+
+  - name : currency
+    engine : currency_convert
+    categories : general
+
+  - name : deviantart
+    engine : deviantart
+    categories : images
+
+  - name : ddg definitions
+    engine : duckduckgo_definitions
+
+  - name : duckduckgo
+    engine : duckduckgo
+    locale : en-us
+
+  - name : filecrop
+    engine : filecrop
+    categories : files
+
+  - name : flickr
+    engine : flickr
+    categories : images
+
+  - name : github
+    engine : github
+    categories : it
+
+  - name : google
+    engine        : json_engine
+    search_url    : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
+    categories    : general
+    url_query     : /responseData/results/unescapedUrl
+    content_query : /responseData/results/content
+    title_query   : /responseData/results/titleNoFormatting
+
+  - name : google images
+    engine : google_images
+    categories : images
+
+  - name : piratebay
+    engine : piratebay
+    categories : videos, music, files
+
+  - name : soundcloud
+    engine : soundcloud
+    categories : music
+
+  - name : stackoverflow
+    engine : stackoverflow
+    categories : it
+
+  - name : startpage
+    engine : startpage
+
+  - name : twitter
+    engine : twitter
+    categories : social media
+
+  - name : urbandictionary
+    engine        : xpath
+    search_url    : http://www.urbandictionary.com/define.php?term={query}
+    url_xpath     : //div[@class="word"]//a/@href
+    title_xpath   : //div[@class="word"]//a
+    content_xpath : //div[@class="definition"]
+
+  - name : yahoo
+    engine           : xpath
+    search_url       : http://search.yahoo.com/search?p={query}
+    results_xpath    : //div[@class="res"]
+    url_xpath        : .//h3/a/@href
+    title_xpath      : .//h3/a
+    content_xpath    : .//div[@class="abstr"]
+    suggestion_xpath : //div[@id="satat"]//a
+
+  - name : youtube
+    engine : youtube
+    categories : videos
+
+  - name : dailymotion
+    engine : dailymotion
+    locale : en_US
+    categories : videos
+
+  - name : vimeo
+    engine : vimeo
+    categories : videos
+    results_xpath : //div[@id="browse_content"]/ol/li
+    url_xpath : ./a/@href
+    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
+    content_xpath : ./a/img/@src

+ 1 - 0
setup.py

@@ -32,6 +32,7 @@ setup(
         'flask',
         'flask',
         'grequests',
         'grequests',
         'lxml',
         'lxml',
+        'pyyaml',
         'setuptools',
         'setuptools',
     ],
     ],
     extras_require={
     extras_require={

+ 1 - 0
versions.cfg

@@ -16,6 +16,7 @@ mccabe = 0.2.1
 pep8 = 1.4.6
 pep8 = 1.4.6
 plone.testing = 4.0.8
 plone.testing = 4.0.8
 pyflakes = 0.7.3
 pyflakes = 0.7.3
+pyyaml = 3.10
 requests = 2.2.0
 requests = 2.2.0
 robotframework-debuglibrary = 0.3
 robotframework-debuglibrary = 0.3
 robotframework-httplibrary = 0.4.2
 robotframework-httplibrary = 0.4.2