Merge pull request #913 from asciimoo/py3

Add Python3 compatibility
Adam Tauber · 8 years ago · commit 4cffd78650
100 changed files with 448 additions and 446 deletions
  1. .travis.yml (+3, -2)
  2. requirements-dev.txt (+1, -2)
  3. searx/answerers/__init__.py (+8, -4)
  4. searx/answerers/random/answerer.py (+9, -4)
  5. searx/answerers/statistics/answerer.py (+10, -6)
  6. searx/autocomplete.py (+5, -1)
  7. searx/engines/1337x.py (+1, -2)
  8. searx/engines/__init__.py (+2, -3)
  9. searx/engines/archlinux.py (+1, -2)
  10. searx/engines/base.py (+3, -3)
  11. searx/engines/bing.py (+1, -1)
  12. searx/engines/bing_images.py (+1, -1)
  13. searx/engines/bing_news.py (+2, -3)
  14. searx/engines/blekko_images.py (+1, -1)
  15. searx/engines/btdigg.py (+2, -3)
  16. searx/engines/currency_convert.py (+9, -5)
  17. searx/engines/dailymotion.py (+1, -2)
  18. searx/engines/deezer.py (+2, -3)
  19. searx/engines/deviantart.py (+1, -1)
  20. searx/engines/dictzone.py (+3, -3)
  21. searx/engines/digbt.py (+6, -2)
  22. searx/engines/digg.py (+2, -2)
  23. searx/engines/doku.py (+1, -1)
  24. searx/engines/duckduckgo.py (+1, -1)
  25. searx/engines/duckduckgo_definitions.py (+3, -3)
  26. searx/engines/faroo.py (+1, -1)
  27. searx/engines/fdroid.py (+3, -4)
  28. searx/engines/filecrop.py (+7, -4)
  29. searx/engines/flickr.py (+1, -1)
  30. searx/engines/flickr_noapi.py (+1, -1)
  31. searx/engines/framalibre.py (+1, -3)
  32. searx/engines/frinkiac.py (+1, -1)
  33. searx/engines/gigablast.py (+1, -2)
  34. searx/engines/github.py (+1, -1)
  35. searx/engines/google.py (+2, -3)
  36. searx/engines/google_images.py (+1, -1)
  37. searx/engines/google_news.py (+1, -2)
  38. searx/engines/ina.py (+7, -3)
  39. searx/engines/json_engine.py (+8, -3)
  40. searx/engines/kickass.py (+1, -2)
  41. searx/engines/mediawiki.py (+1, -1)
  42. searx/engines/mixcloud.py (+1, -1)
  43. searx/engines/nyaa.py (+1, -1)
  44. searx/engines/openstreetmap.py (+0, -4)
  45. searx/engines/photon.py (+1, -1)
  46. searx/engines/piratebay.py (+1, -2)
  47. searx/engines/qwant.py (+1, -2)
  48. searx/engines/reddit.py (+2, -4)
  49. searx/engines/scanr_structures.py (+1, -3)
  50. searx/engines/searchcode_code.py (+2, -3)
  51. searx/engines/searchcode_doc.py (+2, -3)
  52. searx/engines/seedpeer.py (+1, -3)
  53. searx/engines/soundcloud.py (+12, -7)
  54. searx/engines/spotify.py (+2, -3)
  55. searx/engines/stackoverflow.py (+2, -4)
  56. searx/engines/startpage.py (+1, -1)
  57. searx/engines/subtitleseeker.py (+1, -1)
  58. searx/engines/swisscows.py (+13, -14)
  59. searx/engines/tokyotoshokan.py (+5, -6)
  60. searx/engines/torrentz.py (+4, -4)
  61. searx/engines/translated.py (+4, -0)
  62. searx/engines/twitter.py (+1, -2)
  63. searx/engines/vimeo.py (+1, -1)
  64. searx/engines/wikidata.py (+5, -8)
  65. searx/engines/wikipedia.py (+9, -12)
  66. searx/engines/wolframalpha_api.py (+6, -7)
  67. searx/engines/wolframalpha_noapi.py (+4, -5)
  68. searx/engines/www1x.py (+2, -4)
  69. searx/engines/www500px.py (+1, -2)
  70. searx/engines/xpath.py (+2, -2)
  71. searx/engines/yacy.py (+1, -1)
  72. searx/engines/yahoo.py (+1, -2)
  73. searx/engines/yahoo_news.py (+3, -3)
  74. searx/engines/yandex.py (+2, -2)
  75. searx/engines/youtube_api.py (+1, -1)
  76. searx/engines/youtube_noapi.py (+1, -1)
  77. searx/plugins/__init__.py (+4, -1)
  78. searx/plugins/doai_rewrite.py (+1, -1)
  79. searx/plugins/https_rewrite.py (+4, -1)
  80. searx/plugins/self_info.py (+2, -2)
  81. searx/plugins/tracker_url_remover.py (+1, -1)
  82. searx/preferences.py (+9, -9)
  83. searx/query.py (+6, -2)
  84. searx/results.py (+5, -1)
  85. searx/search.py (+10, -2)
  86. searx/settings_robot.yml (+1, -1)
  87. searx/templates/courgette/404.html (+1, -1)
  88. searx/templates/legacy/404.html (+1, -1)
  89. searx/templates/oscar/404.html (+1, -1)
  90. searx/templates/pix-art/404.html (+1, -1)
  91. searx/testing.py (+26, -16)
  92. searx/url_utils.py (+28, -0)
  93. searx/utils.py (+18, -8)
  94. searx/webapp.py (+23, -13)
  95. tests/robot/__init__.py (+75, -0)
  96. tests/robot/test_basic.robot (+0, -153)
  97. tests/unit/engines/test_archlinux.py (+2, -2)
  98. tests/unit/engines/test_bing.py (+3, -3)
  99. tests/unit/engines/test_bing_news.py (+6, -6)
  100. tests/unit/engines/test_btdigg.py (+6, -6)

+ 3 - 2
.travis.yml

@@ -9,6 +9,7 @@ addons:
 language: python
 python:
   - "2.7"
+  - "3.6"
 before_install:
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
@@ -24,9 +25,9 @@ script:
   - ./manage.sh styles
   - ./manage.sh grunt_build
   - ./manage.sh tests
-  - ./manage.sh py_test_coverage
 after_success:
-  coveralls
+  - ./manage.sh py_test_coverage
+  - coveralls
 notifications:
   irc:
     channels:

+ 1 - 2
requirements-dev.txt

@@ -3,8 +3,7 @@ mock==2.0.0
 nose2[coverage-plugin]
 pep8==1.7.0
 plone.testing==5.0.0
-robotframework-selenium2library==1.8.0
-robotsuite==1.7.0
+splinter==0.7.5
 transifex-client==0.12.2
 unittest2==1.1.0
 zope.testrunner==4.5.1
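
The browser tests move from Robot Framework (robotsuite + Selenium2Library) to splinter: tests/robot/test_basic.robot is deleted and tests/robot/__init__.py gains 75 lines (see the file list above; its diff is not part of this excerpt). A minimal sketch of what a splinter-driven check can look like, assuming a locally running instance; the URL, form field name and assertions are illustrative, not the actual new test file:

from splinter import Browser

# Hypothetical splinter smoke test; the real tests/robot/__init__.py is not shown here.
SEARX_URL = 'http://localhost:8888/'   # assumption: a local dev instance

browser = Browser('firefox')           # splinter wraps the Selenium driver
try:
    browser.visit(SEARX_URL)
    assert browser.is_text_present('searx')       # landing page rendered
    browser.fill('q', 'test query')               # fill the search box by input name
    browser.find_by_xpath('//button[@type="submit"]').first.click()
    assert browser.is_text_present('test query')  # query echoed on the results page
finally:
    browser.quit()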

+ 8 - 4
searx/answerers/__init__.py

@@ -1,8 +1,12 @@
 from os import listdir
 from os.path import realpath, dirname, join, isdir
+from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
+if version_info[0] == 3:
+    unicode = str
+
 
 answerers_dir = dirname(realpath(__file__))
 
@@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__))
 def load_answerers():
     answerers = []
     for filename in listdir(answerers_dir):
-        if not isdir(join(answerers_dir, filename)):
+        if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
             continue
         module = load_module('answerer.py', join(answerers_dir, filename))
         if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
@@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers):
 
 def ask(query):
     results = []
-    query_parts = filter(None, query.query.split())
+    query_parts = list(filter(None, query.query.split()))
 
-    if query_parts[0] not in answerers_by_keywords:
+    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0]]:
+    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
         result = answerer(query)
         if result:
             results.append(result)
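
The list(...) and .decode('utf-8') wrappers follow from two Python 3 changes: filter() now returns a lazy iterator instead of a list, and the incoming query is kept as utf-8 encoded bytes (see the searx/query.py hunk further down) while the answerer keywords are unicode strings. A small standalone illustration, not project code:

# Python 3: filter() is lazy, so it must be materialised before indexing,
# and the bytes query has to be decoded to compare against unicode keywords.
query = b'random string'
query_parts = list(filter(None, query.split()))   # [b'random', b'string']
keyword = query_parts[0].decode('utf-8')          # b'random' -> u'random'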

+ 9 - 4
searx/answerers/random/answerer.py

@@ -1,5 +1,6 @@
 import random
 import string
+import sys
 from flask_babel import gettext
 
 # required answerer attribute
@@ -8,7 +9,11 @@ keywords = ('random',)
 
 random_int_max = 2**31
 
-random_string_letters = string.lowercase + string.digits + string.uppercase
+if sys.version_info[0] == 2:
+    random_string_letters = string.lowercase + string.digits + string.uppercase
+else:
+    unicode = str
+    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_string():
@@ -24,9 +29,9 @@ def random_int():
     return unicode(random.randint(-random_int_max, random_int_max))
 
 
-random_types = {u'string': random_string,
-                u'int': random_int,
-                u'float': random_float}
+random_types = {b'string': random_string,
+                b'int': random_int,
+                b'float': random_float}
 
 
 # required answerer function

+ 10 - 6
searx/answerers/statistics/answerer.py

@@ -1,8 +1,12 @@
+from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
+if version_info[0] == 3:
+    unicode = str
+
 keywords = ('min',
             'max',
             'avg',
@@ -19,22 +23,22 @@ def answer(query):
         return []
 
     try:
-        args = map(float, parts[1:])
+        args = list(map(float, parts[1:]))
     except:
         return []
 
     func = parts[0]
     answer = None
 
-    if func == 'min':
+    if func == b'min':
         answer = min(args)
-    elif func == 'max':
+    elif func == b'max':
         answer = max(args)
-    elif func == 'avg':
+    elif func == b'avg':
         answer = sum(args) / len(args)
-    elif func == 'sum':
+    elif func == b'sum':
         answer = sum(args)
-    elif func == 'prod':
+    elif func == b'prod':
         answer = reduce(mul, args, 1)
 
     if answer is None:

+ 5 - 1
searx/autocomplete.py

@@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 from lxml import etree
 from json import loads
-from urllib import urlencode
 from searx import settings
 from searx.languages import language_codes
 from searx.engines import (
@@ -26,6 +25,11 @@ from searx.engines import (
 )
 from searx.poolrequests import get as http_get
 
+try:
+    from urllib import urlencode
+except:
+    from urllib.parse import urlencode
+
 
 def get(*args, **kwargs):
     if 'timeout' not in kwargs:

+ 1 - 2
searx/engines/1337x.py

@@ -1,8 +1,7 @@
-from urllib import quote
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from urlparse import urljoin
+from searx.url_utils import quote, urljoin
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'
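
From here on, nearly every engine swaps its urllib/urlparse imports for searx.url_utils, the new 28-line module listed above (item 92) whose own diff is not included in this excerpt. It is presumably a thin re-export shim over the stdlib locations that moved in Python 3; a plausible sketch under that assumption, not the actual file:

# Hypothetical sketch of searx/url_utils.py: re-export the URL helpers
# from wherever the running interpreter keeps them.
from sys import version_info

if version_info[0] == 2:
    from urllib import quote, quote_plus, unquote, urlencode
    from urlparse import parse_qsl, urljoin, urlparse, urlunparse
else:
    from urllib.parse import (parse_qsl, quote, quote_plus, unquote,
                              urlencode, urljoin, urlparse, urlunparse)

__all__ = ('parse_qsl', 'quote', 'quote_plus', 'unquote',
           'urlencode', 'urljoin', 'urlparse', 'urlunparse')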

+ 2 - 3
searx/engines/__init__.py

@@ -72,12 +72,11 @@ def load_engine(engine_data):
             if engine_data['categories'] == 'none':
                 engine.categories = []
             else:
-                engine.categories = map(
-                    str.strip, engine_data['categories'].split(','))
+                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
             continue
         setattr(engine, param_name, engine_data[param_name])
 
-    for arg_name, arg_value in engine_default_args.iteritems():
+    for arg_name, arg_value in engine_default_args.items():
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
 

+ 1 - 2
searx/engines/archlinux.py

@@ -11,10 +11,9 @@
  @parse        url, title
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

+ 3 - 3
searx/engines/base.py

@@ -14,10 +14,10 @@
 """
 
 from lxml import etree
-from urllib import urlencode
-from searx.utils import searx_useragent
 from datetime import datetime
 import re
+from searx.url_utils import urlencode
+from searx.utils import searx_useragent
 
 
 categories = ['science']
@@ -73,7 +73,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)
 
     for entry in search_results.xpath('./result/doc'):
         content = "No description available"

+ 1 - 1
searx/engines/bing.py

@@ -13,9 +13,9 @@
  @todo        publishedDate
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

+ 1 - 1
searx/engines/bing_images.py

@@ -15,11 +15,11 @@
               limited response to 10 images
 """
 
-from urllib import urlencode
 from lxml import html
 from json import loads
 import re
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

+ 2 - 3
searx/engines/bing_news.py

@@ -11,13 +11,12 @@
  @parse       url, title, content, publishedDate, thumbnail
 """
 
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 # engine dependent config
 categories = ['news']
@@ -86,7 +85,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    rss = etree.fromstring(resp.content)
+    rss = etree.fromstring(resp.text)
 
     ns = rss.nsmap
 

+ 1 - 1
searx/engines/blekko_images.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

+ 2 - 3
searx/engines/btdigg.py

@@ -10,11 +10,10 @@
  @parse       url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 
 # engine dependent config
@@ -38,7 +37,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     search_res = dom.xpath('//div[@id="search_res"]/table/tr')
 

+ 9 - 5
searx/engines/currency_convert.py

@@ -1,21 +1,25 @@
-from datetime import datetime
+import json
 import re
 import os
-import json
+import sys
 import unicodedata
 
+from datetime import datetime
+
+if sys.version_info[0] == 3:
+    unicode = str
 
 categories = []
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
 weight = 100
 
-parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
+parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 
 db = 1
 
 
 def normalize_name(name):
-    name = name.lower().replace('-', ' ').rstrip('s')
+    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 
@@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
 
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
     if not m:
         # wrong query
         return params
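
The pattern is now compiled from bytes and matched directly against the raw utf-8 query, which is why normalize_name() gains a .decode('utf-8'). A quick illustration of the matching step (not project code):

import re

# Bytes pattern matched against the bytes query, as in the hunk above.
parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
m = parser_re.match(b'10 usd in eur')
print(m.groups())   # (b'10', b'usd', b'eur') -- each group is bytes and is decoded later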

+ 1 - 2
searx/engines/dailymotion.py

@@ -12,10 +12,9 @@
  @todo        set content-parameter with correct data
 """
 
-from urllib import urlencode
 from json import loads
 from datetime import datetime
-from requests import get
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos']

+ 2 - 3
searx/engines/deezer.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true"
 def request(query, params):
     offset = (params['pageno'] - 1) * 25
 
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
 
     return params
 

+ 1 - 1
searx/engines/deviantart.py

@@ -12,10 +12,10 @@
  @todo        rewrite to api
 """
 
-from urllib import urlencode
 from lxml import html
 import re
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

+ 3 - 3
searx/engines/dictzone.py

@@ -10,20 +10,20 @@
 """
 
 import re
-from urlparse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang
+from searx.url_utils import urljoin
 
 categories = ['general']
 url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
-parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
 results_xpath = './/table[@id="r"]/tr'
 
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
     if not m:
         return params
 

+ 6 - 2
searx/engines/digbt.py

@@ -10,10 +10,14 @@
  @parse       url, title, content, magnetlink
 """
 
-from urlparse import urljoin
+from sys import version_info
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
+from searx.url_utils import urljoin
+
+if version_info[0] == 3:
+    unicode = str
 
 categories = ['videos', 'music', 'files']
 paging = True
@@ -31,7 +35,7 @@ def request(query, params):
 
 
 def response(resp):
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
     search_res = dom.xpath('.//td[@class="x-item"]')
 
     if not search_res:

+ 2 - 2
searx/engines/digg.py

@@ -10,10 +10,10 @@
  @parse       url, title, content, publishedDate, thumbnail
 """
 
-from urllib import quote_plus
+from dateutil import parser
 from json import loads
 from lxml import html
-from dateutil import parser
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['news', 'social media']

+ 1 - 1
searx/engines/doku.py

@@ -9,9 +9,9 @@
 # @stable      yes
 # @parse       (general)    url, title, content
 
-from urllib import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

+ 1 - 1
searx/engines/duckduckgo.py

@@ -13,11 +13,11 @@
  @todo        rewrite to api
 """
 
-from urllib import urlencode
 from lxml.html import fromstring
 from requests import get
 from json import loads
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

+ 3 - 3
searx/engines/duckduckgo_definitions.py

@@ -1,10 +1,10 @@
 import json
-from urllib import urlencode
-from re import compile, sub
 from lxml import html
-from searx.utils import html_to_text
+from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
+from searx.utils import html_to_text
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

+ 1 - 1
searx/engines/faroo.py

@@ -10,10 +10,10 @@
  @parse       url, title, content, publishedDate, img_src
 """
 
-from urllib import urlencode
 from json import loads
 import datetime
 from searx.utils import searx_useragent
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general', 'news']

+ 3 - 4
searx/engines/fdroid.py

@@ -9,9 +9,9 @@
  @parse        url, title, content
 """
 
-from urllib import urlencode
-from searx.engines.xpath import extract_text
 from lxml import html
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files']
@@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}'
 
 # do search-request
 def request(query, params):
-    query = urlencode({'fdfilter': query,
-                       'fdpage': params['pageno']})
+    query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
     params['url'] = search_url.format(query=query)
     return params
 

+ 7 - 4
searx/engines/filecrop.py

@@ -1,5 +1,9 @@
-from urllib import urlencode
-from HTMLParser import HTMLParser
+from searx.url_utils import urlencode
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
 
 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa
@@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser):
 
 def request(query, params):
     index = 1 + (params['pageno'] - 1) * 30
-    params['url'] = search_url.format(query=urlencode({'w': query}),
-                                      index=index)
+    params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
     return params
 
 

+ 1 - 1
searx/engines/flickr.py

@@ -13,8 +13,8 @@
  More info on api-key : https://www.flickr.com/services/apps/create/
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 categories = ['images']
 

+ 1 - 1
searx/engines/flickr_noapi.py

@@ -12,11 +12,11 @@
  @parse       url, title, thumbnail, img_src
 """
 
-from urllib import urlencode
 from json import loads
 from time import time
 import re
 from searx.engines import logger
+from searx.url_utils import urlencode
 
 
 logger = logger.getChild('flickr-noapi')

+ 1 - 3
searx/engines/framalibre.py

@@ -10,12 +10,10 @@
  @parse       url, title, content, thumbnail, img_src
 """
 
-from urlparse import urljoin
 from cgi import escape
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from dateutil import parser
+from searx.url_utils import urljoin, urlencode
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/frinkiac.py

@@ -10,7 +10,7 @@ Frinkiac (Images)
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 categories = ['images']
 

+ 1 - 2
searx/engines/gigablast.py

@@ -11,10 +11,9 @@
 """
 
 from json import loads
-from random import randint
 from time import time
-from urllib import urlencode
 from lxml.html import fromstring
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

+ 1 - 1
searx/engines/github.py

@@ -10,8 +10,8 @@
  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']

+ 2 - 3
searx/engines/google.py

@@ -9,11 +9,10 @@
 # @parse       url, title, content, suggestion
 
 import re
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from lxml import html, etree
 from searx.engines.xpath import extract_text, extract_url
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 logger = logger.getChild('google engine')
 

+ 1 - 1
searx/engines/google_images.py

@@ -11,9 +11,9 @@
 """
 
 from datetime import date, timedelta
-from urllib import urlencode
 from json import loads
 from lxml import html
+from searx.url_utils import urlencode
 
 
 # engine dependent config

+ 1 - 2
searx/engines/google_news.py

@@ -11,9 +11,8 @@
 """
 
 from lxml import html
-from urllib import urlencode
-from json import loads
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # search-url
 categories = ['news']

+ 7 - 3
searx/engines/ina.py

@@ -12,11 +12,15 @@
 # @todo        embedded (needs some md5 from video page)
 
 from json import loads
-from urllib import urlencode
 from lxml import html
-from HTMLParser import HTMLParser
-from searx.engines.xpath import extract_text
 from dateutil import parser
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
 
 # engine dependent config
 categories = ['videos']

+ 8 - 3
searx/engines/json_engine.py

@@ -1,11 +1,16 @@
-from urllib import urlencode
-from json import loads
 from collections import Iterable
+from json import loads
+from sys import version_info
+from searx.url_utils import urlencode
+
+if version_info[0] == 3:
+    unicode = str
 
 search_url = None
 url_query = None
 content_query = None
 title_query = None
+paging = False
 suggestion_query = ''
 results_query = ''
 
@@ -20,7 +25,7 @@ first_page_num = 1
 
 def iterate(iterable):
     if type(iterable) == dict:
-        it = iterable.iteritems()
+        it = iterable.items()
 
     else:
         it = enumerate(iterable)

+ 1 - 2
searx/engines/kickass.py

@@ -10,12 +10,11 @@
  @parse       url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
+from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

+ 1 - 1
searx/engines/mediawiki.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from string import Formatter
-from urllib import urlencode, quote
+from searx.url_utils import urlencode, quote
 
 # engine dependent config
 categories = ['general']

+ 1 - 1
searx/engines/mixcloud.py

@@ -11,8 +11,8 @@
 """
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']

+ 1 - 1
searx/engines/nyaa.py

@@ -9,9 +9,9 @@
  @parse        url, title, content, seed, leech, torrentfile
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'images', 'videos', 'music']

+ 0 - 4
searx/engines/openstreetmap.py

@@ -11,7 +11,6 @@
 """
 
 from json import loads
-from searx.utils import searx_useragent
 
 # engine dependent config
 categories = ['map']
@@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
 def request(query, params):
     params['url'] = base_url + search_string.format(query=query)
 
-    # using searx User-Agent
-    params['headers']['User-Agent'] = searx_useragent()
-
     return params
 
 

+ 1 - 1
searx/engines/photon.py

@@ -10,9 +10,9 @@
  @parse       url, title
 """
 
-from urllib import urlencode
 from json import loads
 from searx.utils import searx_useragent
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['map']

+ 1 - 2
searx/engines/piratebay.py

@@ -8,11 +8,10 @@
 # @stable      yes (HTML can change)
 # @parse       url, title, content, seed, leech, magnetlink
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

+ 1 - 2
searx/engines/qwant.py

@@ -12,9 +12,8 @@
 
 from datetime import datetime
 from json import loads
-from urllib import urlencode
-
 from searx.utils import html_to_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = None

+ 2 - 4
searx/engines/reddit.py

@@ -11,9 +11,8 @@
 """
 
 import json
-from urllib import urlencode
-from urlparse import urlparse, urljoin
 from datetime import datetime
+from searx.url_utils import urlencode, urljoin, urlparse
 
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']
@@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}'
 
 # do search-request
 def request(query, params):
-    query = urlencode({'q': query,
-                       'limit': page_size})
+    query = urlencode({'q': query, 'limit': page_size})
     params['url'] = search_url.format(query=query)
 
     return params

+ 1 - 3
searx/engines/scanr_structures.py

@@ -10,9 +10,7 @@
  @parse       url, title, content, img_src
 """
 
-from urllib import urlencode
 from json import loads, dumps
-from dateutil import parser
 from searx.utils import html_to_text
 
 # engine dependent config
@@ -48,7 +46,7 @@ def response(resp):
     search_res = loads(resp.text)
 
     # return empty array if there are no results
-    if search_res.get('total') < 1:
+    if search_res.get('total', 0) < 1:
         return []
 
     # parse results

+ 2 - 3
searx/engines/searchcode_code.py

@@ -10,8 +10,8 @@
  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 
 # engine dependent config
@@ -31,8 +31,7 @@ code_endings = {'cs': 'c#',
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'] - 1)
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
 
     return params
 

+ 2 - 3
searx/engines/searchcode_doc.py

@@ -10,8 +10,8 @@
  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']
@@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}'
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'] - 1)
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
 
     return params
 

+ 1 - 3
searx/engines/seedpeer.py

@@ -8,11 +8,9 @@
 # @stable      yes (HTML can change)
 # @parse       url, title, content, seed, leech, magnetlink
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
-from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 
 
 url = 'http://www.seedpeer.eu/'

+ 12 - 7
searx/engines/soundcloud.py

@@ -11,13 +11,17 @@
 """
 
 import re
-from StringIO import StringIO
 from json import loads
-from lxml import etree
-from urllib import urlencode, quote_plus
+from lxml import html
 from dateutil import parser
 from searx import logger
 from searx.poolrequests import get as http_get
+from searx.url_utils import quote_plus, urlencode
+
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
 
 # engine dependent config
 categories = ['music']
@@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
     'scrolling="no" frameborder="no" ' +\
     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
 
+cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
+
 
 def get_client_id():
     response = http_get("https://soundcloud.com")
-    rx_namespace = {"re": "http://exslt.org/regular-expressions"}
 
     if response.ok:
-        tree = etree.parse(StringIO(response.content), etree.HTMLParser())
-        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
+        tree = html.fromstring(response.content)
+        script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
         app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]
 
         # extracts valid app_js urls from soundcloud.com content
@@ -51,7 +56,7 @@ def get_client_id():
             # gets app_js and searches for the clientid
             response = http_get(app_js_url)
             if response.ok:
-                cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
+                cids = cid_re.search(response.text)
                 if cids is not None and len(cids.groups()):
                     return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

+ 2 - 3
searx/engines/spotify.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{
 def request(query, params):
     offset = (params['pageno'] - 1) * 20
 
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
 
     return params
 

+ 2 - 4
searx/engines/stackoverflow.py

@@ -10,10 +10,9 @@
  @parse       url, title, content
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
@@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]'
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'])
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
 
     return params
 

+ 1 - 1
searx/engines/startpage.py

@@ -56,7 +56,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     # parse results
     for result in dom.xpath(results_xpath):

+ 1 - 1
searx/engines/subtitleseeker.py

@@ -10,10 +10,10 @@
  @parse       url, title, content
 """
 
-from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['videos']

+ 13 - 14
searx/engines/swisscows.py

@@ -11,9 +11,9 @@
 """
 
 from json import loads
-from urllib import urlencode, unquote
 import re
 from lxml.html import fromstring
+from searx.url_utils import unquote, urlencode
 
 # engine dependent config
 categories = ['general', 'images']
@@ -27,10 +27,10 @@ search_string = '?{query}&page={page}'
 supported_languages_url = base_url
 
 # regex
-regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
-regex_json_remove_start = re.compile(r'^initialData:\s*')
-regex_json_remove_end = re.compile(r',\s*environment$')
-regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
+regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
+regex_json_remove_start = re.compile(b'^initialData:\s*')
+regex_json_remove_end = re.compile(b',\s*environment$')
+regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
 
 
 # do search-request
@@ -45,10 +45,9 @@ def request(query, params):
         ui_language = params['language'].split('-')[0]
 
     search_path = search_string.format(
-        query=urlencode({'query': query,
-                         'uiLanguage': ui_language,
-                         'region': region}),
-        page=params['pageno'])
+        query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
+        page=params['pageno']
+    )
 
     # image search query is something like 'image?{query}&page={page}'
     if params['category'] == 'images':
@@ -63,14 +62,14 @@ def request(query, params):
 def response(resp):
     results = []
 
-    json_regex = regex_json.search(resp.content)
+    json_regex = regex_json.search(resp.text)
 
     # check if results are returned
     if not json_regex:
         return []
 
-    json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
-    json = loads(json_raw)
+    json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
+    json = loads(json_raw.decode('utf-8'))
 
     # parse results
     for result in json['Results'].get('items', []):
@@ -78,7 +77,7 @@ def response(resp):
 
         # parse image results
         if result.get('ContentType', '').startswith('image'):
-            img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+            img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
 
             # append result
             results.append({'url': result['SourceUrl'],
@@ -100,7 +99,7 @@ def response(resp):
     # parse images
     for result in json.get('Images', []):
         # decode image url
-        img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+        img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
 
         # append result
         results.append({'url': result['SourceUrl'],

+ 5 - 6
searx/engines/tokyotoshokan.py

@@ -11,11 +11,11 @@
 """
 
 import re
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
 from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'videos', 'music']
@@ -28,8 +28,7 @@ search_url = base_url + 'search.php?{query}'
 
 # do search-request
 def request(query, params):
-    query = urlencode({'page': params['pageno'],
-                       'terms': query})
+    query = urlencode({'page': params['pageno'], 'terms': query})
     params['url'] = search_url.format(query=query)
     return params
 
@@ -50,7 +49,7 @@ def response(resp):
     size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
 
     # processing the results, two rows at a time
-    for i in xrange(0, len(rows), 2):
+    for i in range(0, len(rows), 2):
         # parse the first row
         name_row = rows[i]
 
@@ -79,14 +78,14 @@ def response(resp):
                     groups = size_re.match(item).groups()
                     multiplier = get_filesize_mul(groups[1])
                     params['filesize'] = int(multiplier * float(groups[0]))
-                except Exception as e:
+                except:
                     pass
             elif item.startswith('Date:'):
                 try:
                     # Date: 2016-02-21 21:44 UTC
                     date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
                     params['publishedDate'] = date
-                except Exception as e:
+                except:
                     pass
             elif item.startswith('Comment:'):
                 params['content'] = item

+ 4 - 4
searx/engines/torrentz.py

@@ -12,11 +12,11 @@
 """
 
 import re
-from urllib import urlencode
 from lxml import html
-from searx.engines.xpath import extract_text
 from datetime import datetime
 from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'videos', 'music']
@@ -70,7 +70,7 @@ def response(resp):
             size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
             size, suffix = size_str.split()
             params['filesize'] = int(size) * get_filesize_mul(suffix)
-        except Exception as e:
+        except:
             pass
 
         # does our link contain a valid SHA1 sum?
@@ -84,7 +84,7 @@ def response(resp):
             # Fri, 25 Mar 2016 16:29:01
             date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
             params['publishedDate'] = date
-        except Exception as e:
+        except:
             pass
 
         results.append(params)

+ 4 - 0
searx/engines/translated.py

@@ -9,8 +9,12 @@
  @parse       url, title, content
 """
 import re
+from sys import version_info
 from searx.utils import is_valid_lang
 
+if version_info[0] == 3:
+    unicode = str
+
 categories = ['general']
 url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
 web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'

+ 1 - 2
searx/engines/twitter.py

@@ -12,11 +12,10 @@
  @todo        publishedDate
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['social media']

+ 1 - 1
searx/engines/vimeo.py

@@ -13,8 +13,8 @@
 # @todo        set content-parameter with correct data
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos']

+ 5 - 8
searx/engines/wikidata.py

@@ -14,12 +14,11 @@
 from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
-from searx.utils import format_date_by_locale
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 from json import loads
 from lxml.html import fromstring
-from urllib import urlencode
 
 logger = logger.getChild('wikidata')
 result_count = 1
@@ -62,14 +61,13 @@ def request(query, params):
         language = 'en'
 
     params['url'] = url_search.format(
-        query=urlencode({'label': query,
-                        'language': language}))
+        query=urlencode({'label': query, 'language': language}))
     return params
 
 
 def response(resp):
     results = []
-    html = fromstring(resp.content)
+    html = fromstring(resp.text)
     wikidata_ids = html.xpath(wikidata_ids_xpath)
 
     language = resp.search_params['language'].split('-')[0]
@@ -78,10 +76,9 @@ def response(resp):
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
     for wikidata_id in wikidata_ids[:result_count]:
-        url = url_detail.format(query=urlencode({'page': wikidata_id,
-                                                'uselang': language}))
+        url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content)
+        jsonresponse = loads(htmlresponse.text)
         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
 
     return results

+ 9 - 12
searx/engines/wikipedia.py

@@ -11,13 +11,12 @@
 """
 
 from json import loads
-from urllib import urlencode, quote
 from lxml.html import fromstring
-
+from searx.url_utils import quote, urlencode
 
 # search-url
-base_url = 'https://{language}.wikipedia.org/'
-search_postfix = 'w/api.php?'\
+base_url = u'https://{language}.wikipedia.org/'
+search_url = base_url + u'w/api.php?'\
     'action=query'\
     '&format=json'\
     '&{query}'\
@@ -37,16 +36,16 @@ def url_lang(lang):
     else:
         language = lang
 
-    return base_url.format(language=language)
+    return language
 
 
 # do search-request
 def request(query, params):
     if query.islower():
-        query += '|' + query.title()
+        query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
 
-    params['url'] = url_lang(params['language']) \
-        + search_postfix.format(query=urlencode({'titles': query}))
+    params['url'] = search_url.format(query=urlencode({'titles': query}),
+                                      language=url_lang(params['language']))
 
     return params
 
@@ -78,7 +77,7 @@ def extract_first_paragraph(content, title, image):
 def response(resp):
     results = []
 
-    search_result = loads(resp.content)
+    search_result = loads(resp.text)
 
     # wikipedia article's unique id
     # first valid id is assumed to be the requested article
@@ -99,11 +98,9 @@ def response(resp):
     extract = page.get('extract')
 
     summary = extract_first_paragraph(extract, title, image)
-    if not summary:
-        return []
 
     # link to wikipedia article
-    wikipedia_link = url_lang(resp.search_params['language']) \
+    wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
         + 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))
 
     results.append({'url': wikipedia_link, 'title': title})
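
Because the query now arrives as utf-8 bytes, the lowercase check decodes it before building the twin-capitalisation titles parameter. A short illustration of what request() produces, with a hypothetical query value:

# Illustration only: a lowercase query asks Wikipedia for both capitalisations.
query = b'paris'
if query.islower():
    query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
print(query)   # b'paris|Paris', passed to urlencode({'titles': query}) in the search URL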

+ 6 - 7
searx/engines/wolframalpha_api.py

@@ -8,8 +8,8 @@
 # @stable      yes
 # @parse       url, infobox
 
-from urllib import urlencode
 from lxml import etree
+from searx.url_utils import urlencode
 
 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@ -37,8 +37,7 @@ image_pods = {'VisualRepresentation',
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'input': query}),
-                                      api_key=api_key)
+    params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
     params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
 
     return params
@@ -56,7 +55,7 @@ def replace_pua_chars(text):
                  u'\uf74e': 'i',        # imaginary number
                  u'\uf7d9': '='}        # equals sign
 
-    for k, v in pua_chars.iteritems():
+    for k, v in pua_chars.items():
         text = text.replace(k, v)
 
     return text
@@ -66,7 +65,7 @@ def replace_pua_chars(text):
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)
 
     # return empty array if there are no results
     if search_results.xpath(failure_xpath):
@@ -120,10 +119,10 @@ def response(resp):
     # append infobox
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
-                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
 
     # append link to site
-    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+    results.append({'url': resp.request.headers['Referer'],
                     'title': title,
                     'content': result_content})
 

+ 4 - 5
searx/engines/wolframalpha_noapi.py

@@ -10,10 +10,9 @@
 
 from json import loads
 from time import time
-from urllib import urlencode
-from lxml.etree import XML
 
 from searx.poolrequests import get as http_get
+from searx.url_utils import urlencode
 
 # search-url
 url = 'https://www.wolframalpha.com/'
@@ -62,7 +61,7 @@ obtain_token()
 # do search-request
 def request(query, params):
     # obtain token if last update was more than an hour
-    if time() - token['last_updated'] > 3600:
+    if time() - (token['last_updated'] or 0) > 3600:
         obtain_token()
     params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
     params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
@@ -112,9 +111,9 @@ def response(resp):
 
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
-                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
 
-    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+    results.append({'url': resp.request.headers['Referer'],
                     'title': 'Wolfram|Alpha (' + infobox_title + ')',
                     'content': result_content})
 

+ 2 - 4
searx/engines/www1x.py

@@ -10,11 +10,9 @@
  @parse       url, title, thumbnail, img_src, content
 """
 
-from urllib import urlencode
-from urlparse import urljoin
 from lxml import html
-import string
 import re
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['images']
@@ -55,7 +53,7 @@ def response(resp):
         cur_element += result_part
 
         # fix xml-error
-        cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+        cur_element = cur_element.replace('"></a>', '"/></a>')
 
         dom = html.fromstring(cur_element)
         link = dom.xpath('//a')[0]

+ 1 - 2
searx/engines/www500px.py

@@ -13,8 +13,7 @@
 """
 
 from json import loads
-from urllib import urlencode
-from urlparse import urljoin
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['images']

+ 2 - 2
searx/engines/xpath.py

@@ -1,13 +1,13 @@
 from lxml import html
-from urllib import urlencode, unquote
-from urlparse import urlparse, urljoin
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
 from searx.utils import html_to_text
+from searx.url_utils import unquote, urlencode, urljoin, urlparse
 
 search_url = None
 url_xpath = None
 content_xpath = None
 title_xpath = None
+paging = False
 suggestion_xpath = ''
 results_xpath = ''
 

+ 1 - 1
searx/engines/yacy.py

@@ -13,8 +13,8 @@
 # @todo        parse video, audio and file results
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 from searx.utils import html_to_text
 

+ 1 - 2
searx/engines/yahoo.py

@@ -11,10 +11,9 @@
  @parse       url, title, content, suggestion
 """
 
-from urllib import urlencode
-from urlparse import unquote
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
+from searx.url_utils import unquote, urlencode
 
 # engine dependent config
 categories = ['general']

+ 3 - 3
searx/engines/yahoo_news.py

@@ -9,13 +9,13 @@
 # @stable      no (HTML can change)
 # @parse       url, title, content, publishedDate
 
-from urllib import urlencode
+import re
+from datetime import datetime, timedelta
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
-from datetime import datetime, timedelta
-import re
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['news']

+ 2 - 2
searx/engines/yandex.py

@@ -9,9 +9,9 @@
  @parse       url, title, content
 """
 
-from urllib import urlencode
 from lxml import html
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode
 
 logger = logger.getChild('yandex engine')
 

+ 1 - 1
searx/engines/youtube_api.py

@@ -9,8 +9,8 @@
 # @parse       url, title, content, publishedDate, thumbnail, embedded
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos', 'music']

+ 1 - 1
searx/engines/youtube_noapi.py

@@ -8,10 +8,10 @@
 # @stable      no
 # @parse       url, title, content, publishedDate, thumbnail, embedded
 
-from urllib import quote_plus
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import list_get
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['videos', 'music']

+ 4 - 1
searx/plugins/__init__.py

@@ -14,9 +14,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 (C) 2015 by Adam Tauber, <asciimoo@gmail.com>
 '''
-from sys import exit
+from sys import exit, version_info
 from searx import logger
 
+if version_info[0] == 3:
+    unicode = str
+
 logger = logger.getChild('plugins')
 
 from searx.plugins import (doai_rewrite,

+ 1 - 1
searx/plugins/doai_rewrite.py

@@ -1,6 +1,6 @@
 from flask_babel import gettext
 import re
-from urlparse import urlparse, parse_qsl
+from searx.url_utils import urlparse, parse_qsl
 
 regex = re.compile(r'10\.\d{4,9}/[^\s]+')
 

+ 4 - 1
searx/plugins/https_rewrite.py

@@ -16,14 +16,17 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 import re
-from urlparse import urlparse
+import sys
 from lxml import etree
 from os import listdir, environ
 from os.path import isfile, isdir, join
 from searx.plugins import logger
 from flask_babel import gettext
 from searx import searx_dir
+from searx.url_utils import urlparse
 
+if sys.version_info[0] == 3:
+    unicode = str
 
 name = "HTTPS rewrite"
 description = gettext('Rewrite HTTP links to HTTPS if possible')

+ 2 - 2
searx/plugins/self_info.py

@@ -22,7 +22,7 @@ default_on = True
 
 
 # Self User Agent regex
-p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
+p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
 
 
 # attach callback to the post search hook
@@ -31,7 +31,7 @@ p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
 def post_search(request, search):
     if search.search_query.pageno > 1:
         return True
-    if search.search_query.query == 'ip':
+    if search.search_query.query == b'ip':
         x_forwarded_for = request.headers.getlist("X-Forwarded-For")
         if x_forwarded_for:
             ip = x_forwarded_for[0]

+ 1 - 1
searx/plugins/tracker_url_remover.py

@@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 from flask_babel import gettext
 import re
-from urlparse import urlunparse
+from searx.url_utils import urlunparse
 
 regexes = {re.compile(r'utm_[^&]+&?'),
            re.compile(r'(wkey|wemail)[^&]+&?'),

+ 9 - 9
searx/preferences.py

@@ -23,7 +23,7 @@ class Setting(object):
     def __init__(self, default_value, **kwargs):
         super(Setting, self).__init__()
         self.value = default_value
-        for key, value in kwargs.iteritems():
+        for key, value in kwargs.items():
             setattr(self, key, value)
 
         self._post_init()
@@ -38,7 +38,7 @@ class Setting(object):
         return self.value
 
     def save(self, name, resp):
-        resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE)
+        resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)
 
 
 class StringSetting(Setting):
@@ -133,7 +133,7 @@ class MapSetting(Setting):
 
     def save(self, name, resp):
         if hasattr(self, 'key'):
-            resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE)
+            resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)
 
 
 class SwitchableSetting(Setting):
@@ -194,7 +194,7 @@ class EnginesSetting(SwitchableSetting):
     def _post_init(self):
         super(EnginesSetting, self)._post_init()
         transformed_choices = []
-        for engine_name, engine in self.choices.iteritems():
+        for engine_name, engine in self.choices.items():
             for category in engine.categories:
                 transformed_choice = dict()
                 transformed_choice['default_on'] = not engine.disabled
@@ -241,9 +241,9 @@ class Preferences(object):
                                    'language': SearchLanguageSetting(settings['search']['language'],
                                                                      choices=LANGUAGE_CODES),
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
-                                                               choices=settings['locales'].keys() + ['']),
+                                                               choices=list(settings['locales'].keys()) + ['']),
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
-                                                                     choices=autocomplete.backends.keys() + ['']),
+                                                                     choices=list(autocomplete.backends.keys()) + ['']),
                                    'image_proxy': MapSetting(settings['server']['image_proxy'],
                                                              map={'': settings['server']['image_proxy'],
                                                                   '0': False,
@@ -260,7 +260,7 @@ class Preferences(object):
         self.unknown_params = {}
 
     def parse_cookies(self, input_data):
-        for user_setting_name, user_setting in input_data.iteritems():
+        for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
                 self.key_value_settings[user_setting_name].parse(user_setting)
             elif user_setting_name == 'disabled_engines':
@@ -274,7 +274,7 @@ class Preferences(object):
         disabled_engines = []
         enabled_categories = []
         disabled_plugins = []
-        for user_setting_name, user_setting in input_data.iteritems():
+        for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
                 self.key_value_settings[user_setting_name].parse(user_setting)
             elif user_setting_name.startswith('engine_'):
@@ -295,7 +295,7 @@ class Preferences(object):
             return self.key_value_settings[user_setting_name].get_value()
 
     def save(self, resp):
-        for user_setting_name, user_setting in self.key_value_settings.iteritems():
+        for user_setting_name, user_setting in self.key_value_settings.items():
             user_setting.save(user_setting_name, resp)
         self.engines.save(resp)
         self.plugins.save(resp)
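
All of the iteritems() calls in this file become items(), which exists on both interpreters: a list copy on Python 2, a view on Python 3. For the cookie-sized dicts handled here the extra copy on Python 2 is negligible. A trivial illustration with made-up values:

# dict.iteritems() is gone in Python 3; .items() works everywhere.
settings = {'autocomplete': '', 'image_proxy': '0'}   # hypothetical values
for name, value in settings.items():
    print(name, value)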

+ 6 - 2
searx/query.py

@@ -21,8 +21,12 @@ from searx.languages import language_codes
 from searx.engines import (
     categories, engines, engine_shortcuts
 )
-import string
 import re
+import string
+import sys
+
+if sys.version_info[0] == 3:
+    unicode = str
 
 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
 
@@ -146,7 +150,7 @@ class SearchQuery(object):
     """container for all the search parameters (query, language, etc...)"""
 
     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
-        self.query = query
+        self.query = query.encode('utf-8')
         self.engines = engines
         self.categories = categories
         self.lang = lang

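Python 3 has no built-in `unicode`, so the module aliases it to `str` when running under Python 3; `SearchQuery.query` is additionally stored as UTF-8 bytes so later code deals with a single type. The alias pattern on its own, as a standalone sketch:

    import sys

    if sys.version_info[0] == 3:
        unicode = str  # on Python 3, text is str

    query = unicode('test query')    # works under either interpreter
    encoded = query.encode('utf-8')  # bytes, as stored on SearchQuery
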
+ 5 - 1
searx/results.py

@@ -1,9 +1,13 @@
 import re
+import sys
 from collections import defaultdict
 from operator import itemgetter
 from threading import RLock
-from urlparse import urlparse, unquote
 from searx.engines import engines
+from searx.url_utils import urlparse, unquote
+
+if sys.version_info[0] == 3:
+    basestring = str
 
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

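`basestring` is likewise Python 2 only, so it is aliased to `str` on Python 3 and any `isinstance(..., basestring)` check in the module keeps working; URL helpers now come from the new `searx.url_utils` shim introduced further below. A minimal illustration (the helper name is hypothetical):

    import sys

    if sys.version_info[0] == 3:
        basestring = str

    def is_text(value):
        # str or unicode on Python 2, str on Python 3
        return isinstance(value, basestring)
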
+ 10 - 2
searx/search.py

@@ -16,8 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 import gc
+import sys
 import threading
-from thread import start_new_thread
 from time import time
 from uuid import uuid4
 import requests.exceptions
@@ -33,6 +33,14 @@ from searx import logger
 from searx.plugins import plugins
 from searx.exceptions import SearxParameterException
 
+try:
+    from thread import start_new_thread
+except:
+    from _thread import start_new_thread
+
+if sys.version_info[0] == 3:
+    unicode = str
+
 logger = logger.getChild('search')
 
 number_of_searches = 0
@@ -387,7 +395,7 @@ class Search(object):
             request_params['time_range'] = search_query.time_range
 
             # append request to list
-            requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params))
+            requests.append((selected_engine['name'], search_query.query, request_params))
 
             # update timeout_limit
             timeout_limit = max(timeout_limit, engine.timeout)

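The low-level threading module was renamed from `thread` to `_thread` in Python 3, hence the import fallback (the commit uses a bare `except:`; `except ImportError:` is the narrower form), and the `unicode = str` alias is repeated here as well. A standalone sketch of the fallback:

    try:
        from thread import start_new_thread   # Python 2
    except ImportError:
        from _thread import start_new_thread  # Python 3

    def worker(message):
        print(message)

    start_new_thread(worker, ('search request dispatched',))
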
+ 1 - 1
searx/settings_robot.yml

@@ -17,7 +17,7 @@ server:
 
 ui:
     themes_path : ""
-    default_theme : legacy
+    default_theme : oscar
     default_locale : ""
 
 outgoing:

+ 1 - 1
searx/templates/courgette/404.html

@@ -3,7 +3,7 @@
 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

+ 1 - 1
searx/templates/legacy/404.html

@@ -3,7 +3,7 @@
 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

+ 1 - 1
searx/templates/oscar/404.html

@@ -3,7 +3,7 @@
 <div class="text-center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

+ 1 - 1
searx/templates/pix-art/404.html

@@ -3,7 +3,7 @@
 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

+ 26 - 16
searx/testing.py

@@ -1,13 +1,16 @@
 # -*- coding: utf-8 -*-
 """Shared testing code."""
 
-from plone.testing import Layer
-from unittest2 import TestCase
-from os.path import dirname, join, abspath
-
 
 import os
 import subprocess
+import traceback
+
+
+from os.path import dirname, join, abspath
+
+from splinter import Browser
+from unittest2 import TestCase
 
 
 class SearxTestLayer:
@@ -32,7 +35,7 @@ class SearxTestLayer:
     testTearDown = classmethod(testTearDown)
 
 
-class SearxRobotLayer(Layer):
+class SearxRobotLayer():
     """Searx Robot Test Layer"""
 
     def setUp(self):
@@ -62,7 +65,12 @@ class SearxRobotLayer(Layer):
         del os.environ['SEARX_SETTINGS_PATH']
 
 
-SEARXROBOTLAYER = SearxRobotLayer()
+# SEARXROBOTLAYER = SearxRobotLayer()
+def run_robot_tests(tests):
+    print('Running {0} tests'.format(len(tests)))
+    for test in tests:
+        with Browser() as browser:
+            test(browser)
 
 
 class SearxTestCase(TestCase):
@@ -72,17 +80,19 @@ class SearxTestCase(TestCase):
 
 
 if __name__ == '__main__':
-    from tests.test_robot import test_suite
     import sys
-    from zope.testrunner.runner import Runner
+    # test cases
+    from tests import robot
 
     base_dir = abspath(join(dirname(__file__), '../tests'))
     if sys.argv[1] == 'robot':
-        r = Runner(['--color',
-                    '--auto-progress',
-                    '--stop-on-error',
-                    '--path',
-                    base_dir],
-                   found_suites=[test_suite()])
-        r.run()
-        sys.exit(int(r.failed))
+        test_layer = SearxRobotLayer()
+        errors = False
+        try:
+            test_layer.setUp()
+            run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')])
+        except Exception:
+            errors = True
+            print('Error occurred: {0}'.format(traceback.format_exc()))
+        test_layer.tearDown()
+        sys.exit(1 if errors else 0)

+ 28 - 0
searx/url_utils.py

@@ -0,0 +1,28 @@
+from sys import version_info
+
+if version_info[0] == 2:
+    from urllib import quote, quote_plus, unquote, urlencode
+    from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult
+else:
+    from urllib.parse import (
+        parse_qsl,
+        quote,
+        quote_plus,
+        unquote,
+        urlencode,
+        urljoin,
+        urlparse,
+        urlunparse,
+        ParseResult
+    )
+
+
+__export__ = (parse_qsl,
+              quote,
+              quote_plus,
+              unquote,
+              urlencode,
+              urljoin,
+              urlparse,
+              urlunparse,
+              ParseResult)

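The new `searx.url_utils` module hides the `urllib`/`urlparse` versus `urllib.parse` split behind a single import location, which the engines and core modules in this diff switch to. A usage sketch, assuming searx is importable:

    from searx.url_utils import urlencode, urlparse

    query_string = urlencode({'q': 'searx', 'pageno': 1})
    print(urlparse('https://example.com/search?' + query_string).query)
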
+ 18 - 8
searx/utils.py

@@ -1,11 +1,9 @@
-import cStringIO
 import csv
 import os
 import re
 
 from babel.dates import format_date
 from codecs import getincrementalencoder
-from HTMLParser import HTMLParser
 from imp import load_source
 from os.path import splitext, join
 from random import choice
@@ -16,6 +14,19 @@ from searx.languages import language_codes
 from searx import settings
 from searx import logger
 
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
+
+if sys.version_info[0] == 3:
+    unichr = chr
+    unicode = str
 
 logger = logger.getChild('utils')
 
@@ -140,7 +151,7 @@ class UnicodeWriter:
 
     def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
         # Redirect output to a queue
-        self.queue = cStringIO.StringIO()
+        self.queue = StringIO()
         self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
         self.stream = f
         self.encoder = getincrementalencoder(encoding)()
@@ -152,14 +163,13 @@ class UnicodeWriter:
                 unicode_row.append(col.encode('utf-8').strip())
             else:
                 unicode_row.append(col)
-        self.writer.writerow(unicode_row)
+        self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row])
         # Fetch UTF-8 output from the queue ...
-        data = self.queue.getvalue()
-        data = data.decode("utf-8")
+        data = self.queue.getvalue().strip('\x00')
         # ... and reencode it into the target encoding
         data = self.encoder.encode(data)
         # write to the target stream
-        self.stream.write(data)
+        self.stream.write(data.decode('utf-8'))
         # empty queue
         self.queue.truncate(0)
 
@@ -231,7 +241,7 @@ def dict_subset(d, properties):
 
 def prettify_url(url, max_length=74):
     if len(url) > max_length:
-        chunk_len = max_length / 2 + 1
+        chunk_len = int(max_length / 2 + 1)
         return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
     else:
         return url

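In `searx/utils.py` the `cStringIO`/`io` and `HTMLParser`/`html.parser` splits are handled with import fallbacks (the `sys.version_info` check assumes `import sys` elsewhere in the module, which this hunk does not show), and `prettify_url()` gains an `int()` cast because `/` is true division on Python 3 and a float cannot be used as a slice index. The division fix in isolation, mirroring the code above:

    def prettify_url(url, max_length=74):
        if len(url) > max_length:
            # max_length / 2 is a float on Python 3; int() (or //) keeps it usable as a slice index
            chunk_len = int(max_length / 2 + 1)
            return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
        return url

    print(prettify_url('https://example.com/' + 'a' * 100))
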
+ 23 - 13
searx/webapp.py

@@ -22,11 +22,12 @@ if __name__ == '__main__':
     from os.path import realpath, dirname
     path.append(realpath(dirname(realpath(__file__)) + '/../'))
 
-import cStringIO
 import hashlib
 import hmac
 import json
 import os
+import sys
+
 import requests
 
 from searx import logger
@@ -42,8 +43,6 @@ except:
     exit(1)
 from cgi import escape
 from datetime import datetime, timedelta
-from urllib import urlencode
-from urlparse import urlparse, urljoin
 from werkzeug.contrib.fixers import ProxyFix
 from flask import (
     Flask, request, render_template, url_for, Response, make_response,
@@ -52,7 +51,7 @@ from flask import (
 from flask_babel import Babel, gettext, format_date, format_decimal
 from flask.json import jsonify
 from searx import settings, searx_dir, searx_debug
-from searx.exceptions import SearxException, SearxParameterException
+from searx.exceptions import SearxParameterException
 from searx.engines import (
     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
 )
@@ -69,6 +68,7 @@ from searx.autocomplete import searx_bang, backends as autocomplete_backends
 from searx.plugins import plugins
 from searx.preferences import Preferences, ValidationException
 from searx.answerers import answerers
+from searx.url_utils import urlencode, urlparse, urljoin
 
 # check if the pyopenssl package is installed.
 # It is needed for SSL connection without trouble, see #298
@@ -78,6 +78,15 @@ except ImportError:
     logger.critical("The pyopenssl package has to be installed.\n"
                     "Some HTTPS connections will fail")
 
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+
+if sys.version_info[0] == 3:
+    unicode = str
+
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
 WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@ -357,6 +366,8 @@ def render(template_name, override_theme=None, **kwargs):
 
     kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
 
+    kwargs['unicode'] = unicode
+
     kwargs['scripts'] = set()
     for plugin in request.user_plugins:
         for script in plugin.js_dependencies:
@@ -375,7 +386,7 @@ def render(template_name, override_theme=None, **kwargs):
 def pre_request():
     request.errors = []
 
-    preferences = Preferences(themes, categories.keys(), engines, plugins)
+    preferences = Preferences(themes, list(categories.keys()), engines, plugins)
     request.preferences = preferences
     try:
         preferences.parse_cookies(request.cookies)
@@ -479,10 +490,8 @@ def index():
     for result in results:
         if output_format == 'html':
             if 'content' in result and result['content']:
-                result['content'] = highlight_content(escape(result['content'][:1024]),
-                                                      search_query.query.encode('utf-8'))
-            result['title'] = highlight_content(escape(result['title'] or u''),
-                                                search_query.query.encode('utf-8'))
+                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
+            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
         else:
             if result.get('content'):
                 result['content'] = html_to_text(result['content']).strip()
@@ -510,7 +519,7 @@ def index():
                     result['publishedDate'] = format_date(result['publishedDate'])
 
     if output_format == 'json':
-        return Response(json.dumps({'query': search_query.query,
+        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                     'number_of_results': number_of_results,
                                     'results': results,
                                     'answers': list(result_container.answers),
@@ -519,7 +528,7 @@ def index():
                                     'suggestions': list(result_container.suggestions)}),
                         mimetype='application/json')
     elif output_format == 'csv':
-        csv = UnicodeWriter(cStringIO.StringIO())
+        csv = UnicodeWriter(StringIO())
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')
         csv.writerow(keys)
         for row in results:
@@ -527,7 +536,7 @@ def index():
             csv.writerow([row.get(key, '') for key in keys])
         csv.stream.seek(0)
         response = Response(csv.stream.read(), mimetype='application/csv')
-        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
+        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
         response.headers.add('Content-Disposition', cont_disp)
         return response
     elif output_format == 'rss':
@@ -578,7 +587,7 @@ def autocompleter():
     disabled_engines = request.preferences.engines.get_disabled()
 
     # parse query
-    raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines)
+    raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
     raw_text_query.parse_query()
 
     # check if search query is set
@@ -820,6 +829,7 @@ def page_not_found(e):
 
 
 def run():
+    logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
     app.run(
         debug=searx_debug,
         use_debugger=searx_debug,

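In `webapp.py` the same ideas recur: `dict.keys()` returns a view on Python 3, so it is wrapped in `list(...)` before being concatenated with or passed where a list is expected, and `unicode` is aliased to `str` and exposed to the Jinja context (`kwargs['unicode'] = unicode`) so the 404 templates above can call it. A small standalone sketch of the `keys()` change, with illustrative data:

    import sys

    if sys.version_info[0] == 3:
        unicode = str

    categories = {'general': [], 'images': []}
    # materialize the view before concatenating with a plain list
    locale_choices = list(categories.keys()) + ['']
    print(locale_choices, unicode('ok'))
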
+ 75 - 0
tests/robot/__init__.py

@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+from time import sleep
+
+url = "http://localhost:11111/"
+
+
+def test_index(browser):
+    # Visit URL
+    browser.visit(url)
+    assert browser.is_text_present('about')
+
+
+def test_404(browser):
+    # Visit URL
+    browser.visit(url + 'missing_link')
+    assert browser.is_text_present('Page not found')
+
+
+def test_about(browser):
+    browser.visit(url)
+    browser.click_link_by_text('about')
+    assert browser.is_text_present('Why use searx?')
+
+
+def test_preferences(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+    assert browser.is_text_present('Preferences')
+    assert browser.is_text_present('Cookies')
+
+    assert browser.is_element_present_by_xpath('//label[@for="checkbox_dummy"]')
+
+
+def test_preferences_engine_select(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+
+    assert browser.is_element_present_by_xpath('//a[@href="#tab_engine"]')
+    browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+    assert not browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+    browser.find_by_xpath('//label[@for="engine_general_dummy__general"]').first.check()
+    browser.find_by_xpath('//input[@value="save"]').first.click()
+
+    # waiting for the redirect - without this the test is flaky..
+    sleep(1)
+
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+    browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+    assert browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+
+
+def test_preferences_locale(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+
+    browser.select('locale', 'hu')
+    browser.find_by_xpath('//input[@value="save"]').first.click()
+
+    # waiting for the redirect - without this the test is flaky..
+    sleep(1)
+
+    browser.visit(url)
+    browser.click_link_by_text('beállítások')
+    assert browser.is_text_present('Beállítások')
+
+
+def test_search(browser):
+    browser.visit(url)
+    browser.fill('q', 'test search query')
+    browser.find_by_xpath('//button[@type="submit"]').first.click()
+    assert browser.is_text_present('didn\'t find any results')

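These splinter-driven functions replace the Robot Framework suite deleted below; `searx/testing.py` collects every `test_`-prefixed callable from `tests.robot` and runs each one in a fresh `Browser()`. Roughly what `python searx/testing.py robot` does, restated as a sketch:

    from searx.testing import SearxRobotLayer, run_robot_tests
    from tests import robot

    layer = SearxRobotLayer()
    layer.setUp()            # start the searx test instance
    try:
        run_robot_tests([getattr(robot, name) for name in dir(robot) if name.startswith('test_')])
    finally:
        layer.tearDown()     # stop the test instance
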
+ 0 - 153
tests/robot/test_basic.robot

@@ -1,153 +0,0 @@
-*** Settings ***
-Library         Selenium2Library  timeout=10  implicit_wait=0.5
-Test Setup      Open Browser  http://localhost:11111/
-Test Teardown   Close All Browsers
-
-
-*** Keywords ***
-Submit Preferences
-    Set Selenium Speed  2 seconds
-    Submit Form  id=search_form
-    Location Should Be  http://localhost:11111/
-    Set Selenium Speed  0 seconds
-
-
-*** Test Cases ***
-Front page
-    Page Should Contain  about
-    Page Should Contain  preferences
-
-404 page
-    Go To  http://localhost:11111/no-such-page
-    Page Should Contain  Page not found
-    Page Should Contain  Go to search page
-
-About page
-    Click Element  link=about
-    Page Should Contain  Why use searx?
-    Page Should Contain Element  link=search engines
-
-Preferences page
-    Click Element  link=preferences
-    Page Should Contain  Preferences
-    Page Should Contain  Default categories
-    Page Should Contain  Currently used search engines
-    Page Should Contain  dummy dummy
-    Page Should Contain  general dummy
-
-Switch category
-    Go To  http://localhost:11111/preferences
-    Page Should Contain Checkbox  category_general
-    Page Should Contain Checkbox  category_dummy
-    Click Element  xpath=//*[.="general"]
-    Click Element  xpath=//*[.="dummy"]
-    Submit Preferences
-    Checkbox Should Not Be Selected  category_general
-    Checkbox Should Be Selected  category_dummy
-
-Change language
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Select From List  locale  hu
-    Submit Preferences
-    Page Should Contain  rólunk
-    Page Should Contain  beállítások
-
-Change method
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Select From List  method  GET
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  method  GET
-    Select From List  method  POST
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  method  POST
-
-Change theme
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  legacy
-    Select From List  theme  oscar
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  oscar
-
-Change safesearch
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  safesearch  None
-    Select From List  safesearch  Strict
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  safesearch  Strict
-
-Change image proxy
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  image_proxy  Disabled
-    Select From List  image_proxy  Enabled
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  image_proxy  Enabled
-
-Change search language
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Default language
-    Select From List  language  Türkçe - tr-TR
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Türkçe - tr-TR
-
-Change autocomplete
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  autocomplete  -
-    Select From List  autocomplete  google
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  autocomplete  google
-
-Change allowed/disabled engines
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Engine name
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy']  Block
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_general_general_dummy']  Block
-    Click Element  xpath=//label[@class="deny"][@for='engine_general_general_dummy']
-    Submit Preferences
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Engine name
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy']  Block
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_general_general_dummy']  \
-
-Block a plugin
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  legacy
-    Select From List  theme  oscar
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  oscar
-    Page Should Contain  Plugins
-    Click Link  Plugins
-    Checkbox Should Not Be Selected  id=plugin_HTTPS_rewrite
-    Click Element  xpath=//label[@for='plugin_HTTPS_rewrite']
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Plugins
-    Click Link  Plugins
-    Checkbox Should Be Selected  id=plugin_HTTPS_rewrite

+ 2 - 2
tests/unit/engines/test_archlinux.py

@@ -25,7 +25,7 @@ class TestArchLinuxEngine(SearxTestCase):
         self.assertTrue(query in params['url'])
         self.assertTrue('wiki.archlinux.org' in params['url'])
 
-        for lang, domain in domains.iteritems():
+        for lang, domain in domains.items():
             dic['language'] = lang
             params = archlinux.request(query, dic)
             self.assertTrue(domain in params['url'])
@@ -102,5 +102,5 @@ class TestArchLinuxEngine(SearxTestCase):
         for exp in expected:
             res = results[i]
             i += 1
-            for key, value in exp.iteritems():
+            for key, value in exp.items():
                 self.assertEqual(res[key], value)

+ 3 - 3
tests/unit/engines/test_bing.py

@@ -7,18 +7,18 @@ from searx.testing import SearxTestCase
 class TestBingEngine(SearxTestCase):
 
     def test_request(self):
-        query = 'test_query'
+        query = u'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
         dicto['language'] = 'fr_FR'
-        params = bing.request(query, dicto)
+        params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('language%3AFR' in params['url'])
         self.assertTrue('bing.com' in params['url'])
 
         dicto['language'] = 'all'
-        params = bing.request(query, dicto)
+        params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('language' in params['url'])
 
     def test_response(self):

+ 6 - 6
tests/unit/engines/test_bing_news.py

@@ -36,10 +36,10 @@ class TestBingNewsEngine(SearxTestCase):
         self.assertRaises(AttributeError, bing_news.response, '')
         self.assertRaises(AttributeError, bing_news.response, '[]')
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(bing_news.response(response), [])
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(bing_news.response(response), [])
 
         html = """<?xml version="1.0" encoding="utf-8" ?>
@@ -74,7 +74,7 @@ class TestBingNewsEngine(SearxTestCase):
         </item>
     </channel>
 </rss>"""  # noqa
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 2)
@@ -113,7 +113,7 @@ class TestBingNewsEngine(SearxTestCase):
         </item>
     </channel>
 </rss>"""  # noqa
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
@@ -136,11 +136,11 @@ class TestBingNewsEngine(SearxTestCase):
     </channel>
 </rss>"""  # noqa
 
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
 
         html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)

+ 6 - 6
tests/unit/engines/test_btdigg.py

@@ -22,10 +22,10 @@ class TestBtdiggEngine(SearxTestCase):
         self.assertRaises(AttributeError, btdigg.response, '')
         self.assertRaises(AttributeError, btdigg.response, '[]')
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(btdigg.response(response), [])
 
-        html = """
+        html = u"""
         <div id="search_res">
             <table>
                 <tr>
@@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase):
             </table>
         </div>
         """
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = btdigg.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
@@ -101,12 +101,12 @@ class TestBtdiggEngine(SearxTestCase):
             </table>
         </div>
         """
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = btdigg.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
 
-        html = """
+        html = u"""
         <div id="search_res">
             <table>
                 <tr>
@@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase):
             </table>
         </div>
         """
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = btdigg.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 5)

Some files were not shown because too many files changed in this diff