
Merge pull request #913 from asciimoo/py3

Add Python3 compatibility
Adam Tauber · 8 years ago · commit 4cffd78650
100 changed files with 448 additions and 446 deletions
  1. .travis.yml  +3 -2
  2. requirements-dev.txt  +1 -2
  3. searx/answerers/__init__.py  +8 -4
  4. searx/answerers/random/answerer.py  +9 -4
  5. searx/answerers/statistics/answerer.py  +10 -6
  6. searx/autocomplete.py  +5 -1
  7. searx/engines/1337x.py  +1 -2
  8. searx/engines/__init__.py  +2 -3
  9. searx/engines/archlinux.py  +1 -2
  10. searx/engines/base.py  +3 -3
  11. searx/engines/bing.py  +1 -1
  12. searx/engines/bing_images.py  +1 -1
  13. searx/engines/bing_news.py  +2 -3
  14. searx/engines/blekko_images.py  +1 -1
  15. searx/engines/btdigg.py  +2 -3
  16. searx/engines/currency_convert.py  +9 -5
  17. searx/engines/dailymotion.py  +1 -2
  18. searx/engines/deezer.py  +2 -3
  19. searx/engines/deviantart.py  +1 -1
  20. searx/engines/dictzone.py  +3 -3
  21. searx/engines/digbt.py  +6 -2
  22. searx/engines/digg.py  +2 -2
  23. searx/engines/doku.py  +1 -1
  24. searx/engines/duckduckgo.py  +1 -1
  25. searx/engines/duckduckgo_definitions.py  +3 -3
  26. searx/engines/faroo.py  +1 -1
  27. searx/engines/fdroid.py  +3 -4
  28. searx/engines/filecrop.py  +7 -4
  29. searx/engines/flickr.py  +1 -1
  30. searx/engines/flickr_noapi.py  +1 -1
  31. searx/engines/framalibre.py  +1 -3
  32. searx/engines/frinkiac.py  +1 -1
  33. searx/engines/gigablast.py  +1 -2
  34. searx/engines/github.py  +1 -1
  35. searx/engines/google.py  +2 -3
  36. searx/engines/google_images.py  +1 -1
  37. searx/engines/google_news.py  +1 -2
  38. searx/engines/ina.py  +7 -3
  39. searx/engines/json_engine.py  +8 -3
  40. searx/engines/kickass.py  +1 -2
  41. searx/engines/mediawiki.py  +1 -1
  42. searx/engines/mixcloud.py  +1 -1
  43. searx/engines/nyaa.py  +1 -1
  44. searx/engines/openstreetmap.py  +0 -4
  45. searx/engines/photon.py  +1 -1
  46. searx/engines/piratebay.py  +1 -2
  47. searx/engines/qwant.py  +1 -2
  48. searx/engines/reddit.py  +2 -4
  49. searx/engines/scanr_structures.py  +1 -3
  50. searx/engines/searchcode_code.py  +2 -3
  51. searx/engines/searchcode_doc.py  +2 -3
  52. searx/engines/seedpeer.py  +1 -3
  53. searx/engines/soundcloud.py  +12 -7
  54. searx/engines/spotify.py  +2 -3
  55. searx/engines/stackoverflow.py  +2 -4
  56. searx/engines/startpage.py  +1 -1
  57. searx/engines/subtitleseeker.py  +1 -1
  58. searx/engines/swisscows.py  +13 -14
  59. searx/engines/tokyotoshokan.py  +5 -6
  60. searx/engines/torrentz.py  +4 -4
  61. searx/engines/translated.py  +4 -0
  62. searx/engines/twitter.py  +1 -2
  63. searx/engines/vimeo.py  +1 -1
  64. searx/engines/wikidata.py  +5 -8
  65. searx/engines/wikipedia.py  +9 -12
  66. searx/engines/wolframalpha_api.py  +6 -7
  67. searx/engines/wolframalpha_noapi.py  +4 -5
  68. searx/engines/www1x.py  +2 -4
  69. searx/engines/www500px.py  +1 -2
  70. searx/engines/xpath.py  +2 -2
  71. searx/engines/yacy.py  +1 -1
  72. searx/engines/yahoo.py  +1 -2
  73. searx/engines/yahoo_news.py  +3 -3
  74. searx/engines/yandex.py  +2 -2
  75. searx/engines/youtube_api.py  +1 -1
  76. searx/engines/youtube_noapi.py  +1 -1
  77. searx/plugins/__init__.py  +4 -1
  78. searx/plugins/doai_rewrite.py  +1 -1
  79. searx/plugins/https_rewrite.py  +4 -1
  80. searx/plugins/self_info.py  +2 -2
  81. searx/plugins/tracker_url_remover.py  +1 -1
  82. searx/preferences.py  +9 -9
  83. searx/query.py  +6 -2
  84. searx/results.py  +5 -1
  85. searx/search.py  +10 -2
  86. searx/settings_robot.yml  +1 -1
  87. searx/templates/courgette/404.html  +1 -1
  88. searx/templates/legacy/404.html  +1 -1
  89. searx/templates/oscar/404.html  +1 -1
  90. searx/templates/pix-art/404.html  +1 -1
  91. searx/testing.py  +26 -16
  92. searx/url_utils.py  +28 -0
  93. searx/utils.py  +18 -8
  94. searx/webapp.py  +23 -13
  95. tests/robot/__init__.py  +75 -0
  96. tests/robot/test_basic.robot  +0 -153
  97. tests/unit/engines/test_archlinux.py  +2 -2
  98. tests/unit/engines/test_bing.py  +3 -3
  99. tests/unit/engines/test_bing_news.py  +6 -6
  100. tests/unit/engines/test_btdigg.py  +6 -6
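
A pattern repeated throughout the per-file diffs below: direct urllib/urlparse imports are replaced by imports from the new searx/url_utils.py module (+28 lines, listed above but not reproduced in this excerpt), and the unicode builtin is aliased to str on Python 3. As a rough illustration only, a url_utils-style shim along these lines would re-export the stdlib URL helpers under the names the engines import; the actual file added by the PR may differ:

# Hypothetical sketch, not the real searx/url_utils.py from this PR.
import sys

if sys.version_info[0] == 2:
    # Python 2: the helpers live in urllib and urlparse
    from urllib import quote, quote_plus, unquote, urlencode
    from urlparse import parse_qsl, urljoin, urlparse
else:
    # Python 3: the same helpers moved to urllib.parse
    from urllib.parse import (
        parse_qsl, quote, quote_plus, unquote, urlencode, urljoin, urlparse
    )

__all__ = ['parse_qsl', 'quote', 'quote_plus', 'unquote',
           'urlencode', 'urljoin', 'urlparse']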

+ 3 - 2
.travis.yml

@@ -9,6 +9,7 @@ addons:
 language: python
 python:
   - "2.7"
+  - "3.6"
 before_install:
   - "export DISPLAY=:99.0"
   - "sh -e /etc/init.d/xvfb start"
@@ -24,9 +25,9 @@ script:
   - ./manage.sh styles
   - ./manage.sh grunt_build
   - ./manage.sh tests
-  - ./manage.sh py_test_coverage
 after_success:
-  coveralls
+  - ./manage.sh py_test_coverage
+  - coveralls
 notifications:
   irc:
     channels:

+ 1 - 2
requirements-dev.txt

@@ -3,8 +3,7 @@ mock==2.0.0
 nose2[coverage-plugin]
 pep8==1.7.0
 plone.testing==5.0.0
-robotframework-selenium2library==1.8.0
-robotsuite==1.7.0
+splinter==0.7.5
 transifex-client==0.12.2
 unittest2==1.1.0
 zope.testrunner==4.5.1

+ 8 - 4
searx/answerers/__init__.py

@@ -1,8 +1,12 @@
 from os import listdir
 from os.path import realpath, dirname, join, isdir
+from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
+if version_info[0] == 3:
+    unicode = str
+
 
 answerers_dir = dirname(realpath(__file__))
 
@@ -10,7 +14,7 @@ answerers_dir = dirname(realpath(__file__))
 def load_answerers():
     answerers = []
     for filename in listdir(answerers_dir):
-        if not isdir(join(answerers_dir, filename)):
+        if not isdir(join(answerers_dir, filename)) or filename.startswith('_'):
             continue
         module = load_module('answerer.py', join(answerers_dir, filename))
         if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
@@ -30,12 +34,12 @@ def get_answerers_by_keywords(answerers):
 
 def ask(query):
     results = []
-    query_parts = filter(None, query.query.split())
+    query_parts = list(filter(None, query.query.split()))
 
-    if query_parts[0] not in answerers_by_keywords:
+    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0]]:
+    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
         result = answerer(query)
         if result:
             results.append(result)

+ 9 - 4
searx/answerers/random/answerer.py

@@ -1,5 +1,6 @@
 import random
 import string
+import sys
 from flask_babel import gettext
 
 # required answerer attribute
@@ -8,7 +9,11 @@ keywords = ('random',)
 
 random_int_max = 2**31
 
-random_string_letters = string.lowercase + string.digits + string.uppercase
+if sys.version_info[0] == 2:
+    random_string_letters = string.lowercase + string.digits + string.uppercase
+else:
+    unicode = str
+    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_string():
@@ -24,9 +29,9 @@ def random_int():
     return unicode(random.randint(-random_int_max, random_int_max))
 
 
-random_types = {u'string': random_string,
-                u'int': random_int,
-                u'float': random_float}
+random_types = {b'string': random_string,
+                b'int': random_int,
+                b'float': random_float}
 
 
 # required answerer function

+ 10 - 6
searx/answerers/statistics/answerer.py

@@ -1,8 +1,12 @@
+from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
+if version_info[0] == 3:
+    unicode = str
+
 keywords = ('min',
             'max',
             'avg',
@@ -19,22 +23,22 @@ def answer(query):
         return []
 
     try:
-        args = map(float, parts[1:])
+        args = list(map(float, parts[1:]))
     except:
         return []
 
     func = parts[0]
     answer = None
 
-    if func == 'min':
+    if func == b'min':
         answer = min(args)
-    elif func == 'max':
+    elif func == b'max':
         answer = max(args)
-    elif func == 'avg':
+    elif func == b'avg':
         answer = sum(args) / len(args)
-    elif func == 'sum':
+    elif func == b'sum':
         answer = sum(args)
-    elif func == 'prod':
+    elif func == b'prod':
         answer = reduce(mul, args, 1)
 
     if answer is None:

+ 5 - 1
searx/autocomplete.py

@@ -18,7 +18,6 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 from lxml import etree
 from json import loads
-from urllib import urlencode
 from searx import settings
 from searx.languages import language_codes
 from searx.engines import (
@@ -26,6 +25,11 @@ from searx.engines import (
 )
 from searx.poolrequests import get as http_get
 
+try:
+    from urllib import urlencode
+except:
+    from urllib.parse import urlencode
+
 
 def get(*args, **kwargs):
     if 'timeout' not in kwargs:

+ 1 - 2
searx/engines/1337x.py

@@ -1,8 +1,7 @@
-from urllib import quote
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from urlparse import urljoin
+from searx.url_utils import quote, urljoin
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

+ 2 - 3
searx/engines/__init__.py

@@ -72,12 +72,11 @@ def load_engine(engine_data):
             if engine_data['categories'] == 'none':
                 engine.categories = []
             else:
-                engine.categories = map(
-                    str.strip, engine_data['categories'].split(','))
+                engine.categories = list(map(str.strip, engine_data['categories'].split(',')))
             continue
         setattr(engine, param_name, engine_data[param_name])
 
-    for arg_name, arg_value in engine_default_args.iteritems():
+    for arg_name, arg_value in engine_default_args.items():
         if not hasattr(engine, arg_name):
             setattr(engine, arg_name, arg_value)
 

+ 1 - 2
searx/engines/archlinux.py

@@ -11,10 +11,9 @@
  @parse        url, title
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

+ 3 - 3
searx/engines/base.py

@@ -14,10 +14,10 @@
 """
 
 from lxml import etree
-from urllib import urlencode
-from searx.utils import searx_useragent
 from datetime import datetime
 import re
+from searx.url_utils import urlencode
+from searx.utils import searx_useragent
 
 
 categories = ['science']
@@ -73,7 +73,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)
 
     for entry in search_results.xpath('./result/doc'):
         content = "No description available"

+ 1 - 1
searx/engines/bing.py

@@ -13,9 +13,9 @@
  @todo        publishedDate
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

+ 1 - 1
searx/engines/bing_images.py

@@ -15,11 +15,11 @@
               limited response to 10 images
 """
 
-from urllib import urlencode
 from lxml import html
 from json import loads
 import re
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

+ 2 - 3
searx/engines/bing_news.py

@@ -11,13 +11,12 @@
  @parse       url, title, content, publishedDate, thumbnail
 """
 
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from datetime import datetime
 from dateutil import parser
 from lxml import etree
 from searx.utils import list_get
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 # engine dependent config
 categories = ['news']
@@ -86,7 +85,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    rss = etree.fromstring(resp.content)
+    rss = etree.fromstring(resp.text)
 
     ns = rss.nsmap
 

+ 1 - 1
searx/engines/blekko_images.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

+ 2 - 3
searx/engines/btdigg.py

@@ -10,11 +10,10 @@
  @parse       url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 
 # engine dependent config
@@ -38,7 +37,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     search_res = dom.xpath('//div[@id="search_res"]/table/tr')
 

+ 9 - 5
searx/engines/currency_convert.py

@@ -1,21 +1,25 @@
-from datetime import datetime
+import json
 import re
 import os
-import json
+import sys
 import unicodedata
 
+from datetime import datetime
+
+if sys.version_info[0] == 3:
+    unicode = str
 
 categories = []
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
 weight = 100
 
-parser_re = re.compile(u'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)  # noqa
+parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
 
 db = 1
 
 
 def normalize_name(name):
-    name = name.lower().replace('-', ' ').rstrip('s')
+    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 
@@ -35,7 +39,7 @@ def iso4217_to_name(iso4217, language):
 
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
     if not m:
         # wrong query
         return params
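
Note the recurring byte-string handling in this and several later engine diffs: the raw query that searx passes to request() appears to be a byte string, so patterns are now compiled from bytes literals (b'...') and decoded to text only where needed. A standalone illustration of that behaviour, not code from the PR:

import re

# Matching a bytes pattern against a bytes query works on Python 2 and 3;
# a text (unicode) pattern against bytes would raise TypeError on Python 3.
parser_re = re.compile(b'(\\d+) ([a-z]+) (?:in|to) ([a-z]+)', re.I)

m = parser_re.match(b'10 usd to eur')
print(m.groups())                  # Python 3: (b'10', b'usd', b'eur')
print(m.group(2).decode('utf-8'))  # 'usd' -- decode captures before text use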

+ 1 - 2
searx/engines/dailymotion.py

@@ -12,10 +12,9 @@
  @todo        set content-parameter with correct data
 """
 
-from urllib import urlencode
 from json import loads
 from datetime import datetime
-from requests import get
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos']

+ 2 - 3
searx/engines/deezer.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -30,8 +30,7 @@ embedded_url = '<iframe scrolling="no" frameborder="0" allowTransparency="true"
 def request(query, params):
     offset = (params['pageno'] - 1) * 25
 
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
 
     return params
 

+ 1 - 1
searx/engines/deviantart.py

@@ -12,10 +12,10 @@
  @todo        rewrite to api
 """
 
-from urllib import urlencode
 from lxml import html
 import re
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

+ 3 - 3
searx/engines/dictzone.py

@@ -10,20 +10,20 @@
 """
 
 import re
-from urlparse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang
+from searx.url_utils import urljoin
 
 categories = ['general']
 url = u'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
-parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
+parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
 results_xpath = './/table[@id="r"]/tr'
 
 
 def request(query, params):
-    m = parser_re.match(unicode(query, 'utf8'))
+    m = parser_re.match(query)
     if not m:
         return params
 

+ 6 - 2
searx/engines/digbt.py

@@ -10,10 +10,14 @@
  @parse       url, title, content, magnetlink
 """
 
-from urlparse import urljoin
+from sys import version_info
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
+from searx.url_utils import urljoin
+
+if version_info[0] == 3:
+    unicode = str
 
 categories = ['videos', 'music', 'files']
 paging = True
@@ -31,7 +35,7 @@ def request(query, params):
 
 
 def response(resp):
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
     search_res = dom.xpath('.//td[@class="x-item"]')
 
     if not search_res:

+ 2 - 2
searx/engines/digg.py

@@ -10,10 +10,10 @@
  @parse       url, title, content, publishedDate, thumbnail
 """
 
-from urllib import quote_plus
+from dateutil import parser
 from json import loads
 from lxml import html
-from dateutil import parser
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['news', 'social media']

+ 1 - 1
searx/engines/doku.py

@@ -9,9 +9,9 @@
 # @stable      yes
 # @parse       (general)    url, title, content
 
-from urllib import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

+ 1 - 1
searx/engines/duckduckgo.py

@@ -13,11 +13,11 @@
  @todo        rewrite to api
 """
 
-from urllib import urlencode
 from lxml.html import fromstring
 from requests import get
 from json import loads
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

+ 3 - 3
searx/engines/duckduckgo_definitions.py

@@ -1,10 +1,10 @@
 import json
-from urllib import urlencode
-from re import compile, sub
 from lxml import html
-from searx.utils import html_to_text
+from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
+from searx.utils import html_to_text
 
 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

+ 1 - 1
searx/engines/faroo.py

@@ -10,10 +10,10 @@
  @parse       url, title, content, publishedDate, img_src
 """
 
-from urllib import urlencode
 from json import loads
 import datetime
 from searx.utils import searx_useragent
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general', 'news']

+ 3 - 4
searx/engines/fdroid.py

@@ -9,9 +9,9 @@
  @parse        url, title, content
 """
 
-from urllib import urlencode
-from searx.engines.xpath import extract_text
 from lxml import html
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files']
@@ -24,8 +24,7 @@ search_url = base_url + 'repository/browse/?{query}'
 
 # do search-request
 def request(query, params):
-    query = urlencode({'fdfilter': query,
-                       'fdpage': params['pageno']})
+    query = urlencode({'fdfilter': query, 'fdpage': params['pageno']})
     params['url'] = search_url.format(query=query)
     return params
 

+ 7 - 4
searx/engines/filecrop.py

@@ -1,5 +1,9 @@
-from urllib import urlencode
-from HTMLParser import HTMLParser
+from searx.url_utils import urlencode
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
 
 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa
@@ -73,8 +77,7 @@ class FilecropResultParser(HTMLParser):
 
 def request(query, params):
     index = 1 + (params['pageno'] - 1) * 30
-    params['url'] = search_url.format(query=urlencode({'w': query}),
-                                      index=index)
+    params['url'] = search_url.format(query=urlencode({'w': query}), index=index)
     return params
 
 

+ 1 - 1
searx/engines/flickr.py

@@ -13,8 +13,8 @@
  More info on api-key : https://www.flickr.com/services/apps/create/
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 categories = ['images']
 

+ 1 - 1
searx/engines/flickr_noapi.py

@@ -12,11 +12,11 @@
  @parse       url, title, thumbnail, img_src
 """
 
-from urllib import urlencode
 from json import loads
 from time import time
 import re
 from searx.engines import logger
+from searx.url_utils import urlencode
 
 
 logger = logger.getChild('flickr-noapi')

+ 1 - 3
searx/engines/framalibre.py

@@ -10,12 +10,10 @@
  @parse       url, title, content, thumbnail, img_src
 """
 
-from urlparse import urljoin
 from cgi import escape
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from dateutil import parser
+from searx.url_utils import urljoin, urlencode
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/frinkiac.py

@@ -10,7 +10,7 @@ Frinkiac (Images)
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 categories = ['images']
 

+ 1 - 2
searx/engines/gigablast.py

@@ -11,10 +11,9 @@
 """
 
 from json import loads
-from random import randint
 from time import time
-from urllib import urlencode
 from lxml.html import fromstring
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']

+ 1 - 1
searx/engines/github.py

@@ -10,8 +10,8 @@
  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']

+ 2 - 3
searx/engines/google.py

@@ -9,11 +9,10 @@
 # @parse       url, title, content, suggestion
 
 import re
-from urllib import urlencode
-from urlparse import urlparse, parse_qsl
 from lxml import html, etree
 from searx.engines.xpath import extract_text, extract_url
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode, urlparse, parse_qsl
 
 logger = logger.getChild('google engine')
 

+ 1 - 1
searx/engines/google_images.py

@@ -11,9 +11,9 @@
 """
 
 from datetime import date, timedelta
-from urllib import urlencode
 from json import loads
 from lxml import html
+from searx.url_utils import urlencode
 
 
 # engine dependent config

+ 1 - 2
searx/engines/google_news.py

@@ -11,9 +11,8 @@
 """
 
 from lxml import html
-from urllib import urlencode
-from json import loads
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 # search-url
 categories = ['news']

+ 7 - 3
searx/engines/ina.py

@@ -12,11 +12,15 @@
 # @todo        embedded (needs some md5 from video page)
 
 from json import loads
-from urllib import urlencode
 from lxml import html
-from HTMLParser import HTMLParser
-from searx.engines.xpath import extract_text
 from dateutil import parser
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
 
 # engine dependent config
 categories = ['videos']

+ 8 - 3
searx/engines/json_engine.py

@@ -1,11 +1,16 @@
-from urllib import urlencode
-from json import loads
 from collections import Iterable
+from json import loads
+from sys import version_info
+from searx.url_utils import urlencode
+
+if version_info[0] == 3:
+    unicode = str
 
 search_url = None
 url_query = None
 content_query = None
 title_query = None
+paging = False
 suggestion_query = ''
 results_query = ''
 
@@ -20,7 +25,7 @@ first_page_num = 1
 
 def iterate(iterable):
     if type(iterable) == dict:
-        it = iterable.iteritems()
+        it = iterable.items()
 
     else:
         it = enumerate(iterable)

+ 1 - 2
searx/engines/kickass.py

@@ -10,12 +10,11 @@
  @parse       url, title, content, seed, leech, magnetlink
 """
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
+from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

+ 1 - 1
searx/engines/mediawiki.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from string import Formatter
-from urllib import urlencode, quote
+from searx.url_utils import urlencode, quote
 
 # engine dependent config
 categories = ['general']

+ 1 - 1
searx/engines/mixcloud.py

@@ -11,8 +11,8 @@
 """
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']

+ 1 - 1
searx/engines/nyaa.py

@@ -9,9 +9,9 @@
  @parse        url, title, content, seed, leech, torrentfile
 """
 
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'images', 'videos', 'music']

+ 0 - 4
searx/engines/openstreetmap.py

@@ -11,7 +11,6 @@
 """
 
 from json import loads
-from searx.utils import searx_useragent
 
 # engine dependent config
 categories = ['map']
@@ -27,9 +26,6 @@ result_base_url = 'https://openstreetmap.org/{osm_type}/{osm_id}'
 def request(query, params):
     params['url'] = base_url + search_string.format(query=query)
 
-    # using searx User-Agent
-    params['headers']['User-Agent'] = searx_useragent()
-
     return params
 
 

+ 1 - 1
searx/engines/photon.py

@@ -10,9 +10,9 @@
  @parse       url, title
 """
 
-from urllib import urlencode
 from json import loads
 from searx.utils import searx_useragent
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['map']

+ 1 - 2
searx/engines/piratebay.py

@@ -8,11 +8,10 @@
 # @stable      yes (HTML can change)
 # @parse       url, title, content, seed, leech, magnetlink
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

+ 1 - 2
searx/engines/qwant.py

@@ -12,9 +12,8 @@
 
 from datetime import datetime
 from json import loads
-from urllib import urlencode
-
 from searx.utils import html_to_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = None

+ 2 - 4
searx/engines/reddit.py

@@ -11,9 +11,8 @@
 """
 
 import json
-from urllib import urlencode
-from urlparse import urlparse, urljoin
 from datetime import datetime
+from searx.url_utils import urlencode, urljoin, urlparse
 
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']
@@ -26,8 +25,7 @@ search_url = base_url + 'search.json?{query}'
 
 # do search-request
 def request(query, params):
-    query = urlencode({'q': query,
-                       'limit': page_size})
+    query = urlencode({'q': query, 'limit': page_size})
     params['url'] = search_url.format(query=query)
 
     return params

+ 1 - 3
searx/engines/scanr_structures.py

@@ -10,9 +10,7 @@
  @parse       url, title, content, img_src
 """
 
-from urllib import urlencode
 from json import loads, dumps
-from dateutil import parser
 from searx.utils import html_to_text
 
 # engine dependent config
@@ -48,7 +46,7 @@ def response(resp):
     search_res = loads(resp.text)
 
     # return empty array if there are no results
-    if search_res.get('total') < 1:
+    if search_res.get('total', 0) < 1:
         return []
 
     # parse results

+ 2 - 3
searx/engines/searchcode_code.py

@@ -10,8 +10,8 @@
  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 
 # engine dependent config
@@ -31,8 +31,7 @@ code_endings = {'cs': 'c#',
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'] - 1)
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
 
     return params
 

+ 2 - 3
searx/engines/searchcode_doc.py

@@ -10,8 +10,8 @@
  @parse       url, title, content
 """
 
-from urllib import urlencode
 from json import loads
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['it']
@@ -24,8 +24,7 @@ search_url = url + 'api/search_IV/?{query}&p={pageno}'
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'] - 1)
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
 
     return params
 

+ 1 - 3
searx/engines/seedpeer.py

@@ -8,11 +8,9 @@
 # @stable      yes (HTML can change)
 # @parse       url, title, content, seed, leech, magnetlink
 
-from urlparse import urljoin
-from urllib import quote
 from lxml import html
 from operator import itemgetter
-from searx.engines.xpath import extract_text
+from searx.url_utils import quote, urljoin
 
 
 url = 'http://www.seedpeer.eu/'

+ 12 - 7
searx/engines/soundcloud.py

@@ -11,13 +11,17 @@
 """
 
 import re
-from StringIO import StringIO
 from json import loads
-from lxml import etree
-from urllib import urlencode, quote_plus
+from lxml import html
 from dateutil import parser
 from searx import logger
 from searx.poolrequests import get as http_get
+from searx.url_utils import quote_plus, urlencode
+
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
 
 # engine dependent config
 categories = ['music']
@@ -36,14 +40,15 @@ embedded_url = '<iframe width="100%" height="166" ' +\
     'scrolling="no" frameborder="no" ' +\
     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
 
+cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)
+
 
 def get_client_id():
     response = http_get("https://soundcloud.com")
-    rx_namespace = {"re": "http://exslt.org/regular-expressions"}
 
     if response.ok:
-        tree = etree.parse(StringIO(response.content), etree.HTMLParser())
-        script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace)
+        tree = html.fromstring(response.content)
+        script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
        app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None]

        # extracts valid app_js urls from soundcloud.com content
@@ -51,7 +56,7 @@ def get_client_id():
             # gets app_js and searches for the clientid
             response = http_get(app_js_url)
             if response.ok:
-                cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I)
+                cids = cid_re.search(response.text)
                 if cids is not None and len(cids.groups()):
                     return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

+ 2 - 3
searx/engines/spotify.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from urllib import urlencode
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -29,8 +29,7 @@ embedded_url = '<iframe data-src="https://embed.spotify.com/?uri=spotify:track:{
 def request(query, params):
     offset = (params['pageno'] - 1) * 20
 
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      offset=offset)
+    params['url'] = search_url.format(query=urlencode({'q': query}), offset=offset)
 
     return params
 

+ 2 - 4
searx/engines/stackoverflow.py

@@ -10,10 +10,9 @@
  @parse       url, title, content
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
@@ -31,8 +30,7 @@ content_xpath = './/div[@class="excerpt"]'
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}),
-                                      pageno=params['pageno'])
+    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
 
     return params
 

+ 1 - 1
searx/engines/startpage.py

@@ -56,7 +56,7 @@ def request(query, params):
 def response(resp):
     results = []
 
-    dom = html.fromstring(resp.content)
+    dom = html.fromstring(resp.text)
 
     # parse results
     for result in dom.xpath(results_xpath):

+ 1 - 1
searx/engines/subtitleseeker.py

@@ -10,10 +10,10 @@
  @parse       url, title, content
 """
 
-from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
 from searx.engines.xpath import extract_text
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['videos']

+ 13 - 14
searx/engines/swisscows.py

@@ -11,9 +11,9 @@
 """
 
 from json import loads
-from urllib import urlencode, unquote
 import re
 from lxml.html import fromstring
+from searx.url_utils import unquote, urlencode
 
 # engine dependent config
 categories = ['general', 'images']
@@ -27,10 +27,10 @@ search_string = '?{query}&page={page}'
 supported_languages_url = base_url
 
 # regex
-regex_json = re.compile(r'initialData: {"Request":(.|\n)*},\s*environment')
-regex_json_remove_start = re.compile(r'^initialData:\s*')
-regex_json_remove_end = re.compile(r',\s*environment$')
-regex_img_url_remove_start = re.compile(r'^https?://i\.swisscows\.ch/\?link=')
+regex_json = re.compile(b'initialData: {"Request":(.|\n)*},\s*environment')
+regex_json_remove_start = re.compile(b'^initialData:\s*')
+regex_json_remove_end = re.compile(b',\s*environment$')
+regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')
 
 
 # do search-request
@@ -45,10 +45,9 @@ def request(query, params):
         ui_language = params['language'].split('-')[0]
 
     search_path = search_string.format(
-        query=urlencode({'query': query,
-                         'uiLanguage': ui_language,
-                         'region': region}),
-        page=params['pageno'])
+        query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),
+        page=params['pageno']
+    )
 
     # image search query is something like 'image?{query}&page={page}'
     if params['category'] == 'images':
@@ -63,14 +62,14 @@ def request(query, params):
 def response(resp):
     results = []
 
-    json_regex = regex_json.search(resp.content)
+    json_regex = regex_json.search(resp.text)
 
     # check if results are returned
     if not json_regex:
         return []
 
-    json_raw = regex_json_remove_end.sub('', regex_json_remove_start.sub('', json_regex.group()))
-    json = loads(json_raw)
+    json_raw = regex_json_remove_end.sub(b'', regex_json_remove_start.sub(b'', json_regex.group()))
+    json = loads(json_raw.decode('utf-8'))
 
     # parse results
     for result in json['Results'].get('items', []):
@@ -78,7 +77,7 @@ def response(resp):
 
         # parse image results
        if result.get('ContentType', '').startswith('image'):
-            img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+            img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
 
             # append result
             results.append({'url': result['SourceUrl'],
@@ -100,7 +99,7 @@ def response(resp):
     # parse images
     for result in json.get('Images', []):
         # decode image url
-        img_url = unquote(regex_img_url_remove_start.sub('', result['Url']))
+        img_url = unquote(regex_img_url_remove_start.sub(b'', result['Url'].encode('utf-8')).decode('utf-8'))
 
         # append result
         results.append({'url': result['SourceUrl'],

+ 5 - 6
searx/engines/tokyotoshokan.py

@@ -11,11 +11,11 @@
 """
 
 import re
-from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
 from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'videos', 'music']
@@ -28,8 +28,7 @@ search_url = base_url + 'search.php?{query}'
 
 # do search-request
 def request(query, params):
-    query = urlencode({'page': params['pageno'],
-                       'terms': query})
+    query = urlencode({'page': params['pageno'], 'terms': query})
     params['url'] = search_url.format(query=query)
     return params
 
@@ -50,7 +49,7 @@ def response(resp):
     size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE)
 
     # processing the results, two rows at a time
-    for i in xrange(0, len(rows), 2):
+    for i in range(0, len(rows), 2):
         # parse the first row
         name_row = rows[i]
 
@@ -79,14 +78,14 @@ def response(resp):
                     groups = size_re.match(item).groups()
                     multiplier = get_filesize_mul(groups[1])
                     params['filesize'] = int(multiplier * float(groups[0]))
-                except Exception as e:
+                except:
                     pass
             elif item.startswith('Date:'):
                 try:
                     # Date: 2016-02-21 21:44 UTC
                     date = datetime.strptime(item, 'Date: %Y-%m-%d %H:%M UTC')
                     params['publishedDate'] = date
-                except Exception as e:
+                except:
                     pass
             elif item.startswith('Comment:'):
                 params['content'] = item

+ 4 - 4
searx/engines/torrentz.py

@@ -12,11 +12,11 @@
 """
 
 import re
-from urllib import urlencode
 from lxml import html
-from searx.engines.xpath import extract_text
 from datetime import datetime
 from searx.engines.nyaa import int_or_zero, get_filesize_mul
+from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files', 'videos', 'music']
@@ -70,7 +70,7 @@ def response(resp):
             size_str = result.xpath('./dd/span[@class="s"]/text()')[0]
             size, suffix = size_str.split()
             params['filesize'] = int(size) * get_filesize_mul(suffix)
-        except Exception as e:
+        except:
             pass
 
         # does our link contain a valid SHA1 sum?
@@ -84,7 +84,7 @@ def response(resp):
             # Fri, 25 Mar 2016 16:29:01
             date = datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S')
             params['publishedDate'] = date
-        except Exception as e:
+        except:
             pass
 
         results.append(params)

+ 4 - 0
searx/engines/translated.py

@@ -9,8 +9,12 @@
  @parse       url, title, content
 """
 import re
+from sys import version_info
 from searx.utils import is_valid_lang
 
+if version_info[0] == 3:
+    unicode = str
+
 categories = ['general']
 url = u'http://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
 web_url = u'http://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'

+ 1 - 2
searx/engines/twitter.py

@@ -12,11 +12,10 @@
  @todo        publishedDate
 """
 
-from urlparse import urljoin
-from urllib import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['social media']

+ 1 - 1
searx/engines/vimeo.py

@@ -13,8 +13,8 @@
 # @todo        set content-parameter with correct data
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos']

+ 5 - 8
searx/engines/wikidata.py

@@ -14,12 +14,11 @@
 from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
-from searx.utils import format_date_by_locale
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
+from searx.url_utils import urlencode
 
 from json import loads
 from lxml.html import fromstring
-from urllib import urlencode
 
 logger = logger.getChild('wikidata')
 result_count = 1
@@ -62,14 +61,13 @@ def request(query, params):
         language = 'en'
 
     params['url'] = url_search.format(
-        query=urlencode({'label': query,
-                        'language': language}))
+        query=urlencode({'label': query, 'language': language}))
     return params
 
 
 def response(resp):
     results = []
-    html = fromstring(resp.content)
+    html = fromstring(resp.text)
     wikidata_ids = html.xpath(wikidata_ids_xpath)
 
     language = resp.search_params['language'].split('-')[0]
@@ -78,10 +76,9 @@ def response(resp):
 
 
     # TODO: make requests asynchronous to avoid timeout when result_count > 1
     for wikidata_id in wikidata_ids[:result_count]:
-        url = url_detail.format(query=urlencode({'page': wikidata_id,
-                                                'uselang': language}))
+        url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content)
+        jsonresponse = loads(htmlresponse.text)
         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'])
 
     return results

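The switch from resp.content to resp.text recurs across the engines: requests exposes raw bytes as .content and decoded text as .text, and json.loads on older Python 3 releases only accepts text. A self-contained sketch using a stand-in response object (the names are illustrative, not part of the diff):

    from json import loads

    class FakeResponse(object):
        # stand-in for a requests.Response
        content = b'{"search": []}'   # bytes
        text = u'{"search": []}'      # decoded text

    data = loads(FakeResponse.text)   # safe on Python 2 and 3
    assert data == {'search': []}
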
+ 9 - 12
searx/engines/wikipedia.py

@@ -11,13 +11,12 @@
 """
 
 from json import loads
-from urllib import urlencode, quote
 from lxml.html import fromstring
-
+from searx.url_utils import quote, urlencode
 
 # search-url
-base_url = 'https://{language}.wikipedia.org/'
-search_postfix = 'w/api.php?'\
+base_url = u'https://{language}.wikipedia.org/'
+search_url = base_url + u'w/api.php?'\
     'action=query'\
     '&format=json'\
     '&{query}'\
@@ -37,16 +36,16 @@ def url_lang(lang):
     else:
         language = lang
 
-    return base_url.format(language=language)
+    return language
 
 
 # do search-request
 def request(query, params):
     if query.islower():
-        query += '|' + query.title()
+        query = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
 
-    params['url'] = url_lang(params['language']) \
-        + search_postfix.format(query=urlencode({'titles': query}))
+    params['url'] = search_url.format(query=urlencode({'titles': query}),
+                                      language=url_lang(params['language']))
 
     return params
 
@@ -78,7 +77,7 @@ def extract_first_paragraph(content, title, image):
 def response(resp):
     results = []
 
-    search_result = loads(resp.content)
+    search_result = loads(resp.text)
 
     # wikipedia article's unique id
     # first valid id is assumed to be the requested article
@@ -99,11 +98,9 @@ def response(resp):
     extract = page.get('extract')
 
     summary = extract_first_paragraph(extract, title, image)
-    if not summary:
-        return []
 
     # link to wikipedia article
-    wikipedia_link = url_lang(resp.search_params['language']) \
+    wikipedia_link = base_url.format(language=url_lang(resp.search_params['language'])) \
         + 'wiki/' + quote(title.replace(' ', '_').encode('utf8'))
 
     results.append({'url': wikipedia_link, 'title': title})

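The new request() keeps the query as UTF-8 encoded bytes, so the title-cased variant has to be built on the decoded text and re-encoded afterwards. The round trip in isolation (sample value is illustrative):

    query = u'paris'.encode('utf-8')
    both = u'{0}|{1}'.format(query.decode('utf-8'), query.decode('utf-8').title()).encode('utf-8')
    assert both == b'paris|Paris'
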
+ 6 - 7
searx/engines/wolframalpha_api.py

@@ -8,8 +8,8 @@
 # @stable      yes
 # @parse       url, infobox
 
-from urllib import urlencode
 from lxml import etree
+from searx.url_utils import urlencode
 
 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@ -37,8 +37,7 @@ image_pods = {'VisualRepresentation',
 
 
 # do search-request
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'input': query}),
-                                      api_key=api_key)
+    params['url'] = search_url.format(query=urlencode({'input': query}), api_key=api_key)
     params['headers']['Referer'] = site_url.format(query=urlencode({'i': query}))
 
     return params
@@ -56,7 +55,7 @@ def replace_pua_chars(text):
                  u'\uf74e': 'i',        # imaginary number
                  u'\uf7d9': '='}        # equals sign
 
-    for k, v in pua_chars.iteritems():
+    for k, v in pua_chars.items():
         text = text.replace(k, v)
 
     return text
@@ -66,7 +65,7 @@ def replace_pua_chars(text):
 def response(resp):
     results = []
 
-    search_results = etree.XML(resp.content)
+    search_results = etree.XML(resp.text)
 
     # return empty array if there are no results
     if search_results.xpath(failure_xpath):
@@ -120,10 +119,10 @@ def response(resp):
     # append infobox
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
-                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
 
     # append link to site
-    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+    results.append({'url': resp.request.headers['Referer'],
                     'title': title,
                     'content': result_content})
 

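dict.iteritems() no longer exists on Python 3; .items() works on both interpreters (on Python 2 it builds a list, which is harmless for a small mapping like pua_chars). The same loop sketched outside the engine:

    pua_chars = {u'\uf74e': 'i', u'\uf7d9': '='}
    text = u'x \uf7d9 \uf74e'

    # portable replacement for .iteritems()
    for k, v in pua_chars.items():
        text = text.replace(k, v)

    assert text == u'x = i'
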
+ 4 - 5
searx/engines/wolframalpha_noapi.py

@@ -10,10 +10,9 @@
 
 
 from json import loads
 from time import time
-from urllib import urlencode
-from lxml.etree import XML
 
 from searx.poolrequests import get as http_get
+from searx.url_utils import urlencode
 
 # search-url
 url = 'https://www.wolframalpha.com/'
@@ -62,7 +61,7 @@ obtain_token()
 # do search-request
 def request(query, params):
     # obtain token if last update was more than an hour
-    if time() - token['last_updated'] > 3600:
+    if time() - (token['last_updated'] or 0) > 3600:
         obtain_token()
     params['url'] = search_url.format(query=urlencode({'input': query}), token=token['value'])
     params['headers']['Referer'] = referer_url.format(query=urlencode({'i': query}))
@@ -112,9 +111,9 @@ def response(resp):
 
 
     results.append({'infobox': infobox_title,
                     'attributes': result_chunks,
-                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer'].decode('utf8')}]})
+                    'urls': [{'title': 'Wolfram|Alpha', 'url': resp.request.headers['Referer']}]})
 
-    results.append({'url': resp.request.headers['Referer'].decode('utf8'),
+    results.append({'url': resp.request.headers['Referer'],
                     'title': 'Wolfram|Alpha (' + infobox_title + ')',
                     'content': result_content})
 

+ 2 - 4
searx/engines/www1x.py

@@ -10,11 +10,9 @@
  @parse       url, title, thumbnail, img_src, content
 """
 
-from urllib import urlencode
-from urlparse import urljoin
 from lxml import html
-import string
 import re
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['images']
@@ -55,7 +53,7 @@ def response(resp):
         cur_element += result_part
 
         # fix xml-error
-        cur_element = string.replace(cur_element, '"></a>', '"/></a>')
+        cur_element = cur_element.replace('"></a>', '"/></a>')
 
         dom = html.fromstring(cur_element)
         link = dom.xpath('//a')[0]

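string.replace() belongs to the function-style helpers that were removed from the string module in Python 3; the str method of the same name behaves identically on both interpreters. In isolation (sample markup is illustrative):

    fragment = '<img src="x"></a>'
    # method call instead of string.replace(fragment, ...)
    fixed = fragment.replace('"></a>', '"/></a>')
    assert fixed == '<img src="x"/></a>'
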
+ 1 - 2
searx/engines/www500px.py

@@ -13,8 +13,7 @@
 """
 
 from json import loads
-from urllib import urlencode
-from urlparse import urljoin
+from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['images']

+ 2 - 2
searx/engines/xpath.py

@@ -1,13 +1,13 @@
 from lxml import html
-from urllib import urlencode, unquote
-from urlparse import urlparse, urljoin
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
 from searx.utils import html_to_text
+from searx.url_utils import unquote, urlencode, urljoin, urlparse
 
 search_url = None
 url_xpath = None
 content_xpath = None
 title_xpath = None
+paging = False
 suggestion_xpath = ''
 results_xpath = ''
 

+ 1 - 1
searx/engines/yacy.py

@@ -13,8 +13,8 @@
 # @todo        parse video, audio and file results
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 from searx.utils import html_to_text
 

+ 1 - 2
searx/engines/yahoo.py

@@ -11,10 +11,9 @@
  @parse       url, title, content, suggestion
 """
 
-from urllib import urlencode
-from urlparse import unquote
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
+from searx.url_utils import unquote, urlencode
 
 # engine dependent config
 categories = ['general']

+ 3 - 3
searx/engines/yahoo_news.py

@@ -9,13 +9,13 @@
 # @stable      no (HTML can change)
 # @parse       url, title, content, publishedDate
 
-from urllib import urlencode
+import re
+from datetime import datetime, timedelta
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
-from datetime import datetime, timedelta
-import re
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['news']

+ 2 - 2
searx/engines/yandex.py

@@ -9,9 +9,9 @@
  @parse       url, title, content
 """
 
-from urllib import urlencode
 from lxml import html
-from searx.search import logger
+from searx import logger
+from searx.url_utils import urlencode
 
 logger = logger.getChild('yandex engine')
 

+ 1 - 1
searx/engines/youtube_api.py

@@ -9,8 +9,8 @@
 # @parse       url, title, content, publishedDate, thumbnail, embedded
 
 from json import loads
-from urllib import urlencode
 from dateutil import parser
+from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos', 'music']

+ 1 - 1
searx/engines/youtube_noapi.py

@@ -8,10 +8,10 @@
 # @stable      no
 # @parse       url, title, content, publishedDate, thumbnail, embedded
 
-from urllib import quote_plus
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import list_get
+from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['videos', 'music']

+ 4 - 1
searx/plugins/__init__.py

@@ -14,9 +14,12 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 
 (C) 2015 by Adam Tauber, <asciimoo@gmail.com>
 '''
-from sys import exit
+from sys import exit, version_info
 from searx import logger
 
+if version_info[0] == 3:
+    unicode = str
+
 logger = logger.getChild('plugins')
 
 from searx.plugins import (doai_rewrite,

+ 1 - 1
searx/plugins/doai_rewrite.py

@@ -1,6 +1,6 @@
 from flask_babel import gettext
 import re
-from urlparse import urlparse, parse_qsl
+from searx.url_utils import urlparse, parse_qsl
 
 regex = re.compile(r'10\.\d{4,9}/[^\s]+')
 

+ 4 - 1
searx/plugins/https_rewrite.py

@@ -16,14 +16,17 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 import re
-from urlparse import urlparse
+import sys
 from lxml import etree
 from os import listdir, environ
 from os.path import isfile, isdir, join
 from searx.plugins import logger
 from flask_babel import gettext
 from searx import searx_dir
+from searx.url_utils import urlparse
 
+if sys.version_info[0] == 3:
+    unicode = str
 
 name = "HTTPS rewrite"
 description = gettext('Rewrite HTTP links to HTTPS if possible')

+ 2 - 2
searx/plugins/self_info.py

@@ -22,7 +22,7 @@ default_on = True
 
 
 
 
 # Self User Agent regex
-p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
+p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)
 
 
 # attach callback to the post search hook
@@ -31,7 +31,7 @@ p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
 def post_search(request, search):
     if search.search_query.pageno > 1:
         return True
-    if search.search_query.query == 'ip':
+    if search.search_query.query == b'ip':
         x_forwarded_for = request.headers.getlist("X-Forwarded-For")
         if x_forwarded_for:
             ip = x_forwarded_for[0]

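Because the raw query is now stored as UTF-8 bytes, both the regular expression and the literal it is compared against have to be bytes as well. The same idea in isolation (sample strings are illustrative):

    import re

    p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE)   # bytes pattern
    query = u'ip'.encode('utf-8')

    assert query == b'ip'
    assert p.match(b'what is my user agent')
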
+ 1 - 1
searx/plugins/tracker_url_remover.py

@@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 
 from flask_babel import gettext
 import re
-from urlparse import urlunparse
+from searx.url_utils import urlunparse
 
 regexes = {re.compile(r'utm_[^&]+&?'),
            re.compile(r'(wkey|wemail)[^&]+&?'),

+ 9 - 9
searx/preferences.py

@@ -23,7 +23,7 @@ class Setting(object):
     def __init__(self, default_value, **kwargs):
         super(Setting, self).__init__()
         self.value = default_value
-        for key, value in kwargs.iteritems():
+        for key, value in kwargs.items():
             setattr(self, key, value)
 
         self._post_init()
@@ -38,7 +38,7 @@ class Setting(object):
         return self.value
 
     def save(self, name, resp):
-        resp.set_cookie(name, bytes(self.value), max_age=COOKIE_MAX_AGE)
+        resp.set_cookie(name, self.value, max_age=COOKIE_MAX_AGE)
 
 
 class StringSetting(Setting):
@@ -133,7 +133,7 @@ class MapSetting(Setting):
 
 
     def save(self, name, resp):
         if hasattr(self, 'key'):
-            resp.set_cookie(name, bytes(self.key), max_age=COOKIE_MAX_AGE)
+            resp.set_cookie(name, self.key, max_age=COOKIE_MAX_AGE)
 
 
 class SwitchableSetting(Setting):
@@ -194,7 +194,7 @@ class EnginesSetting(SwitchableSetting):
     def _post_init(self):
         super(EnginesSetting, self)._post_init()
         transformed_choices = []
-        for engine_name, engine in self.choices.iteritems():
+        for engine_name, engine in self.choices.items():
             for category in engine.categories:
                 transformed_choice = dict()
                 transformed_choice['default_on'] = not engine.disabled
@@ -241,9 +241,9 @@ class Preferences(object):
                                    'language': SearchLanguageSetting(settings['search']['language'],
                                                                      choices=LANGUAGE_CODES),
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
-                                                               choices=settings['locales'].keys() + ['']),
+                                                               choices=list(settings['locales'].keys()) + ['']),
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
-                                                                     choices=autocomplete.backends.keys() + ['']),
+                                                                     choices=list(autocomplete.backends.keys()) + ['']),
                                    'image_proxy': MapSetting(settings['server']['image_proxy'],
                                                              map={'': settings['server']['image_proxy'],
                                                                   '0': False,
@@ -260,7 +260,7 @@ class Preferences(object):
         self.unknown_params = {}
 
     def parse_cookies(self, input_data):
-        for user_setting_name, user_setting in input_data.iteritems():
+        for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
                 self.key_value_settings[user_setting_name].parse(user_setting)
             elif user_setting_name == 'disabled_engines':
@@ -274,7 +274,7 @@ class Preferences(object):
         disabled_engines = []
         enabled_categories = []
         disabled_plugins = []
-        for user_setting_name, user_setting in input_data.iteritems():
+        for user_setting_name, user_setting in input_data.items():
             if user_setting_name in self.key_value_settings:
                 self.key_value_settings[user_setting_name].parse(user_setting)
             elif user_setting_name.startswith('engine_'):
@@ -295,7 +295,7 @@ class Preferences(object):
             return self.key_value_settings[user_setting_name].get_value()
 
     def save(self, resp):
-        for user_setting_name, user_setting in self.key_value_settings.iteritems():
+        for user_setting_name, user_setting in self.key_value_settings.items():
             user_setting.save(user_setting_name, resp)
         self.engines.save(resp)
         self.plugins.save(resp)

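On Python 3, dict.keys() returns a view object that cannot be concatenated with a list, hence the explicit list() wrappers above. A short illustration with made-up settings:

    settings = {'locales': {'en': 'English', 'hu': 'Magyar'}}
    choices = list(settings['locales'].keys()) + ['']
    assert '' in choices and 'en' in choices
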
+ 6 - 2
searx/query.py

@@ -21,8 +21,12 @@ from searx.languages import language_codes
 from searx.engines import (
     categories, engines, engine_shortcuts
 )
-import string
 import re
+import string
+import sys
+
+if sys.version_info[0] == 3:
+    unicode = str
 
 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
 
@@ -146,7 +150,7 @@ class SearchQuery(object):
     """container for all the search parameters (query, language, etc...)"""
     """container for all the search parameters (query, language, etc...)"""
 
 
     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range):
-        self.query = query
+        self.query = query.encode('utf-8')
         self.engines = engines
         self.categories = categories
         self.lang = lang

+ 5 - 1
searx/results.py

@@ -1,9 +1,13 @@
 import re
+import sys
 from collections import defaultdict
 from operator import itemgetter
 from threading import RLock
-from urlparse import urlparse, unquote
 from searx.engines import engines
+from searx.url_utils import urlparse, unquote
+
+if sys.version_info[0] == 3:
+    basestring = str
 
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

+ 10 - 2
searx/search.py

@@ -16,8 +16,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 import gc
+import sys
 import threading
-from thread import start_new_thread
 from time import time
 from uuid import uuid4
 import requests.exceptions
@@ -33,6 +33,14 @@ from searx import logger
 from searx.plugins import plugins
 from searx.exceptions import SearxParameterException
 
+try:
+    from thread import start_new_thread
+except:
+    from _thread import start_new_thread
+
+if sys.version_info[0] == 3:
+    unicode = str
+
 logger = logger.getChild('search')
 
 number_of_searches = 0
@@ -387,7 +395,7 @@ class Search(object):
             request_params['time_range'] = search_query.time_range
 
             # append request to list
-            requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params))
+            requests.append((selected_engine['name'], search_query.query, request_params))
 
             # update timeout_limit
             timeout_limit = max(timeout_limit, engine.timeout)

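Python 2's thread module was renamed to _thread in Python 3; the try/except import above keeps a single code path. Sketched outside searx (the worker function is a placeholder):

    try:
        from thread import start_new_thread    # Python 2
    except ImportError:
        from _thread import start_new_thread   # Python 3

    def work(message):
        pass  # placeholder task

    start_new_thread(work, ('hello',))
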
+ 1 - 1
searx/settings_robot.yml

@@ -17,7 +17,7 @@ server:
 
 
 ui:
     themes_path : ""
-    default_theme : legacy
+    default_theme : oscar
     default_locale : ""
 
 outgoing:

+ 1 - 1
searx/templates/courgette/404.html

@@ -3,7 +3,7 @@
 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

+ 1 - 1
searx/templates/legacy/404.html

@@ -3,7 +3,7 @@
 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

+ 1 - 1
searx/templates/oscar/404.html

@@ -3,7 +3,7 @@
 <div class="text-center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

+ 1 - 1
searx/templates/pix-art/404.html

@@ -3,7 +3,7 @@
 <div class="center">
     <h1>{{ _('Page not found') }}</h1>
     {% autoescape false %}
-    <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.decode('utf-8').format(url_for('index'), _('search page'))) }}</p>
+    <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p>
     {% endautoescape %}
 </div>
 {% endblock %}

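All four themes drop str.decode('utf-8'), which does not exist on Python 3, and call a unicode() helper instead; webapp.py below passes that helper into the template context (kwargs['unicode'] = unicode). The equivalent expression in plain Python, assuming the same alias:

    import sys
    if sys.version_info[0] == 3:
        unicode = str

    link = unicode('<a href="{}">{}</a>').format('/', 'search page')
    assert link == u'<a href="/">search page</a>'
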
+ 26 - 16
searx/testing.py

@@ -1,13 +1,16 @@
 # -*- coding: utf-8 -*-
 """Shared testing code."""
 
-from plone.testing import Layer
-from unittest2 import TestCase
-from os.path import dirname, join, abspath
-
 
 import os
 import subprocess
+import traceback
+
+
+from os.path import dirname, join, abspath
+
+from splinter import Browser
+from unittest2 import TestCase
 
 
 class SearxTestLayer:
@@ -32,7 +35,7 @@ class SearxTestLayer:
     testTearDown = classmethod(testTearDown)
 
 
-class SearxRobotLayer(Layer):
+class SearxRobotLayer():
     """Searx Robot Test Layer"""
 
     def setUp(self):
@@ -62,7 +65,12 @@ class SearxRobotLayer(Layer):
         del os.environ['SEARX_SETTINGS_PATH']
 
 
-SEARXROBOTLAYER = SearxRobotLayer()
+# SEARXROBOTLAYER = SearxRobotLayer()
+def run_robot_tests(tests):
+    print('Running {0} tests'.format(len(tests)))
+    for test in tests:
+        with Browser() as browser:
+            test(browser)
 
 
 class SearxTestCase(TestCase):
@@ -72,17 +80,19 @@ class SearxTestCase(TestCase):
 
 
 
 
 if __name__ == '__main__':
-    from tests.test_robot import test_suite
     import sys
-    from zope.testrunner.runner import Runner
+    # test cases
+    from tests import robot
 
     base_dir = abspath(join(dirname(__file__), '../tests'))
     if sys.argv[1] == 'robot':
-        r = Runner(['--color',
-                    '--auto-progress',
-                    '--stop-on-error',
-                    '--path',
-                    base_dir],
-                   found_suites=[test_suite()])
-        r.run()
-        sys.exit(int(r.failed))
+        test_layer = SearxRobotLayer()
+        errors = False
+        try:
+            test_layer.setUp()
+            run_robot_tests([getattr(robot, x) for x in dir(robot) if x.startswith('test_')])
+        except Exception:
+            errors = True
+            print('Error occured: {0}'.format(traceback.format_exc()))
+        test_layer.tearDown()
+        sys.exit(1 if errors else 0)

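The replacement runner collects every module-level function whose name starts with test_ and calls each one with a fresh splinter Browser. The collection step alone, shown with a stand-in module object instead of tests.robot:

    class robot_module(object):            # stand-in for the tests.robot module
        @staticmethod
        def test_index(browser):
            pass

        @staticmethod
        def test_404(browser):
            pass

    tests = [getattr(robot_module, x) for x in dir(robot_module) if x.startswith('test_')]
    assert len(tests) == 2
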
+ 28 - 0
searx/url_utils.py

@@ -0,0 +1,28 @@
+from sys import version_info
+
+if version_info[0] == 2:
+    from urllib import quote, quote_plus, unquote, urlencode
+    from urlparse import parse_qsl, urljoin, urlparse, urlunparse, ParseResult
+else:
+    from urllib.parse import (
+        parse_qsl,
+        quote,
+        quote_plus,
+        unquote,
+        urlencode,
+        urljoin,
+        urlparse,
+        urlunparse,
+        ParseResult
+    )
+
+
+__export__ = (parse_qsl,
+              quote,
+              quote_plus,
+              unquote,
+              urlencode,
+              urljoin,
+              urlparse,
+              urlunparse,
+              ParseResult)

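This new module is the central shim the engine changes import from: it re-exports urllib/urlparse on Python 2 and urllib.parse on Python 3 under a single path, so engine code stays identical on both interpreters. Typical usage, assuming the searx package is importable (the URL is illustrative):

    from searx.url_utils import urlencode, urljoin, urlparse

    query_string = urlencode({'q': 'searx'})
    full_url = urljoin('https://example.org/', 'search?' + query_string)
    assert urlparse(full_url).netloc == 'example.org'
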
+ 18 - 8
searx/utils.py

@@ -1,11 +1,9 @@
-import cStringIO
 import csv
 import os
 import re
 
 from babel.dates import format_date
 from codecs import getincrementalencoder
-from HTMLParser import HTMLParser
 from imp import load_source
 from os.path import splitext, join
 from random import choice
@@ -16,6 +14,19 @@ from searx.languages import language_codes
 from searx import settings
 from searx import logger
 
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+try:
+    from HTMLParser import HTMLParser
+except:
+    from html.parser import HTMLParser
+
+if sys.version_info[0] == 3:
+    unichr = chr
+    unicode = str
 
 logger = logger.getChild('utils')
 
@@ -140,7 +151,7 @@ class UnicodeWriter:
 
 
     def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
         # Redirect output to a queue
-        self.queue = cStringIO.StringIO()
+        self.queue = StringIO()
         self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
         self.stream = f
         self.encoder = getincrementalencoder(encoding)()
@@ -152,14 +163,13 @@ class UnicodeWriter:
                 unicode_row.append(col.encode('utf-8').strip())
             else:
                 unicode_row.append(col)
-        self.writer.writerow(unicode_row)
+        self.writer.writerow([x.decode('utf-8') if hasattr(x, 'decode') else x for x in unicode_row])
         # Fetch UTF-8 output from the queue ...
-        data = self.queue.getvalue()
-        data = data.decode("utf-8")
+        data = self.queue.getvalue().strip('\x00')
         # ... and reencode it into the target encoding
         data = self.encoder.encode(data)
         # write to the target stream
-        self.stream.write(data)
+        self.stream.write(data.decode('utf-8'))
         # empty queue
         self.queue.truncate(0)
 
@@ -231,7 +241,7 @@ def dict_subset(d, properties):
 
 
 def prettify_url(url, max_length=74):
     if len(url) > max_length:
-        chunk_len = max_length / 2 + 1
+        chunk_len = int(max_length / 2 + 1)
         return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
     else:
         return url

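In prettify_url the division result is wrapped in int() because / is true division on Python 3 and returns a float, which cannot be used as a slice index. The arithmetic in isolation:

    max_length = 74
    chunk_len = int(max_length / 2 + 1)    # 38 on both interpreters
    url = 'x' * 100
    shortened = u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
    assert len(shortened) == 2 * chunk_len + 5
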
+ 23 - 13
searx/webapp.py

@@ -22,11 +22,12 @@ if __name__ == '__main__':
     from os.path import realpath, dirname
     path.append(realpath(dirname(realpath(__file__)) + '/../'))
 
-import cStringIO
 import hashlib
 import hmac
 import json
 import os
+import sys
+
 import requests
 
 from searx import logger
@@ -42,8 +43,6 @@ except:
     exit(1)
 from cgi import escape
 from datetime import datetime, timedelta
-from urllib import urlencode
-from urlparse import urlparse, urljoin
 from werkzeug.contrib.fixers import ProxyFix
 from flask import (
     Flask, request, render_template, url_for, Response, make_response,
@@ -52,7 +51,7 @@ from flask import (
 from flask_babel import Babel, gettext, format_date, format_decimal
 from flask.json import jsonify
 from searx import settings, searx_dir, searx_debug
-from searx.exceptions import SearxException, SearxParameterException
+from searx.exceptions import SearxParameterException
 from searx.engines import (
     categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
 )
@@ -69,6 +68,7 @@ from searx.autocomplete import searx_bang, backends as autocomplete_backends
 from searx.plugins import plugins
 from searx.preferences import Preferences, ValidationException
 from searx.answerers import answerers
+from searx.url_utils import urlencode, urlparse, urljoin
 
 # check if the pyopenssl package is installed.
 # It is needed for SSL connection without trouble, see #298
@@ -78,6 +78,15 @@ except ImportError:
     logger.critical("The pyopenssl package has to be installed.\n"
                     "Some HTTPS connections will fail")
 
+try:
+    from cStringIO import StringIO
+except:
+    from io import StringIO
+
+
+if sys.version_info[0] == 3:
+    unicode = str
+
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
 WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@ -357,6 +366,8 @@ def render(template_name, override_theme=None, **kwargs):
 
 
     kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
 
+    kwargs['unicode'] = unicode
+
     kwargs['scripts'] = set()
     for plugin in request.user_plugins:
         for script in plugin.js_dependencies:
@@ -375,7 +386,7 @@ def render(template_name, override_theme=None, **kwargs):
 def pre_request():
     request.errors = []
 
-    preferences = Preferences(themes, categories.keys(), engines, plugins)
+    preferences = Preferences(themes, list(categories.keys()), engines, plugins)
     request.preferences = preferences
     try:
         preferences.parse_cookies(request.cookies)
@@ -479,10 +490,8 @@ def index():
     for result in results:
         if output_format == 'html':
             if 'content' in result and result['content']:
-                result['content'] = highlight_content(escape(result['content'][:1024]),
-                                                      search_query.query.encode('utf-8'))
-            result['title'] = highlight_content(escape(result['title'] or u''),
-                                                search_query.query.encode('utf-8'))
+                result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
+            result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
         else:
             if result.get('content'):
                 result['content'] = html_to_text(result['content']).strip()
@@ -510,7 +519,7 @@ def index():
                     result['publishedDate'] = format_date(result['publishedDate'])
 
     if output_format == 'json':
-        return Response(json.dumps({'query': search_query.query,
+        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
                                     'number_of_results': number_of_results,
                                     'results': results,
                                     'answers': list(result_container.answers),
@@ -519,7 +528,7 @@ def index():
                                     'suggestions': list(result_container.suggestions)}),
                         mimetype='application/json')
     elif output_format == 'csv':
-        csv = UnicodeWriter(cStringIO.StringIO())
+        csv = UnicodeWriter(StringIO())
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')
         csv.writerow(keys)
         for row in results:
@@ -527,7 +536,7 @@ def index():
             csv.writerow([row.get(key, '') for key in keys])
         csv.stream.seek(0)
         response = Response(csv.stream.read(), mimetype='application/csv')
-        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.encode('utf-8'))
+        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query)
         response.headers.add('Content-Disposition', cont_disp)
         return response
     elif output_format == 'rss':
@@ -578,7 +587,7 @@ def autocompleter():
     disabled_engines = request.preferences.engines.get_disabled()
 
     # parse query
-    raw_text_query = RawTextQuery(request.form.get('q', '').encode('utf-8'), disabled_engines)
+    raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
     raw_text_query.parse_query()
 
     # check if search query is set
@@ -820,6 +829,7 @@ def page_not_found(e):
 
 
 
 
 def run():
+    logger.debug('starting webserver on %s:%s', settings['server']['port'], settings['server']['bind_address'])
     app.run(
         debug=searx_debug,
         use_debugger=searx_debug,

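cStringIO is gone on Python 3, so webapp.py (like utils.py above) falls back to io.StringIO, which exists on both interpreters as a unicode-only buffer. The fallback on its own:

    try:
        from cStringIO import StringIO    # Python 2, C implementation
    except ImportError:
        from io import StringIO           # Python 3

    buf = StringIO()
    buf.write(u'title,url\n')
    assert buf.getvalue() == u'title,url\n'
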
+ 75 - 0
tests/robot/__init__.py

@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+from time import sleep
+
+url = "http://localhost:11111/"
+
+
+def test_index(browser):
+    # Visit URL
+    browser.visit(url)
+    assert browser.is_text_present('about')
+
+
+def test_404(browser):
+    # Visit URL
+    browser.visit(url + 'missing_link')
+    assert browser.is_text_present('Page not found')
+
+
+def test_about(browser):
+    browser.visit(url)
+    browser.click_link_by_text('about')
+    assert browser.is_text_present('Why use searx?')
+
+
+def test_preferences(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+    assert browser.is_text_present('Preferences')
+    assert browser.is_text_present('Cookies')
+
+    assert browser.is_element_present_by_xpath('//label[@for="checkbox_dummy"]')
+
+
+def test_preferences_engine_select(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+
+    assert browser.is_element_present_by_xpath('//a[@href="#tab_engine"]')
+    browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+    assert not browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+    browser.find_by_xpath('//label[@for="engine_general_dummy__general"]').first.check()
+    browser.find_by_xpath('//input[@value="save"]').first.click()
+
+    # waiting for the redirect - without this the test is flaky..
+    sleep(1)
+
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+    browser.find_by_xpath('//a[@href="#tab_engine"]').first.click()
+
+    assert browser.find_by_xpath('//input[@id="engine_general_dummy__general"]').first.checked
+
+
+def test_preferences_locale(browser):
+    browser.visit(url)
+    browser.click_link_by_text('preferences')
+
+    browser.select('locale', 'hu')
+    browser.find_by_xpath('//input[@value="save"]').first.click()
+
+    # waiting for the redirect - without this the test is flaky..
+    sleep(1)
+
+    browser.visit(url)
+    browser.click_link_by_text('beállítások')
+    browser.is_text_present('Beállítások')
+
+
+def test_search(browser):
+    browser.visit(url)
+    browser.fill('q', 'test search query')
+    browser.find_by_xpath('//button[@type="submit"]').first.click()
+    assert browser.is_text_present('didn\'t find any results')

+ 0 - 153
tests/robot/test_basic.robot

@@ -1,153 +0,0 @@
-*** Settings ***
-Library         Selenium2Library  timeout=10  implicit_wait=0.5
-Test Setup      Open Browser  http://localhost:11111/
-Test Teardown   Close All Browsers
-
-
-*** Keywords ***
-Submit Preferences
-    Set Selenium Speed  2 seconds
-    Submit Form  id=search_form
-    Location Should Be  http://localhost:11111/
-    Set Selenium Speed  0 seconds
-
-
-*** Test Cases ***
-Front page
-    Page Should Contain  about
-    Page Should Contain  preferences
-
-404 page
-    Go To  http://localhost:11111/no-such-page
-    Page Should Contain  Page not found
-    Page Should Contain  Go to search page
-
-About page
-    Click Element  link=about
-    Page Should Contain  Why use searx?
-    Page Should Contain Element  link=search engines
-
-Preferences page
-    Click Element  link=preferences
-    Page Should Contain  Preferences
-    Page Should Contain  Default categories
-    Page Should Contain  Currently used search engines
-    Page Should Contain  dummy dummy
-    Page Should Contain  general dummy
-
-Switch category
-    Go To  http://localhost:11111/preferences
-    Page Should Contain Checkbox  category_general
-    Page Should Contain Checkbox  category_dummy
-    Click Element  xpath=//*[.="general"]
-    Click Element  xpath=//*[.="dummy"]
-    Submit Preferences
-    Checkbox Should Not Be Selected  category_general
-    Checkbox Should Be Selected  category_dummy
-
-Change language
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Select From List  locale  hu
-    Submit Preferences
-    Page Should Contain  rólunk
-    Page Should Contain  beállítások
-
-Change method
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Select From List  method  GET
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  method  GET
-    Select From List  method  POST
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  method  POST
-
-Change theme
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  legacy
-    Select From List  theme  oscar
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  oscar
-
-Change safesearch
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  safesearch  None
-    Select From List  safesearch  Strict
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  safesearch  Strict
-
-Change image proxy
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  image_proxy  Disabled
-    Select From List  image_proxy  Enabled
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  image_proxy  Enabled
-
-Change search language
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Default language
-    Select From List  language  Türkçe - tr-TR
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  language  Türkçe - tr-TR
-
-Change autocomplete
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  autocomplete  -
-    Select From List  autocomplete  google
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  autocomplete  google
-
-Change allowed/disabled engines
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Engine name
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy']  Block
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_general_general_dummy']  Block
-    Click Element  xpath=//label[@class="deny"][@for='engine_general_general_dummy']
-    Submit Preferences
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Engine name
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_dummy_dummy_dummy']  Block
-    Element Should Contain  xpath=//label[@class="deny"][@for='engine_general_general_dummy']  \
-
-Block a plugin
-    Page Should Contain  about
-    Page Should Contain  preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  legacy
-    Select From List  theme  oscar
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    List Selection Should Be  theme  oscar
-    Page Should Contain  Plugins
-    Click Link  Plugins
-    Checkbox Should Not Be Selected  id=plugin_HTTPS_rewrite
-    Click Element  xpath=//label[@for='plugin_HTTPS_rewrite']
-    Submit Preferences
-    Go To  http://localhost:11111/preferences
-    Page Should Contain  Plugins
-    Click Link  Plugins
-    Checkbox Should Be Selected  id=plugin_HTTPS_rewrite

+ 2 - 2
tests/unit/engines/test_archlinux.py

@@ -25,7 +25,7 @@ class TestArchLinuxEngine(SearxTestCase):
         self.assertTrue(query in params['url'])
         self.assertTrue('wiki.archlinux.org' in params['url'])
 
-        for lang, domain in domains.iteritems():
+        for lang, domain in domains.items():
             dic['language'] = lang
             params = archlinux.request(query, dic)
             self.assertTrue(domain in params['url'])
@@ -102,5 +102,5 @@ class TestArchLinuxEngine(SearxTestCase):
         for exp in expected:
             res = results[i]
             i += 1
-            for key, value in exp.iteritems():
+            for key, value in exp.items():
                 self.assertEqual(res[key], value)

+ 3 - 3
tests/unit/engines/test_bing.py

@@ -7,18 +7,18 @@ from searx.testing import SearxTestCase
 class TestBingEngine(SearxTestCase):
 
     def test_request(self):
-        query = 'test_query'
+        query = u'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
         dicto['language'] = 'fr_FR'
-        params = bing.request(query, dicto)
+        params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('language%3AFR' in params['url'])
         self.assertTrue('bing.com' in params['url'])
 
         dicto['language'] = 'all'
-        params = bing.request(query, dicto)
+        params = bing.request(query.encode('utf-8'), dicto)
         self.assertTrue('language' in params['url'])
 
     def test_response(self):

+ 6 - 6
tests/unit/engines/test_bing_news.py

@@ -36,10 +36,10 @@ class TestBingNewsEngine(SearxTestCase):
         self.assertRaises(AttributeError, bing_news.response, '')
         self.assertRaises(AttributeError, bing_news.response, '[]')
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(bing_news.response(response), [])
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(bing_news.response(response), [])
 
         html = """<?xml version="1.0" encoding="utf-8" ?>
@@ -74,7 +74,7 @@ class TestBingNewsEngine(SearxTestCase):
         </item>
         </item>
     </channel>
     </channel>
 </rss>"""  # noqa
 </rss>"""  # noqa
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 2)
         self.assertEqual(len(results), 2)
@@ -113,7 +113,7 @@ class TestBingNewsEngine(SearxTestCase):
         </item>
         </item>
     </channel>
     </channel>
 </rss>"""  # noqa
 </rss>"""  # noqa
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
         self.assertEqual(len(results), 1)
@@ -136,11 +136,11 @@ class TestBingNewsEngine(SearxTestCase):
     </channel>
     </channel>
 </rss>"""  # noqa
 </rss>"""  # noqa
 
 
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = bing_news.response(response)
         results = bing_news.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
         self.assertEqual(len(results), 0)
 
 
         html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
         html = """<?xml version="1.0" encoding="utf-8" ?>gabarge"""
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)
         self.assertRaises(lxml.etree.XMLSyntaxError, bing_news.response, response)

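The mocks switch from `content=` to `text=` because the ported engines now read the response's `text` attribute, and the fixtures are UTF-8 encoded so lxml can parse markup that carries an explicit encoding declaration (lxml rejects unicode strings that declare an encoding). A minimal stand-in for such a mock, assuming only the `mock` and `lxml` packages these tests already use:

    import mock
    from lxml import etree

    xml = u'<?xml version="1.0" encoding="utf-8" ?><rss><channel></channel></rss>'
    # Passing bytes avoids lxml's "Unicode strings with encoding declaration
    # are not supported" error that a plain unicode string would trigger.
    response = mock.Mock(text=xml.encode('utf-8'))
    root = etree.fromstring(response.text)
    assert root.tag == 'rss'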
+ 6 - 6
tests/unit/engines/test_btdigg.py

@@ -22,10 +22,10 @@ class TestBtdiggEngine(SearxTestCase):
         self.assertRaises(AttributeError, btdigg.response, '')
         self.assertRaises(AttributeError, btdigg.response, '[]')
 
-        response = mock.Mock(content='<html></html>')
+        response = mock.Mock(text='<html></html>')
         self.assertEqual(btdigg.response(response), [])
 
-        html = """
+        html = u"""
         <div id="search_res">
             <table>
                 <tr>
@@ -82,7 +82,7 @@ class TestBtdiggEngine(SearxTestCase):
             </table>
         </div>
         """
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = btdigg.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 1)
@@ -101,12 +101,12 @@ class TestBtdiggEngine(SearxTestCase):
             </table>
         </div>
         """
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = btdigg.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 0)
 
-        html = """
+        html = u"""
         <div id="search_res">
             <table>
                 <tr>
@@ -367,7 +367,7 @@ class TestBtdiggEngine(SearxTestCase):
             </table>
         </div>
         """
-        response = mock.Mock(content=html)
+        response = mock.Mock(text=html.encode('utf-8'))
         results = btdigg.response(response)
         self.assertEqual(type(results), list)
         self.assertEqual(len(results), 5)

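The added `u"""` prefixes keep the HTML fixtures unicode on Python 2 (on Python 3.3+ the prefix is accepted and is a no-op), so the later `.encode('utf-8')` step yields identical bytes under both interpreters. A toy example, unrelated to the btdigg fixtures themselves:

    # u'' is redundant on Python 3 but forces a unicode object on Python 2.
    html = u'<div id="search_res"><td>caf\u00e9</td></div>'
    data = html.encode('utf-8')   # same bytes on Python 2 and Python 3
    assert isinstance(data, bytes)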
Some files were not shown because too many files changed in this diff