
Drop Python 2 (1/n): remove unicode string and url_utils

Dalf committed 4 years ago · commit 1022228d95
100 changed files with 303 additions and 448 deletions
  1. Makefile (+0 -5)
  2. manage.sh (+2 -2)
  3. searx/__init__.py (+2 -6)
  4. searx/answerers/__init__.py (+2 -6)
  5. searx/answerers/random/answerer.py (+7 -13)
  6. searx/answerers/statistics/answerer.py (+1 -4)
  7. searx/autocomplete.py (+7 -10)
  8. searx/engines/1337x.py (+2 -1)
  9. searx/engines/acgsou.py (+2 -2)
  10. searx/engines/apkmirror.py (+2 -1)
  11. searx/engines/archlinux.py (+1 -1)
  12. searx/engines/arxiv.py (+2 -2)
  13. searx/engines/base.py (+1 -1)
  14. searx/engines/bing.py (+2 -2)
  15. searx/engines/bing_images.py (+2 -2)
  16. searx/engines/bing_news.py (+1 -2)
  17. searx/engines/bing_videos.py (+1 -1)
  18. searx/engines/btdigg.py (+1 -1)
  19. searx/engines/currency_convert.py (+1 -4)
  20. searx/engines/dailymotion.py (+1 -1)
  21. searx/engines/deezer.py (+2 -2)
  22. searx/engines/deviantart.py (+2 -1)
  23. searx/engines/dictzone.py (+3 -3)
  24. searx/engines/digbt.py (+1 -4)
  25. searx/engines/digg.py (+1 -1)
  26. searx/engines/doku.py (+1 -1)
  27. searx/engines/duckduckgo.py (+1 -1)
  28. searx/engines/duckduckgo_definitions.py (+1 -1)
  29. searx/engines/duckduckgo_images.py (+1 -1)
  30. searx/engines/duden.py (+1 -1)
  31. searx/engines/etools.py (+1 -1)
  32. searx/engines/fdroid.py (+1 -1)
  33. searx/engines/filecrop.py (+2 -5)
  34. searx/engines/flickr.py (+1 -1)
  35. searx/engines/flickr_noapi.py (+5 -5)
  36. searx/engines/framalibre.py (+2 -5)
  37. searx/engines/frinkiac.py (+1 -1)
  38. searx/engines/genius.py (+1 -1)
  39. searx/engines/gentoo.py (+2 -2)
  40. searx/engines/gigablast.py (+1 -1)
  41. searx/engines/github.py (+1 -1)
  42. searx/engines/google.py (+1 -1)
  43. searx/engines/google_images.py (+2 -3)
  44. searx/engines/google_news.py (+1 -1)
  45. searx/engines/google_videos.py (+1 -1)
  46. searx/engines/ina.py (+2 -5)
  47. searx/engines/invidious.py (+1 -1)
  48. searx/engines/json_engine.py (+1 -6)
  49. searx/engines/kickass.py (+1 -1)
  50. searx/engines/mediawiki.py (+2 -2)
  51. searx/engines/microsoft_academic.py (+1 -2)
  52. searx/engines/mixcloud.py (+1 -1)
  53. searx/engines/nyaa.py (+1 -1)
  54. searx/engines/openstreetmap.py (+4 -4)
  55. searx/engines/peertube.py (+1 -1)
  56. searx/engines/photon.py (+1 -1)
  57. searx/engines/piratebay.py (+5 -3)
  58. searx/engines/pubmed.py (+1 -1)
  59. searx/engines/qwant.py (+3 -3)
  60. searx/engines/reddit.py (+1 -1)
  61. searx/engines/scanr_structures.py (+2 -2)
  62. searx/engines/searchcode_code.py (+1 -1)
  63. searx/engines/searchcode_doc.py (+1 -1)
  64. searx/engines/seedpeer.py (+1 -1)
  65. searx/engines/soundcloud.py (+3 -6)
  66. searx/engines/spotify.py (+4 -4)
  67. searx/engines/stackoverflow.py (+1 -1)
  68. searx/engines/tokyotoshokan.py (+1 -1)
  69. searx/engines/torrentz.py (+1 -1)
  70. searx/engines/translated.py (+4 -4)
  71. searx/engines/twitter.py (+1 -1)
  72. searx/engines/unsplash.py (+1 -1)
  73. searx/engines/vimeo.py (+1 -1)
  74. searx/engines/wikidata.py (+7 -7)
  75. searx/engines/wikipedia.py (+2 -2)
  76. searx/engines/wolframalpha_api.py (+10 -10)
  77. searx/engines/wolframalpha_noapi.py (+1 -1)
  78. searx/engines/www1x.py (+1 -1)
  79. searx/engines/xpath.py (+3 -3)
  80. searx/engines/yacy.py (+1 -1)
  81. searx/engines/yahoo.py (+1 -1)
  82. searx/engines/yahoo_news.py (+2 -2)
  83. searx/engines/yandex.py (+1 -1)
  84. searx/engines/yggtorrent.py (+1 -1)
  85. searx/engines/youtube_api.py (+1 -1)
  86. searx/engines/youtube_noapi.py (+1 -1)
  87. searx/external_bang.py (+1 -1)
  88. searx/languages.py (+69 -69)
  89. searx/plugins/__init__.py (+2 -5)
  90. searx/plugins/https_rewrite.py (+1 -4)
  91. searx/plugins/oa_doi_rewrite.py (+1 -1)
  92. searx/plugins/tracker_url_remover.py (+1 -1)
  93. searx/preferences.py (+4 -9)
  94. searx/query.py (+5 -7)
  95. searx/results.py (+5 -8)
  96. searx/search.py (+4 -11)
  97. searx/testing.py (+2 -2)
  98. searx/url_utils.py (+0 -30)
  99. searx/utils.py (+25 -52)
  100. searx/webapp.py (+26 -45)
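
The per-file diffs below repeat the same Python 3-only cleanup: imports move from the removed searx.url_utils shim to urllib.parse, the version_info / `unicode = str` compatibility blocks disappear, u'' literals become plain strings, and explicit 'utf-8' arguments to encode()/decode() are dropped. A minimal sketch of that pattern is shown here; the helper name and the query values are illustrative only and are not part of the commit:

# Hypothetical before/after of a typical engine module touched by this commit.
#
# Before (Python 2 compatible):
#     from searx.url_utils import urlencode
#     if version_info[0] == 3:
#         unicode = str
#     query_str = u'language:{} {}'.format(lang, query.decode('utf-8'))
#
# After (Python 3 only): import from the stdlib directly and rely on UTF-8
# being the default codec for encode()/decode().

from urllib.parse import urlencode  # replaces searx.url_utils


def build_search_path(query: bytes, lang: str) -> str:
    # query arrives as bytes from the search pipeline; decode() defaults to UTF-8
    query_str = 'language:{} {}'.format(lang, query.decode())
    return 'search?' + urlencode({'q': query_str})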

+ 0 - 5
Makefile

@@ -213,10 +213,6 @@ gecko.driver:
 PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot
 test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot
 
-ifeq ($(PY),2)
-test.pylint:
-	@echo "LINT      skip liniting py2"
-else
 # TODO: balance linting with pylint
 
 test.pylint: pyenvinstall
@@ -225,7 +221,6 @@ test.pylint: pyenvinstall
 		searx/testing.py \
 		searx/engines/gigablast.py \
 	)
-endif
 
 # ignored rules:
 #  E402 module level import not at top of file

+ 2 - 2
manage.sh

@@ -39,7 +39,7 @@ install_geckodriver() {
         return
     fi
     GECKODRIVER_VERSION="v0.24.0"
-    PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`"
+    PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`"
     case "$PLATFORM" in
         "linux 32bit" | "linux2 32bit") ARCH="linux32";;
         "linux 64bit" | "linux2 64bit") ARCH="linux64";;
@@ -136,7 +136,7 @@ docker_build() {
     # Check consistency between the git tag and the searx/version.py file
     # /!\ HACK : parse Python file with bash /!\
     # otherwise it is not possible build the docker image without all Python dependencies ( version.py loads __init__.py )
-    # SEARX_PYTHON_VERSION=$(python -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
+    # SEARX_PYTHON_VERSION=$(python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
     SEARX_PYTHON_VERSION=$(cat searx/version.py | grep "\(VERSION_MAJOR\|VERSION_MINOR\|VERSION_BUILD\) =" | cut -d\= -f2 | sed -e 's/^[[:space:]]*//' | paste -sd "." -)
     if [ $(echo "$SEARX_GIT_VERSION" | cut -d- -f1) != "$SEARX_PYTHON_VERSION" ]; then
 	echo "Inconsistency between the last git tag and the searx/version.py file"

+ 2 - 6
searx/__init__.py

@@ -21,12 +21,8 @@ from os import environ
 from os.path import realpath, dirname, join, abspath, isfile
 from io import open
 from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION
-try:
-    from yaml import safe_load
-except:
-    from sys import exit, stderr
-    stderr.write('[E] install pyyaml\n')
-    exit(2)
+from yaml import safe_load
+
 
 searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))

+ 2 - 6
searx/answerers/__init__.py

@@ -1,12 +1,8 @@
 from os import listdir
 from os.path import realpath, dirname, join, isdir
-from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
-if version_info[0] == 3:
-    unicode = str
-
 
 answerers_dir = dirname(realpath(__file__))
 
@@ -36,10 +32,10 @@ def ask(query):
     results = []
     query_parts = list(filter(None, query.query.split()))
 
-    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
+    if query_parts[0].decode() not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
+    for answerer in answerers_by_keywords[query_parts[0].decode()]:
         result = answerer(query)
         if result:
             results.append(result)

+ 7 - 13
searx/answerers/random/answerer.py

@@ -1,7 +1,6 @@
 import hashlib
 import random
 import string
-import sys
 import uuid
 from flask_babel import gettext
 
@@ -10,12 +9,7 @@ from flask_babel import gettext
 keywords = ('random',)
 
 random_int_max = 2**31
-
-if sys.version_info[0] == 2:
-    random_string_letters = string.lowercase + string.digits + string.uppercase
-else:
-    unicode = str
-    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
+random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_characters():
@@ -24,25 +18,25 @@ def random_characters():
 
 
 def random_string():
-    return u''.join(random_characters())
+    return ''.join(random_characters())
 
 
 def random_float():
-    return unicode(random.random())
+    return str(random.random())
 
 
 def random_int():
-    return unicode(random.randint(-random_int_max, random_int_max))
+    return str(random.randint(-random_int_max, random_int_max))
 
 
 def random_sha256():
     m = hashlib.sha256()
     m.update(''.join(random_characters()).encode())
-    return unicode(m.hexdigest())
+    return str(m.hexdigest())
 
 
 def random_uuid():
-    return unicode(uuid.uuid4())
+    return str(uuid.uuid4())
 
 
 random_types = {b'string': random_string,
@@ -70,4 +64,4 @@ def answer(query):
 def self_info():
     return {'name': gettext('Random value generator'),
             'description': gettext('Generate different random values'),
-            'examples': [u'random {}'.format(x.decode('utf-8')) for x in random_types]}
+            'examples': ['random {}'.format(x.decode()) for x in random_types]}

+ 1 - 4
searx/answerers/statistics/answerer.py

@@ -1,11 +1,8 @@
-from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
-if version_info[0] == 3:
-    unicode = str
 
 keywords = ('min',
             'max',
@@ -44,7 +41,7 @@ def answer(query):
     if answer is None:
         return []
 
-    return [{'answer': unicode(answer)}]
+    return [{'answer': str(answer)}]
 
 
 # required answerer function

+ 7 - 10
searx/autocomplete.py

@@ -16,19 +16,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 
-import sys
 from lxml import etree
 from json import loads
+from urllib.parse import urlencode
+
 from searx import settings
 from searx.languages import language_codes
 from searx.engines import (
     categories, engines, engine_shortcuts
 )
 from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode
-
-if sys.version_info[0] == 3:
-    unicode = str
 
 
 def get(*args, **kwargs):
@@ -85,22 +82,22 @@ def searx_bang(full_query):
             engine_query = full_query.getSearchQuery()[1:]
 
             for lc in language_codes:
-                lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+                lang_id, lang_name, country, english_name = map(str.lower, lc)
 
                 # check if query starts with language-id
                 if lang_id.startswith(engine_query):
                     if len(engine_query) <= 2:
-                        results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
+                        results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
                     else:
-                        results.append(u':{lang_id}'.format(lang_id=lang_id))
+                        results.append(':{lang_id}'.format(lang_id=lang_id))
 
                 # check if query starts with language name
                 if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
-                    results.append(u':{lang_name}'.format(lang_name=lang_name))
+                    results.append(':{lang_name}'.format(lang_name=lang_name))
 
                 # check if query starts with country
                 if country.startswith(engine_query.replace('_', ' ')):
-                    results.append(u':{country}'.format(country=country.replace(' ', '_')))
+                    results.append(':{country}'.format(country=country.replace(' ', '_')))
 
     # remove duplicates
     result_set = set(results)

+ 2 - 1
searx/engines/1337x.py

@@ -1,7 +1,8 @@
+from urllib.parse import quote, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import quote, urljoin
+
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

+ 2 - 2
searx/engines/acgsou.py

@@ -9,9 +9,9 @@
  @parse        url, title, content, seed, leech, torrentfile
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config
@@ -63,7 +63,7 @@ def response(resp):
         except:
             pass
         # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
-        content = u'Category: "{category}".'
+        content = 'Category: "{category}".'
         content = content.format(category=category)
 
         results.append({'url': href,

+ 2 - 1
searx/engines/apkmirror.py

@@ -9,9 +9,10 @@
  @parse       url, title, thumbnail_src
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
+
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/archlinux.py

@@ -11,9 +11,9 @@
  @parse        url, title
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

+ 2 - 2
searx/engines/arxiv.py

@@ -11,9 +11,9 @@
  More info on api: https://arxiv.org/help/api/user-manual
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
-from searx.url_utils import urlencode
 
 
 categories = ['science']
@@ -30,7 +30,7 @@ def request(query, params):
     # basic search
     offset = (params['pageno'] - 1) * number_of_results
 
-    string_args = dict(query=query.decode('utf-8'),
+    string_args = dict(query=query.decode(),
                        offset=offset,
                        number_of_results=number_of_results)
 

+ 1 - 1
searx/engines/base.py

@@ -13,10 +13,10 @@
  More info on api: http://base-search.net/about/download/base_interface.pdf
 """
 
+from urllib.parse import urlencode
 from lxml import etree
 from datetime import datetime
 import re
-from searx.url_utils import urlencode
 from searx.utils import searx_useragent
 
 

+ 2 - 2
searx/engines/bing.py

@@ -14,10 +14,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx import logger, utils
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import match_language, gen_useragent, eval_xpath
 
 logger = logger.getChild('bing engine')
@@ -47,7 +47,7 @@ def request(query, params):
     else:
         lang = match_language(params['language'], supported_languages, language_aliases)
 
-    query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
+    query = 'language:{} {}'.format(lang.split('-')[0].upper(), query.decode()).encode()
 
     search_path = search_string.format(
         query=urlencode({'q': query}),

+ 2 - 2
searx/engines/bing_images.py

@@ -12,10 +12,10 @@
 
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from json import loads
 import re
-from searx.url_utils import urlencode
 from searx.utils import match_language
 
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
@@ -91,7 +91,7 @@ def response(resp):
 
             # strip 'Unicode private use area' highlighting, they render to Tux
             # the Linux penguin and a standing diamond on my machine...
-            title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
+            title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
             results.append({'template': 'images.html',
                             'url': m['purl'],
                             'thumbnail_src': m['turl'],

+ 1 - 2
searx/engines/bing_news.py

@@ -13,10 +13,9 @@
 
 from datetime import datetime
 from dateutil import parser
+from urllib.parse import urlencode, urlparse, parse_qsl
 from lxml import etree
 from searx.utils import list_get, match_language
-from searx.url_utils import urlencode, urlparse, parse_qsl
-
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 
 # engine dependent config

+ 1 - 1
searx/engines/bing_videos.py

@@ -12,7 +12,7 @@
 
 from json import loads
 from lxml import html
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language
 
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases

+ 1 - 1
searx/engines/btdigg.py

@@ -12,8 +12,8 @@
 
 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 
 # engine dependent config

+ 1 - 4
searx/engines/currency_convert.py

@@ -1,14 +1,11 @@
 import json
 import re
 import os
-import sys
 import unicodedata
 
 from io import open
 from datetime import datetime
 
-if sys.version_info[0] == 3:
-    unicode = str
 
 categories = []
 url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@@ -20,7 +17,7 @@ db = 1
 
 
 def normalize_name(name):
-    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
+    name = name.decode().lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 

+ 1 - 1
searx/engines/dailymotion.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language, html_to_text
 
 # engine dependent config

+ 2 - 2
searx/engines/deezer.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -50,7 +50,7 @@ def response(resp):
             if url.startswith('http://'):
                 url = 'https' + url[4:]
 
-            content = u'{} - {} - {}'.format(
+            content = '{} - {} - {}'.format(
                 result['artist']['name'],
                 result['album']['title'],
                 result['title'])

+ 2 - 1
searx/engines/deviantart.py

@@ -14,8 +14,9 @@
 
 from lxml import html
 import re
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
+
 
 # engine dependent config
 categories = ['images']

+ 3 - 3
searx/engines/dictzone.py

@@ -10,12 +10,12 @@
 """
 
 import re
+from urllib.parse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang, eval_xpath
-from searx.url_utils import urljoin
 
 categories = ['general']
-url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
@@ -37,7 +37,7 @@ def request(query, params):
 
     params['url'] = url.format(from_lang=from_lang[2],
                                to_lang=to_lang[2],
-                               query=query.decode('utf-8'))
+                               query=query.decode())
 
     return params
 

+ 1 - 4
searx/engines/digbt.py

@@ -10,14 +10,11 @@
  @parse       url, title, content, magnetlink
 """
 
-from sys import version_info
+from urllib.parse import urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import urljoin
 
-if version_info[0] == 3:
-    unicode = str
 
 categories = ['videos', 'music', 'files']
 paging = True

+ 1 - 1
searx/engines/digg.py

@@ -14,8 +14,8 @@ import random
 import string
 from dateutil import parser
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
-from searx.url_utils import urlencode
 from datetime import datetime
 
 # engine dependent config

+ 1 - 1
searx/engines/doku.py

@@ -9,10 +9,10 @@
 # @stable      yes
 # @parse       (general)    url, title, content
 
+from urllib.parse import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

+ 1 - 1
searx/engines/duckduckgo.py

@@ -15,9 +15,9 @@
 
 from lxml.html import fromstring
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath
 
 # engine dependent config

+ 1 - 1
searx/engines/duckduckgo_definitions.py

@@ -10,11 +10,11 @@ DuckDuckGo (definitions)
 """
 
 import json
+from urllib.parse import urlencode
 from lxml import html
 from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
-from searx.url_utils import urlencode
 from searx.utils import html_to_text, match_language
 
 url = 'https://api.duckduckgo.com/'\

+ 1 - 1
searx/engines/duckduckgo_images.py

@@ -14,13 +14,13 @@
 """
 
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import (
     _fetch_supported_languages, supported_languages_url,
     get_region_code, language_aliases
 )
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

+ 1 - 1
searx/engines/duden.py

@@ -10,9 +10,9 @@
 
 from lxml import html, etree
 import re
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import quote, urljoin
 from searx import logger
 
 categories = ['general']

+ 1 - 1
searx/engines/etools.py

@@ -10,8 +10,8 @@
 """
 
 from lxml import html
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import eval_xpath
 
 categories = ['general']

+ 1 - 1
searx/engines/fdroid.py

@@ -9,9 +9,9 @@
  @parse        url, title, content
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files']

+ 2 - 5
searx/engines/filecrop.py

@@ -1,9 +1,6 @@
-from searx.url_utils import urlencode
+from html.parser import HTMLParser
+from urllib.parse import urlencode
 
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
 
 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa

+ 1 - 1
searx/engines/flickr.py

@@ -14,7 +14,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 categories = ['images']
 

+ 5 - 5
searx/engines/flickr_noapi.py

@@ -15,8 +15,8 @@
 from json import loads
 from time import time
 import re
+from urllib.parse import urlencode
 from searx.engines import logger
-from searx.url_utils import urlencode
 from searx.utils import ecma_unescape, html_to_text
 
 logger = logger.getChild('flickr-noapi')
@@ -117,10 +117,10 @@ def response(resp):
             'img_format': img_format,
             'template': 'images.html'
         }
-        result['author'] = author.encode('utf-8', 'ignore').decode('utf-8')
-        result['source'] = source.encode('utf-8', 'ignore').decode('utf-8')
-        result['title'] = title.encode('utf-8', 'ignore').decode('utf-8')
-        result['content'] = content.encode('utf-8', 'ignore').decode('utf-8')
+        result['author'] = author.encode(errors='ignore').decode()
+        result['source'] = source.encode(errors='ignore').decode()
+        result['title'] = title.encode(errors='ignore').decode()
+        result['content'] = content.encode(errors='ignore').decode()
         results.append(result)
 
     return results

+ 2 - 5
searx/engines/framalibre.py

@@ -10,13 +10,10 @@
  @parse       url, title, content, thumbnail, img_src
 """
 
-try:
-    from cgi import escape
-except:
-    from html import escape
+from html import escape
+from urllib.parse import urljoin, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urljoin, urlencode
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/frinkiac.py

@@ -10,7 +10,7 @@ Frinkiac (Images)
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 categories = ['images']
 

+ 1 - 1
searx/engines/genius.py

@@ -11,7 +11,7 @@ Genius
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from datetime import datetime
 
 # engine dependent config

+ 2 - 2
searx/engines/gentoo.py

@@ -11,9 +11,9 @@
  @parse        url, title
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
@@ -90,7 +90,7 @@ def request(query, params):
     # if our language is hosted on the main site, we need to add its name
     # to the query in order to narrow the results to that language
     if language in main_langs:
-        query += b' (' + (main_langs[language]).encode('utf-8') + b')'
+        query += b' (' + (main_langs[language]).encode() + b')'
 
     # prepare the request parameters
     query = urlencode({'search': query})

+ 1 - 1
searx/engines/gigablast.py

@@ -14,8 +14,8 @@
 
 import re
 from json import loads
+from urllib.parse import urlencode
 # from searx import logger
-from searx.url_utils import urlencode
 from searx.poolrequests import get
 
 # engine dependent config

+ 1 - 1
searx/engines/github.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/google.py

@@ -18,11 +18,11 @@ Definitions`_.
 
 # pylint: disable=invalid-name, missing-function-docstring
 
+from urllib.parse import urlencode, urlparse
 from lxml import html
 from flask_babel import gettext
 from searx.engines.xpath import extract_text
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import match_language, eval_xpath
 
 logger = logger.getChild('google engine')

+ 2 - 3
searx/engines/google_images.py

@@ -24,11 +24,10 @@ Definitions`_.
 
 """
 
-import urllib
+from urllib.parse import urlencode, urlparse, unquote
 from lxml import html
 from flask_babel import gettext
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import eval_xpath
 from searx.engines.xpath import extract_text
 
@@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id):
         if 'gstatic.com/images' in line and data_id in line:
             url_line = _script[i + 1]
             img_url = url_line.split('"')[1]
-            img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%'))
+            img_url = unquote(img_url.replace(r'\u00', r'%'))
     return img_url
 
 

+ 1 - 1
searx/engines/google_news.py

@@ -10,9 +10,9 @@
  @parse       url, title, content, publishedDate
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language
 
 # search-url

+ 1 - 1
searx/engines/google_videos.py

@@ -12,9 +12,9 @@
 
 from datetime import date, timedelta
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 import re
 
 # engine dependent config

+ 2 - 5
searx/engines/ina.py

@@ -12,15 +12,12 @@
 # @todo        embedded (needs some md5 from video page)
 
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from dateutil import parser
+from html.parser import HTMLParser
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
 
 # engine dependent config
 categories = ['videos']

+ 1 - 1
searx/engines/invidious.py

@@ -8,7 +8,7 @@
 # @stable      yes
 # @parse       url, title, content, publishedDate, thumbnail, embedded, author, length
 
-from searx.url_utils import quote_plus
+from urllib.parse import quote_plus
 from dateutil import parser
 import time
 

+ 1 - 6
searx/engines/json_engine.py

@@ -1,11 +1,8 @@
 from collections import Iterable
 from json import loads
-from sys import version_info
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import to_string
 
-if version_info[0] == 3:
-    unicode = str
 
 search_url = None
 url_query = None
@@ -37,8 +34,6 @@ def iterate(iterable):
 def is_iterable(obj):
     if type(obj) == str:
         return False
-    if type(obj) == unicode:
-        return False
     return isinstance(obj, Iterable)
 
 

+ 1 - 1
searx/engines/kickass.py

@@ -12,9 +12,9 @@
 
 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
-from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

+ 2 - 2
searx/engines/mediawiki.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from string import Formatter
-from searx.url_utils import urlencode, quote
+from urllib.parse import urlencode, quote
 
 # engine dependent config
 categories = ['general']
@@ -79,7 +79,7 @@ def response(resp):
         if result.get('snippet', '').startswith('#REDIRECT'):
             continue
         url = base_url.format(language=resp.search_params['language']) +\
-            'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8'))
+            'wiki/' + quote(result['title'].replace(' ', '_').encode())
 
         # append result
         results.append({'url': url,

+ 1 - 2
searx/engines/microsoft_academic.py

@@ -12,8 +12,7 @@ Microsoft Academic (Science)
 from datetime import datetime
 from json import loads
 from uuid import uuid4
-
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text
 
 categories = ['images']

+ 1 - 1
searx/engines/mixcloud.py

@@ -12,7 +12,7 @@
 
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['music']

+ 1 - 1
searx/engines/nyaa.py

@@ -10,8 +10,8 @@
 """
 
 from lxml import html
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config

+ 4 - 4
searx/engines/openstreetmap.py

@@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)')
 # do search-request
 def request(query, params):
 
-    params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
-    params['route'] = route_re.match(query.decode('utf-8'))
+    params['url'] = base_url + search_string.format(query=query.decode())
+    params['route'] = route_re.match(query.decode())
 
     return params
 
@@ -52,7 +52,7 @@ def response(resp):
         if 'display_name' not in r:
             continue
 
-        title = r['display_name'] or u''
+        title = r['display_name'] or ''
         osm_type = r.get('osm_type', r.get('type'))
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])
@@ -64,7 +64,7 @@ def response(resp):
 
         # if no geojson is found and osm_type is a node, add geojson Point
         if not geojson and osm_type == 'node':
-            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
+            geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}
 
         address_raw = r.get('address')
         address = {}

+ 1 - 1
searx/engines/peertube.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text
 
 # engine dependent config

+ 1 - 1
searx/engines/photon.py

@@ -11,8 +11,8 @@
 """
 
 from json import loads
+from urllib.parse import urlencode
 from searx.utils import searx_useragent
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['map']

+ 5 - 3
searx/engines/piratebay.py

@@ -11,7 +11,9 @@
 from json import loads
 from datetime import datetime
 from operator import itemgetter
-from searx.url_utils import quote
+
+from urllib.parse import quote, urljoin
+from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
 
 # engine dependent config
@@ -62,8 +64,8 @@ def response(resp):
     # parse results
     for result in search_res:
         link = url + "description.php?id=" + result["id"]
-        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \
-            "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers)
+        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\
+                     + "&tr=" + "&tr=".join(trackers)
 
         params = {
             "url": link,

+ 1 - 1
searx/engines/pubmed.py

@@ -14,7 +14,7 @@
 from flask_babel import gettext
 from lxml import etree
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.poolrequests import get
 
 

+ 3 - 3
searx/engines/qwant.py

@@ -12,9 +12,9 @@
 
 from datetime import datetime
 from json import loads
-from searx.utils import html_to_text
-from searx.url_utils import urlencode
-from searx.utils import match_language
+from urllib.parse import urlencode
+from searx.utils import html_to_text, match_language
+
 
 # engine dependent config
 categories = None

+ 1 - 1
searx/engines/reddit.py

@@ -12,7 +12,7 @@
 
 import json
 from datetime import datetime
-from searx.url_utils import urlencode, urljoin, urlparse
+from urllib.parse import urlencode, urljoin, urlparse
 
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']

+ 2 - 2
searx/engines/scanr_structures.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads, dumps
-from searx.utils import html_to_text
+from urllib.parse import html_to_text
 
 # engine dependent config
 categories = ['science']
@@ -29,7 +29,7 @@ def request(query, params):
     params['url'] = search_url
     params['method'] = 'POST'
     params['headers']['Content-type'] = "application/json"
-    params['data'] = dumps({"query": query.decode('utf-8'),
+    params['data'] = dumps({"query": query.decode(),
                             "searchField": "ALL",
                             "sortDirection": "ASC",
                             "sortOrder": "RELEVANCY",

+ 1 - 1
searx/engines/searchcode_code.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 
 # engine dependent config

+ 1 - 1
searx/engines/searchcode_doc.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/seedpeer.py

@@ -11,7 +11,7 @@
 from lxml import html
 from json import loads
 from operator import itemgetter
-from searx.url_utils import quote, urljoin
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 
 

+ 3 - 6
searx/engines/soundcloud.py

@@ -14,14 +14,11 @@ import re
 from json import loads
 from lxml import html
 from dateutil import parser
+from io import StringIO
+from urllib.parse import quote_plus, urlencode
 from searx import logger
 from searx.poolrequests import get as http_get
-from searx.url_utils import quote_plus, urlencode
 
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
 
 # engine dependent config
 categories = ['music']
@@ -61,7 +58,7 @@ def get_client_id():
             # gets app_js and searches for the clientid
             response = http_get(app_js_url)
             if response.ok:
-                cids = cid_re.search(response.content.decode("utf-8"))
+                cids = cid_re.search(response.content.decode())
                 if cids is not None and len(cids.groups()):
                     return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

+ 4 - 4
searx/engines/spotify.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 import requests
 import base64
 
@@ -39,8 +39,8 @@ def request(query, params):
         'https://accounts.spotify.com/api/token',
         data={'grant_type': 'client_credentials'},
         headers={'Authorization': 'Basic ' + base64.b64encode(
-            "{}:{}".format(api_client_id, api_client_secret).encode('utf-8')
-        ).decode('utf-8')}
+            "{}:{}".format(api_client_id, api_client_secret).encode()
+        ).decode()}
     )
     j = loads(r.text)
     params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}
@@ -59,7 +59,7 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = u'{} - {} - {}'.format(
+            content = '{} - {} - {}'.format(
                 result['artists'][0]['name'],
                 result['album']['name'],
                 result['name'])

+ 1 - 1
searx/engines/stackoverflow.py

@@ -10,9 +10,9 @@
  @parse       url, title, content
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/tokyotoshokan.py

@@ -11,10 +11,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config

+ 1 - 1
searx/engines/torrentz.py

@@ -12,10 +12,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size
 
 # engine dependent config

+ 4 - 4
searx/engines/translated.py

@@ -12,8 +12,8 @@ import re
 from searx.utils import is_valid_lang

 categories = ['general']
-url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
-web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
+web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
 weight = 100

 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
@@ -39,9 +39,9 @@ def request(query, params):
         key_form = ''
     params['url'] = url.format(from_lang=from_lang[1],
                                to_lang=to_lang[1],
-                               query=query.decode('utf-8'),
+                               query=query.decode(),
                                key=key_form)
-    params['query'] = query.decode('utf-8')
+    params['query'] = query.decode()
     params['from_lang'] = from_lang
     params['to_lang'] = to_lang
 
 

+ 1 - 1
searx/engines/twitter.py

@@ -12,10 +12,10 @@
  @todo        publishedDate
 """

+from urllib.parse import urlencode, urljoin
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin

 # engine dependent config
 categories = ['social media']

+ 1 - 1
searx/engines/unsplash.py

@@ -10,7 +10,7 @@
  @parse       url, title, img_src, thumbnail_src
 """

-from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
+from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 from json import loads

 url = 'https://unsplash.com/'

+ 1 - 1
searx/engines/vimeo.py

@@ -12,9 +12,9 @@
 # @todo        rewrite to api
 # @todo        set content-parameter with correct data

+from urllib.parse import urlencode
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode

 # engine dependent config
 categories = ['videos']

+ 7 - 7
searx/engines/wikidata.py

@@ -15,9 +15,9 @@ from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath

+from urllib.parse import urlencode
 from json import loads
 from lxml.html import fromstring
 from lxml import etree
@@ -76,7 +76,7 @@ def request(query, params):
 def response(resp):
     results = []
     htmlparser = etree.HTMLParser()
-    html = fromstring(resp.content.decode("utf-8"), parser=htmlparser)
+    html = fromstring(resp.content.decode(), parser=htmlparser)
     search_results = eval_xpath(html, wikidata_ids_xpath)

     if resp.search_params['language'].split('-')[0] == 'all':
@@ -89,7 +89,7 @@ def response(resp):
         wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content.decode("utf-8"))
+        jsonresponse = loads(htmlresponse.content.decode())
         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser)

     return results
@@ -453,16 +453,16 @@ def get_geolink(result):
     latitude, longitude = coordinates.split(',')

     # convert to decimal
-    lat = int(latitude[:latitude.find(u'°')])
+    lat = int(latitude[:latitude.find('°')])
     if latitude.find('\'') >= 0:
-        lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0
+        lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
     if latitude.find('"') >= 0:
         lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0
     if latitude.find('S') >= 0:
         lat *= -1
-    lon = int(longitude[:longitude.find(u'°')])
+    lon = int(longitude[:longitude.find('°')])
     if longitude.find('\'') >= 0:
-        lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0
+        lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0
     if longitude.find('"') >= 0:
         lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0
     if longitude.find('W') >= 0:

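The get_geolink() hunk converts a degrees/minutes/seconds string into decimal degrees by dividing minutes by 60 and seconds by 3600, negating the value for southern or western coordinates. A rough standalone sketch of the same arithmetic (simplified for the example: it assumes the degree, minute and second markers are all present, unlike the more defensive code above):

    def dms_to_decimal(dms):
        # e.g. '41°24\'12.2"N' -> roughly 41.4034
        deg, rest = dms.split('°')
        minutes, rest = rest.split("'")
        seconds = rest[:rest.find('"')]
        value = int(deg) + int(minutes) / 60.0 + float(seconds) / 3600.0
        if dms[-1] in ('S', 'W'):
            value *= -1
        return value

    print(dms_to_decimal('41°24\'12.2"N'))  # -> 41.40338...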
+ 2 - 2
searx/engines/wikipedia.py

@@ -10,13 +10,13 @@
  @parse       url, infobox
 """

+from urllib.parse import quote
 from json import loads
 from lxml.html import fromstring
-from searx.url_utils import quote
 from searx.utils import match_language, searx_useragent

 # search-url
-search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
 supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 
 
 
 

+ 10 - 10
searx/engines/wolframalpha_api.py

@@ -9,7 +9,7 @@
 # @parse       url, infobox

 from lxml import etree
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@ -45,15 +45,15 @@ def request(query, params):

 # replace private user area characters to make text legible
 def replace_pua_chars(text):
-    pua_chars = {u'\uf522': u'\u2192',  # rigth arrow
-                 u'\uf7b1': u'\u2115',  # set of natural numbers
-                 u'\uf7b4': u'\u211a',  # set of rational numbers
-                 u'\uf7b5': u'\u211d',  # set of real numbers
-                 u'\uf7bd': u'\u2124',  # set of integer numbers
-                 u'\uf74c': 'd',        # differential
-                 u'\uf74d': u'\u212f',  # euler's number
-                 u'\uf74e': 'i',        # imaginary number
-                 u'\uf7d9': '='}        # equals sign
+    pua_chars = {'\uf522': '\u2192',  # rigth arrow
+                 '\uf7b1': '\u2115',  # set of natural numbers
+                 '\uf7b4': '\u211a',  # set of rational numbers
+                 '\uf7b5': '\u211d',  # set of real numbers
+                 '\uf7bd': '\u2124',  # set of integer numbers
+                 '\uf74c': 'd',       # differential
+                 '\uf74d': '\u212f',  # euler's number
+                 '\uf74e': 'i',       # imaginary number
+                 '\uf7d9': '='}       # equals sign

     for k, v in pua_chars.items():
         text = text.replace(k, v)

+ 1 - 1
searx/engines/wolframalpha_noapi.py

@@ -10,9 +10,9 @@
 
 
 from json import loads
 from time import time
+from urllib.parse import urlencode

 from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode

 # search-url
 url = 'https://www.wolframalpha.com/'

+ 1 - 1
searx/engines/www1x.py

@@ -11,7 +11,7 @@
 """

 from lxml import html
-from searx.url_utils import urlencode, urljoin
+from urllib.parse import urlencode, urljoin
 from searx.engines.xpath import extract_text

 # engine dependent config

+ 3 - 3
searx/engines/xpath.py

@@ -1,7 +1,7 @@
+from urllib.parse import unquote, urlencode, urljoin, urlparse
 from lxml import html
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
 from searx.utils import html_to_text, eval_xpath
-from searx.url_utils import unquote, urlencode, urljoin, urlparse

 search_url = None
 url_xpath = None
@@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url):
     if url.startswith('//'):
         # add http or https to this kind of url //example.com/
         parsed_search_url = urlparse(search_url)
-        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
+        url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
     elif url.startswith('/'):
         # fix relative url to the search engine
         url = urljoin(search_url, url)
@@ -86,7 +86,7 @@ def normalize_url(url):
         p = parsed_url.path
         mark = p.find('/**')
         if mark != -1:
-            return unquote(p[mark + 3:]).decode('utf-8')
+            return unquote(p[mark + 3:]).decode()

     return url
 
 

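One observation on the normalize_url() hunk (not part of the commit itself): in Python 3, urllib.parse.unquote() already returns str, so chaining .decode() onto it would raise AttributeError if that branch were ever reached; the usual Python 3 form is simply to drop the call. A sketch of what the branch might look like under that assumption, with a hypothetical helper name:

    from urllib.parse import unquote, urlparse

    def normalize_url_path(url):
        # hypothetical helper mirroring only the '/**' branch of normalize_url()
        p = urlparse(url).path
        mark = p.find('/**')
        if mark != -1:
            return unquote(p[mark + 3:])  # unquote() already returns str in Python 3
        return url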
+ 1 - 1
searx/engines/yacy.py

@@ -14,7 +14,7 @@
 
 
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 from searx.utils import html_to_text
 
 

+ 1 - 1
searx/engines/yahoo.py

@@ -11,9 +11,9 @@
  @parse       url, title, content, suggestion
 """

+from urllib.parse import unquote, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.url_utils import unquote, urlencode
 from searx.utils import match_language, eval_xpath

 # engine dependent config

+ 2 - 2
searx/engines/yahoo_news.py

@@ -11,13 +11,13 @@
 
 
 import re
 from datetime import datetime, timedelta
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.engines.yahoo import (
     parse_url, _fetch_supported_languages, supported_languages_url, language_aliases
 )
 from dateutil import parser
-from searx.url_utils import urlencode
 from searx.utils import match_language

 # engine dependent config
@@ -58,7 +58,7 @@ def request(query, params):

 def sanitize_url(url):
     if ".yahoo.com/" in url:
-        return re.sub(u"\\;\\_ylt\\=.+$", "", url)
+        return re.sub("\\;\\_ylt\\=.+$", "", url)
     else:
         return url
 
 

+ 1 - 1
searx/engines/yandex.py

@@ -9,9 +9,9 @@
  @parse       url, title, content
 """

+from urllib.parse import urlencode
 from lxml import html
 from searx import logger
-from searx.url_utils import urlencode

 logger = logger.getChild('yandex engine')
 
 

+ 1 - 1
searx/engines/yggtorrent.py

@@ -11,8 +11,8 @@
 from lxml import html
 from operator import itemgetter
 from datetime import datetime
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import get_torrent_size
 from searx.poolrequests import get as http_get
 
 

+ 1 - 1
searx/engines/youtube_api.py

@@ -10,7 +10,7 @@
 
 
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode

 # engine dependent config
 categories = ['videos', 'music']

+ 1 - 1
searx/engines/youtube_noapi.py

@@ -10,9 +10,9 @@
 
 
 from functools import reduce
 from json import loads
+from urllib.parse import quote_plus
 from searx.engines.xpath import extract_text
 from searx.utils import list_get
-from searx.url_utils import quote_plus

 # engine dependent config
 categories = ['videos', 'music']

+ 1 - 1
searx/external_bang.py

@@ -23,7 +23,7 @@ def get_bang_url(search_query):
     """

     if search_query.external_bang:
-        query = search_query.query.decode('utf-8', 'ignore')
+        query = search_query.query.decode(errors='ignore')
         bang = _get_bang(search_query.external_bang)

         if bang and query:

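search_query.query is stored as bytes (the query.py hunk further down encodes it), so external_bang.py decodes it back to str here; decode(errors='ignore') is just the keyword form of decode('utf-8', 'ignore'), since UTF-8 is the default codec in Python 3. A small round-trip sketch:

    query = 'privacy search'.encode()      # str -> bytes, UTF-8 by default
    text = query.decode(errors='ignore')   # bytes -> str, undecodable bytes dropped
    assert text == 'privacy search'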
+ 69 - 69
searx/languages.py

@@ -3,73 +3,73 @@
 # this file is generated automatically by utils/update_search_languages.py

 language_codes = (
-    (u"af-NA", u"Afrikaans", u"", u"Afrikaans"),
-    (u"ar-SA", u"العربية", u"", u"Arabic"),
-    (u"be-BY", u"Беларуская", u"", u"Belarusian"),
-    (u"bg-BG", u"Български", u"", u"Bulgarian"),
-    (u"ca-AD", u"Català", u"", u"Catalan"),
-    (u"cs-CZ", u"Čeština", u"", u"Czech"),
-    (u"da-DK", u"Dansk", u"", u"Danish"),
-    (u"de", u"Deutsch", u"", u"German"),
-    (u"de-AT", u"Deutsch", u"Österreich", u"German"),
-    (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
-    (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
-    (u"el-GR", u"Ελληνικά", u"", u"Greek"),
-    (u"en", u"English", u"", u"English"),
-    (u"en-AU", u"English", u"Australia", u"English"),
-    (u"en-CA", u"English", u"Canada", u"English"),
-    (u"en-GB", u"English", u"United Kingdom", u"English"),
-    (u"en-IE", u"English", u"Ireland", u"English"),
-    (u"en-IN", u"English", u"India", u"English"),
-    (u"en-NZ", u"English", u"New Zealand", u"English"),
-    (u"en-PH", u"English", u"Philippines", u"English"),
-    (u"en-SG", u"English", u"Singapore", u"English"),
-    (u"en-US", u"English", u"United States", u"English"),
-    (u"es", u"Español", u"", u"Spanish"),
-    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
-    (u"es-CL", u"Español", u"Chile", u"Spanish"),
-    (u"es-ES", u"Español", u"España", u"Spanish"),
-    (u"es-MX", u"Español", u"México", u"Spanish"),
-    (u"et-EE", u"Eesti", u"", u"Estonian"),
-    (u"fa-IR", u"فارسی", u"", u"Persian"),
-    (u"fi-FI", u"Suomi", u"", u"Finnish"),
-    (u"fr", u"Français", u"", u"French"),
-    (u"fr-BE", u"Français", u"Belgique", u"French"),
-    (u"fr-CA", u"Français", u"Canada", u"French"),
-    (u"fr-CH", u"Français", u"Suisse", u"French"),
-    (u"fr-FR", u"Français", u"France", u"French"),
-    (u"he-IL", u"עברית", u"", u"Hebrew"),
-    (u"hr-HR", u"Hrvatski", u"", u"Croatian"),
-    (u"hu-HU", u"Magyar", u"", u"Hungarian"),
-    (u"hy-AM", u"Հայերեն", u"", u"Armenian"),
-    (u"id-ID", u"Indonesia", u"", u"Indonesian"),
-    (u"is-IS", u"Íslenska", u"", u"Icelandic"),
-    (u"it-IT", u"Italiano", u"", u"Italian"),
-    (u"ja-JP", u"日本語", u"", u"Japanese"),
-    (u"ko-KR", u"한국어", u"", u"Korean"),
-    (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
-    (u"lv-LV", u"Latviešu", u"", u"Latvian"),
-    (u"ms-MY", u"Melayu", u"", u"Malay"),
-    (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"),
-    (u"nl", u"Nederlands", u"", u"Dutch"),
-    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
-    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
-    (u"pl-PL", u"Polski", u"", u"Polish"),
-    (u"pt", u"Português", u"", u"Portuguese"),
-    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
-    (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
-    (u"ro-RO", u"Română", u"", u"Romanian"),
-    (u"ru-RU", u"Русский", u"", u"Russian"),
-    (u"sk-SK", u"Slovenčina", u"", u"Slovak"),
-    (u"sl-SI", u"Slovenščina", u"", u"Slovenian"),
-    (u"sr-RS", u"Srpski", u"", u"Serbian"),
-    (u"sv-SE", u"Svenska", u"", u"Swedish"),
-    (u"sw-KE", u"Kiswahili", u"", u"Swahili"),
-    (u"th-TH", u"ไทย", u"", u"Thai"),
-    (u"tr-TR", u"Türkçe", u"", u"Turkish"),
-    (u"uk-UA", u"Українська", u"", u"Ukrainian"),
-    (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
-    (u"zh", u"中文", u"", u"Chinese"),
-    (u"zh-CN", u"中文", u"中国", u"Chinese"),
-    (u"zh-TW", u"中文", u"台灣", u"Chinese")
+    ("af-NA", "Afrikaans", "", "Afrikaans"),
+    ("ar-SA", "العربية", "", "Arabic"),
+    ("be-BY", "Беларуская", "", "Belarusian"),
+    ("bg-BG", "Български", "", "Bulgarian"),
+    ("ca-AD", "Català", "", "Catalan"),
+    ("cs-CZ", "Čeština", "", "Czech"),
+    ("da-DK", "Dansk", "", "Danish"),
+    ("de", "Deutsch", "", "German"),
+    ("de-AT", "Deutsch", "Österreich", "German"),
+    ("de-CH", "Deutsch", "Schweiz", "German"),
+    ("de-DE", "Deutsch", "Deutschland", "German"),
+    ("el-GR", "Ελληνικά", "", "Greek"),
+    ("en", "English", "", "English"),
+    ("en-AU", "English", "Australia", "English"),
+    ("en-CA", "English", "Canada", "English"),
+    ("en-GB", "English", "United Kingdom", "English"),
+    ("en-IE", "English", "Ireland", "English"),
+    ("en-IN", "English", "India", "English"),
+    ("en-NZ", "English", "New Zealand", "English"),
+    ("en-PH", "English", "Philippines", "English"),
+    ("en-SG", "English", "Singapore", "English"),
+    ("en-US", "English", "United States", "English"),
+    ("es", "Español", "", "Spanish"),
+    ("es-AR", "Español", "Argentina", "Spanish"),
+    ("es-CL", "Español", "Chile", "Spanish"),
+    ("es-ES", "Español", "España", "Spanish"),
+    ("es-MX", "Español", "México", "Spanish"),
+    ("et-EE", "Eesti", "", "Estonian"),
+    ("fa-IR", "فارسی", "", "Persian"),
+    ("fi-FI", "Suomi", "", "Finnish"),
+    ("fr", "Français", "", "French"),
+    ("fr-BE", "Français", "Belgique", "French"),
+    ("fr-CA", "Français", "Canada", "French"),
+    ("fr-CH", "Français", "Suisse", "French"),
+    ("fr-FR", "Français", "France", "French"),
+    ("he-IL", "עברית", "", "Hebrew"),
+    ("hr-HR", "Hrvatski", "", "Croatian"),
+    ("hu-HU", "Magyar", "", "Hungarian"),
+    ("hy-AM", "Հայերեն", "", "Armenian"),
+    ("id-ID", "Indonesia", "", "Indonesian"),
+    ("is-IS", "Íslenska", "", "Icelandic"),
+    ("it-IT", "Italiano", "", "Italian"),
+    ("ja-JP", "日本語", "", "Japanese"),
+    ("ko-KR", "한국어", "", "Korean"),
+    ("lt-LT", "Lietuvių", "", "Lithuanian"),
+    ("lv-LV", "Latviešu", "", "Latvian"),
+    ("ms-MY", "Melayu", "", "Malay"),
+    ("nb-NO", "Norsk Bokmål", "", "Norwegian Bokmål"),
+    ("nl", "Nederlands", "", "Dutch"),
+    ("nl-BE", "Nederlands", "België", "Dutch"),
+    ("nl-NL", "Nederlands", "Nederland", "Dutch"),
+    ("pl-PL", "Polski", "", "Polish"),
+    ("pt", "Português", "", "Portuguese"),
+    ("pt-BR", "Português", "Brasil", "Portuguese"),
+    ("pt-PT", "Português", "Portugal", "Portuguese"),
+    ("ro-RO", "Română", "", "Romanian"),
+    ("ru-RU", "Русский", "", "Russian"),
+    ("sk-SK", "Slovenčina", "", "Slovak"),
+    ("sl-SI", "Slovenščina", "", "Slovenian"),
+    ("sr-RS", "Srpski", "", "Serbian"),
+    ("sv-SE", "Svenska", "", "Swedish"),
+    ("sw-KE", "Kiswahili", "", "Swahili"),
+    ("th-TH", "ไทย", "", "Thai"),
+    ("tr-TR", "Türkçe", "", "Turkish"),
+    ("uk-UA", "Українська", "", "Ukrainian"),
+    ("vi-VN", "Tiếng Việt", "", "Vietnamese"),
+    ("zh", "中文", "", "Chinese"),
+    ("zh-CN", "中文", "中国", "Chinese"),
+    ("zh-TW", "中文", "台灣", "Chinese")
 )

+ 2 - 5
searx/plugins/__init__.py

@@ -20,13 +20,10 @@ from importlib import import_module
 from os import listdir, makedirs, remove, stat, utime
 from os.path import abspath, basename, dirname, exists, join
 from shutil import copyfile
-from sys import version_info
 from traceback import print_exc

 from searx import logger, settings, static_path

-if version_info[0] == 3:
-    unicode = str

 logger = logger.getChild('plugins')

@@ -38,8 +35,8 @@ from searx.plugins import (oa_doi_rewrite,
                            tracker_url_remover,
                            vim_hotkeys)

-required_attrs = (('name', (str, unicode)),
-                  ('description', (str, unicode)),
+required_attrs = (('name', str),
+                  ('description', str),
                   ('default_on', bool))

 optional_attrs = (('js_dependencies', tuple),

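With the unicode alias gone, the required-attribute check only needs to test against str. A minimal sketch of how such a validation loop might look (the plugin object below is a made-up stand-in, not searx code):

    required_attrs = (('name', str),
                      ('description', str),
                      ('default_on', bool))

    class DummyPlugin:                      # stand-in for an imported plugin module
        name = 'example'
        description = 'does nothing'
        default_on = False

    plugin = DummyPlugin()
    for attr_name, attr_type in required_attrs:
        if not hasattr(plugin, attr_name) or not isinstance(getattr(plugin, attr_name), attr_type):
            raise AttributeError('missing attribute %s' % attr_name)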
+ 1 - 4
searx/plugins/https_rewrite.py

@@ -16,17 +16,14 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''

 import re
-import sys
+from urllib.parse import urlparse
 from lxml import etree
 from os import listdir, environ
 from os.path import isfile, isdir, join
 from searx.plugins import logger
 from flask_babel import gettext
 from searx import searx_dir
-from searx.url_utils import urlparse

-if sys.version_info[0] == 3:
-    unicode = str

 name = "HTTPS rewrite"
 description = gettext('Rewrite HTTP links to HTTPS if possible')

+ 1 - 1
searx/plugins/oa_doi_rewrite.py

@@ -1,6 +1,6 @@
+from urllib.parse import urlparse, parse_qsl
 from flask_babel import gettext
 import re
-from searx.url_utils import urlparse, parse_qsl
 from searx import settings
 
 
 
 

+ 1 - 1
searx/plugins/tracker_url_remover.py

@@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 
 from flask_babel import gettext
 import re
-from searx.url_utils import urlunparse, parse_qsl, urlencode
+from urllib.parse import urlunparse, parse_qsl, urlencode

 regexes = {re.compile(r'utm_[^&]+'),
            re.compile(r'(wkey|wemail)[^&]*'),

+ 4 - 9
searx/preferences.py

@@ -6,16 +6,11 @@
 
 
 from base64 import urlsafe_b64encode, urlsafe_b64decode
 from zlib import compress, decompress
-from sys import version
+from urllib.parse import parse_qs, urlencode

 from searx import settings, autocomplete
 from searx.languages import language_codes as languages
 from searx.utils import match_language
-from searx.url_utils import parse_qs, urlencode
-
-if version[0] == '3':
-    # pylint: disable=invalid-name
-    unicode = str


 COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5  # 5 years
@@ -402,14 +397,14 @@ class Preferences(object):

         settings_kv['tokens'] = ','.join(self.tokens.values)

-        return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8')
+        return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()

     def parse_encoded_data(self, input_data):
         """parse (base64) preferences from request (``flask.request.form['preferences']``)"""
-        decoded_data = decompress(urlsafe_b64decode(input_data.encode('utf-8')))
+        decoded_data = decompress(urlsafe_b64decode(input_data.encode()))
         dict_data = {}
         for x, y in parse_qs(decoded_data).items():
-            dict_data[x.decode('utf8')] = y[0].decode('utf8')
+            dict_data[x.decode()] = y[0].decode()
         self.parse_dict(dict_data)

     def parse_dict(self, input_data):

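The preferences hunk serializes the settings dict with urlencode, compresses it with zlib and URL-safe base64-encodes the result; parse_encoded_data() reverses the chain. A self-contained sketch of that round trip (the settings keys and values are made up for the example):

    from base64 import urlsafe_b64encode, urlsafe_b64decode
    from zlib import compress, decompress
    from urllib.parse import parse_qs, urlencode

    settings_kv = {'language': 'en-US', 'theme': 'oscar'}   # example values only

    encoded = urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()

    decoded = decompress(urlsafe_b64decode(encoded.encode()))
    restored = {k.decode(): v[0].decode() for k, v in parse_qs(decoded).items()}
    assert restored == settings_kv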
+ 5 - 7
searx/query.py

@@ -17,15 +17,13 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 (C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at>
 '''

+import re
+
 from searx.languages import language_codes
 from searx.engines import (
     categories, engines, engine_shortcuts
 )
-import re
-import sys

-if sys.version_info[0] == 3:
-    unicode = str

 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')

@@ -93,7 +91,7 @@ class RawTextQuery(object):
                 # check if any language-code is equal with
                 # declared language-codes
                 for lc in language_codes:
-                    lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+                    lang_id, lang_name, country, english_name = map(str.lower, lc)

                     # if correct language-code is found
                     # set it as new search-language
@@ -177,7 +175,7 @@ class RawTextQuery(object):

     def getFullQuery(self):
         # get full querry including whitespaces
-        return u''.join(self.query_parts)
+        return ''.join(self.query_parts)


 class SearchQuery(object):
@@ -185,7 +183,7 @@ class SearchQuery(object):

     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
                  timeout_limit=None, preferences=None, external_bang=None):
-        self.query = query.encode('utf-8')
+        self.query = query.encode()
         self.engines = engines
         self.categories = categories
         self.lang = lang

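SearchQuery now always stores the query as bytes (query.encode()), which is why webapp.py and several engines call .decode() when they need the text back, and why getFullQuery() joins plain str parts. A tiny sketch of that str/bytes boundary:

    raw = '!ddg privacy'           # what the user typed (str)
    stored = raw.encode()          # SearchQuery keeps bytes internally
    assert isinstance(stored, bytes)
    assert stored.decode() == raw  # consumers decode back to str when needed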
+ 5 - 8
searx/results.py

@@ -1,14 +1,11 @@
 import re
-import sys
 from collections import defaultdict
 from operator import itemgetter
 from threading import RLock
+from urllib.parse import urlparse, unquote
 from searx import logger
 from searx.engines import engines
-from searx.url_utils import urlparse, unquote

-if sys.version_info[0] == 3:
-    basestring = str

 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@@ -16,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)

 # return the meaningful length of the content for a result
 def result_content_len(content):
-    if isinstance(content, basestring):
+    if isinstance(content, str):
         return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
     else:
         return 0
@@ -161,11 +158,11 @@ class ResultContainer(object):
                 self._number_of_results.append(result['number_of_results'])
             else:
                 # standard result (url, title, content)
-                if 'url' in result and not isinstance(result['url'], basestring):
+                if 'url' in result and not isinstance(result['url'], str):
                     logger.debug('result: invalid URL: %s', str(result))
-                elif 'title' in result and not isinstance(result['title'], basestring):
+                elif 'title' in result and not isinstance(result['title'], str):
                     logger.debug('result: invalid title: %s', str(result))
-                elif 'content' in result and not isinstance(result['content'], basestring):
+                elif 'content' in result and not isinstance(result['content'], str):
                     logger.debug('result: invalid content: %s', str(result))
                 else:
                     self._merge_result(result, standard_result_count + 1)

+ 4 - 11
searx/search.py

@@ -20,8 +20,8 @@ import sys
 import threading
 from time import time
 from uuid import uuid4
+from _thread import start_new_thread

-import six
 from flask_babel import gettext
 import requests.exceptions
 import searx.poolrequests as requests_lib
@@ -37,13 +37,6 @@ from searx import logger
 from searx.plugins import plugins
 from searx.exceptions import SearxParameterException

-try:
-    from thread import start_new_thread
-except:
-    from _thread import start_new_thread
-
-if sys.version_info[0] == 3:
-    unicode = str

 logger = logger.getChild('search')

@@ -355,11 +348,11 @@ def get_search_query_from_webapp(preferences, form):
         load_default_categories = True
         for pd_name, pd in form.items():
             if pd_name == 'categories':
-                query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories)
+                query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories)
             elif pd_name == 'engines':
                 pd_engines = [{'category': engines[engine].categories[0],
                                'name': engine}
-                              for engine in map(unicode.strip, pd.split(',')) if engine in engines]
+                              for engine in map(str.strip, pd.split(',')) if engine in engines]
                 if pd_engines:
                     query_engines.extend(pd_engines)
                     load_default_categories = False
@@ -434,7 +427,7 @@ class Search(object):

             # This means there was a valid bang and the
             # rest of the search does not need to be continued
-            if isinstance(self.result_container.redirect_url, six.string_types):
+            if isinstance(self.result_container.redirect_url, str):
                 return self.result_container
         # start time
         start_time = time()

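With Python 2 gone, the thread import in search.py no longer needs a try/except fallback: the module is always _thread. A minimal usage sketch (the worker function and its arguments are illustrative only, not searx's actual worker):

    import threading
    from _thread import start_new_thread

    done = threading.Event()

    def search_one_request(engine_name, query):   # illustrative worker
        print('querying', engine_name, 'for', query)
        done.set()

    start_new_thread(search_one_request, ('example-engine', 'test query'))
    done.wait(timeout=1)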
+ 2 - 2
searx/testing.py

@@ -17,7 +17,7 @@ from unittest2 import TestCase
 class SearxTestLayer:
     """Base layer for non-robot tests."""

-    __name__ = u'SearxTestLayer'
+    __name__ = 'SearxTestLayer'

     @classmethod
     def setUp(cls):
@@ -66,7 +66,7 @@ class SearxRobotLayer():
             stderr=subprocess.STDOUT
         )
         if hasattr(self.server.stdout, 'read1'):
-            print(self.server.stdout.read1(1024).decode('utf-8'))
+            print(self.server.stdout.read1(1024).decode())

     def tearDown(self):
         os.kill(self.server.pid, 9)

+ 0 - 30
searx/url_utils.py

@@ -1,30 +0,0 @@
-from sys import version_info
-
-if version_info[0] == 2:
-    from urllib import quote, quote_plus, unquote, urlencode
-    from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult
-else:
-    from urllib.parse import (
-        parse_qs,
-        parse_qsl,
-        quote,
-        quote_plus,
-        unquote,
-        urlencode,
-        urljoin,
-        urlparse,
-        urlunparse,
-        ParseResult
-    )
-
-
-__export__ = (parse_qs,
-              parse_qsl,
-              quote,
-              quote_plus,
-              unquote,
-              urlencode,
-              urljoin,
-              urlparse,
-              urlunparse,
-              ParseResult)

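searx/url_utils.py existed only to paper over the urllib/urlparse split between Python 2 and 3; every former `from searx.url_utils import ...` now maps one-to-one onto the standard library. For example:

    # before (Python 2/3 compatibility shim):
    #   from searx.url_utils import urlencode, urlparse, urljoin
    # after (Python 3 standard library only):
    from urllib.parse import urlencode, urlparse, urljoin

    print(urlencode({'q': 'searx'}))                    # q=searx
    print(urljoin('https://example.org/a/', 'b'))       # https://example.org/a/b
    print(urlparse('https://example.org/a?q=1').query)  # q=1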
+ 25 - 52
searx/utils.py

@@ -1,21 +1,22 @@
 # -*- coding: utf-8 -*-
+import os
+import sys
 import csv
 import hashlib
 import hmac
-import os
 import re
+import json

-from babel.core import get_global
-from babel.dates import format_date
 from codecs import getincrementalencoder
 from imp import load_source
 from numbers import Number
 from os.path import splitext, join
-from io import open
+from io import open, StringIO
 from random import choice
+from html.parser import HTMLParser
 from lxml.etree import XPath
-import sys
-import json
+from babel.core import get_global
+from babel.dates import format_date

 from searx import settings
 from searx.version import VERSION_STRING
@@ -23,23 +24,6 @@ from searx.languages import language_codes
 from searx import settings
 from searx import logger

-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
-
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
-
-if sys.version_info[0] == 3:
-    unichr = chr
-    unicode = str
-    IS_PY2 = False
-    basestring = str
-else:
-    IS_PY2 = True

 logger = logger.getChild('utils')

@@ -75,19 +59,19 @@ def highlight_content(content, query):
     if content.find('<') != -1:
         return content

-    query = query.decode('utf-8')
+    query = query.decode()
     if content.lower().find(query.lower()) > -1:
-        query_regex = u'({0})'.format(re.escape(query))
+        query_regex = '({0})'.format(re.escape(query))
         content = re.sub(query_regex, '<span class="highlight">\\1</span>',
                          content, flags=re.I | re.U)
     else:
         regex_parts = []
         for chunk in query.split():
             if len(chunk) == 1:
-                regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
+                regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
             else:
-                regex_parts.append(u'{0}'.format(re.escape(chunk)))
-        query_regex = u'({0})'.format('|'.join(regex_parts))
+                regex_parts.append('{0}'.format(re.escape(chunk)))
+        query_regex = '({0})'.format('|'.join(regex_parts))
         content = re.sub(query_regex, '<span class="highlight">\\1</span>',
                          content, flags=re.I | re.U)

@@ -124,21 +108,21 @@ class HTMLTextExtractor(HTMLParser):
     def handle_charref(self, number):
         if not self.is_valid_tag():
             return
-        if number[0] in (u'x', u'X'):
+        if number[0] in ('x', 'X'):
             codepoint = int(number[1:], 16)
         else:
             codepoint = int(number)
-        self.result.append(unichr(codepoint))
+        self.result.append(chr(codepoint))

     def handle_entityref(self, name):
         if not self.is_valid_tag():
             return
         # codepoint = htmlentitydefs.name2codepoint[name]
-        # self.result.append(unichr(codepoint))
+        # self.result.append(chr(codepoint))
         self.result.append(name)

     def get_text(self):
-        return u''.join(self.result).strip()
+        return ''.join(self.result).strip()


 def html_to_text(html):
@@ -163,22 +147,14 @@ class UnicodeWriter:
         self.encoder = getincrementalencoder(encoding)()

     def writerow(self, row):
-        if IS_PY2:
-            row = [s.encode("utf-8") if hasattr(s, 'encode') else s for s in row]
         self.writer.writerow(row)
         # Fetch UTF-8 output from the queue ...
         data = self.queue.getvalue()
-        if IS_PY2:
-            data = data.decode("utf-8")
-        else:
-            data = data.strip('\x00')
+        data = data.strip('\x00')
         # ... and reencode it into the target encoding
         data = self.encoder.encode(data)
         # write to the target stream
-        if IS_PY2:
-            self.stream.write(data)
-        else:
-            self.stream.write(data.decode("utf-8"))
+        self.stream.write(data.decode())
         # empty queue
         self.queue.truncate(0)

@@ -253,7 +229,7 @@ def dict_subset(d, properties):
 def prettify_url(url, max_length=74):
     if len(url) > max_length:
         chunk_len = int(max_length / 2 + 1)
-        return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
+        return '{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
     else:
         return url

@@ -310,7 +286,7 @@ def int_or_zero(num):

 def is_valid_lang(lang):
     is_abbr = (len(lang) == 2)
-    lang = lang.lower().decode('utf-8')
+    lang = lang.lower().decode()
     if is_abbr:
         for l in language_codes:
             if l[0][:2] == lang:
@@ -407,17 +383,14 @@ def new_hmac(secret_key, url):
             secret_key_bytes = secret_key
         else:
             raise err
-    if sys.version_info[0] == 2:
-        return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest()
-    else:
-        return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
+    return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()


 def to_string(obj):
-    if isinstance(obj, basestring):
+    if isinstance(obj, str):
         return obj
     if isinstance(obj, Number):
-        return unicode(obj)
+        return str(obj)
     if hasattr(obj, '__str__'):
         return obj.__str__()
     if hasattr(obj, '__repr__'):
@@ -433,9 +406,9 @@ def ecma_unescape(s):
     """
     # s = unicode(s)
     # "%u5409" becomes "吉"
-    s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+    s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
     # "%20" becomes " ", "%F3" becomes "ó"
-    s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+    s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)
     return s
 
 
 
 

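The ecma_unescape() hunk swaps unichr() for the built-in chr(). A standalone sketch of the same idea; the two regular expressions below are assumptions written for the example, not copied from searx:

    import re

    # assumed patterns for the example: "%uXXXX" and "%XX" escapes
    ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})')
    ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})')

    def ecma_unescape(s):
        # "%u5409" becomes "吉", "%20" becomes " "
        s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
        s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)
        return s

    print(ecma_unescape('%u5409%20ok'))  # -> '吉 ok'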
+ 26 - 45
searx/webapp.py

@@ -17,37 +17,35 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
 '''

+import sys
+if sys.version_info[0] < 3:
+    print('\033[1;31m Python2 is no longer supported\033[0m')
+    exit(1)
+
 if __name__ == '__main__':
-    from sys import path
     from os.path import realpath, dirname
-    path.append(realpath(dirname(realpath(__file__)) + '/../'))
+    sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))

 import hashlib
 import hmac
 import json
 import os
-import sys

 import requests

 from searx import logger
 logger = logger.getChild('webapp')

-try:
-    from pygments import highlight
-    from pygments.lexers import get_lexer_by_name
-    from pygments.formatters import HtmlFormatter
-except:
-    logger.critical("cannot import dependency: pygments")
-    from sys import exit
-    exit(1)
-try:
-    from cgi import escape
-except:
-    from html import escape
-from six import next
 from datetime import datetime, timedelta
 from time import time
+from html import escape
+from io import StringIO
+from urllib.parse import urlencode, urlparse, urljoin
+
+from pygments import highlight
+from pygments.lexers import get_lexer_by_name
+from pygments.formatters import HtmlFormatter
+
 from werkzeug.middleware.proxy_fix import ProxyFix
 from flask import (
     Flask, request, render_template, url_for, Response, make_response,
@@ -78,7 +76,6 @@ from searx.plugins import plugins
 from searx.plugins.oa_doi_rewrite import get_doi_resolver
 from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
-from searx.url_utils import urlencode, urlparse, urljoin
 from searx.utils import new_hmac

 # check if the pyopenssl package is installed.
@@ -89,19 +86,6 @@ except ImportError:
     logger.critical("The pyopenssl package has to be installed.\n"
                     "Some HTTPS connections will fail")

-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
-
-
-if sys.version_info[0] == 3:
-    unicode = str
-    PY3 = True
-else:
-    logger.warning('\033[1;31m Python2 is no longer supported\033[0m')
-    exit(1)
-
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
 WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@ -315,11 +299,11 @@ def proxify(url):
     if not settings.get('result_proxy'):
         return url

-    url_params = dict(mortyurl=url.encode('utf-8'))
+    url_params = dict(mortyurl=url.encode())

     if settings['result_proxy'].get('key'):
         url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'],
-                                           url.encode('utf-8'),
+                                           url.encode(),
                                            hashlib.sha256).hexdigest()

     return '{0}?{1}'.format(settings['result_proxy']['url'],
@@ -347,10 +331,10 @@ def image_proxify(url):
     if settings.get('result_proxy'):
         return proxify(url)

-    h = new_hmac(settings['server']['secret_key'], url.encode('utf-8'))
+    h = new_hmac(settings['server']['secret_key'], url.encode())

     return '{0}?{1}'.format(url_for('image_proxy'),
-                            urlencode(dict(url=url.encode('utf-8'), h=h)))
+                            urlencode(dict(url=url.encode(), h=h)))


 def render(template_name, override_theme=None, **kwargs):
@@ -424,7 +408,7 @@ def render(template_name, override_theme=None, **kwargs):

     kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')

-    kwargs['unicode'] = unicode
+    kwargs['unicode'] = str

     kwargs['preferences'] = request.preferences

@@ -612,7 +596,7 @@ def index():
             if 'content' in result and result['content']:
                 result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
             if 'title' in result and result['title']:
-                result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
+                result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)
         else:
             if result.get('content'):
                 result['content'] = html_to_text(result['content']).strip()
@@ -634,14 +618,14 @@ def index():
                     minutes = int((timedifference.seconds / 60) % 60)
                     hours = int(timedifference.seconds / 60 / 60)
                     if hours == 0:
-                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
+                        result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
                     else:
-                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
+                        result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                 else:
                     result['publishedDate'] = format_date(result['publishedDate'])

     if output_format == 'json':
-        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
+        return Response(json.dumps({'query': search_query.query.decode(),
                                     'number_of_results': number_of_results,
                                     'results': results,
                                     'answers': list(result_container.answers),
@@ -670,7 +654,7 @@ def index():
             csv.writerow([row.get(key, '') for key in keys])
         csv.stream.seek(0)
         response = Response(csv.stream.read(), mimetype='application/csv')
-        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
+        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode())
         response.headers.add('Content-Disposition', cont_disp)
         return response

@@ -754,10 +738,7 @@ def autocompleter():
     disabled_engines = request.preferences.engines.get_disabled()

     # parse query
-    if PY3:
-        raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines)
-    else:
-        raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
+    raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines)
     raw_text_query.parse_query()

     # check if search query is set
@@ -879,7 +860,7 @@ def _is_selected_language_supported(engine, preferences):

 @app.route('/image_proxy', methods=['GET'])
 def image_proxy():
-    url = request.args.get('url').encode('utf-8')
+    url = request.args.get('url').encode()

     if not url:
         return '', 400

Some files were not shown because too many files changed in this diff