
Drop Python 2 (1/n): remove unicode string and url_utils

Dalf 4 years ago
commit 1022228d95
100 changed files with 303 additions and 448 deletions
  1. Makefile (+0 -5)
  2. manage.sh (+2 -2)
  3. searx/__init__.py (+2 -6)
  4. searx/answerers/__init__.py (+2 -6)
  5. searx/answerers/random/answerer.py (+7 -13)
  6. searx/answerers/statistics/answerer.py (+1 -4)
  7. searx/autocomplete.py (+7 -10)
  8. searx/engines/1337x.py (+2 -1)
  9. searx/engines/acgsou.py (+2 -2)
  10. searx/engines/apkmirror.py (+2 -1)
  11. searx/engines/archlinux.py (+1 -1)
  12. searx/engines/arxiv.py (+2 -2)
  13. searx/engines/base.py (+1 -1)
  14. searx/engines/bing.py (+2 -2)
  15. searx/engines/bing_images.py (+2 -2)
  16. searx/engines/bing_news.py (+1 -2)
  17. searx/engines/bing_videos.py (+1 -1)
  18. searx/engines/btdigg.py (+1 -1)
  19. searx/engines/currency_convert.py (+1 -4)
  20. searx/engines/dailymotion.py (+1 -1)
  21. searx/engines/deezer.py (+2 -2)
  22. searx/engines/deviantart.py (+2 -1)
  23. searx/engines/dictzone.py (+3 -3)
  24. searx/engines/digbt.py (+1 -4)
  25. searx/engines/digg.py (+1 -1)
  26. searx/engines/doku.py (+1 -1)
  27. searx/engines/duckduckgo.py (+1 -1)
  28. searx/engines/duckduckgo_definitions.py (+1 -1)
  29. searx/engines/duckduckgo_images.py (+1 -1)
  30. searx/engines/duden.py (+1 -1)
  31. searx/engines/etools.py (+1 -1)
  32. searx/engines/fdroid.py (+1 -1)
  33. searx/engines/filecrop.py (+2 -5)
  34. searx/engines/flickr.py (+1 -1)
  35. searx/engines/flickr_noapi.py (+5 -5)
  36. searx/engines/framalibre.py (+2 -5)
  37. searx/engines/frinkiac.py (+1 -1)
  38. searx/engines/genius.py (+1 -1)
  39. searx/engines/gentoo.py (+2 -2)
  40. searx/engines/gigablast.py (+1 -1)
  41. searx/engines/github.py (+1 -1)
  42. searx/engines/google.py (+1 -1)
  43. searx/engines/google_images.py (+2 -3)
  44. searx/engines/google_news.py (+1 -1)
  45. searx/engines/google_videos.py (+1 -1)
  46. searx/engines/ina.py (+2 -5)
  47. searx/engines/invidious.py (+1 -1)
  48. searx/engines/json_engine.py (+1 -6)
  49. searx/engines/kickass.py (+1 -1)
  50. searx/engines/mediawiki.py (+2 -2)
  51. searx/engines/microsoft_academic.py (+1 -2)
  52. searx/engines/mixcloud.py (+1 -1)
  53. searx/engines/nyaa.py (+1 -1)
  54. searx/engines/openstreetmap.py (+4 -4)
  55. searx/engines/peertube.py (+1 -1)
  56. searx/engines/photon.py (+1 -1)
  57. searx/engines/piratebay.py (+5 -3)
  58. searx/engines/pubmed.py (+1 -1)
  59. searx/engines/qwant.py (+3 -3)
  60. searx/engines/reddit.py (+1 -1)
  61. searx/engines/scanr_structures.py (+2 -2)
  62. searx/engines/searchcode_code.py (+1 -1)
  63. searx/engines/searchcode_doc.py (+1 -1)
  64. searx/engines/seedpeer.py (+1 -1)
  65. searx/engines/soundcloud.py (+3 -6)
  66. searx/engines/spotify.py (+4 -4)
  67. searx/engines/stackoverflow.py (+1 -1)
  68. searx/engines/tokyotoshokan.py (+1 -1)
  69. searx/engines/torrentz.py (+1 -1)
  70. searx/engines/translated.py (+4 -4)
  71. searx/engines/twitter.py (+1 -1)
  72. searx/engines/unsplash.py (+1 -1)
  73. searx/engines/vimeo.py (+1 -1)
  74. searx/engines/wikidata.py (+7 -7)
  75. searx/engines/wikipedia.py (+2 -2)
  76. searx/engines/wolframalpha_api.py (+10 -10)
  77. searx/engines/wolframalpha_noapi.py (+1 -1)
  78. searx/engines/www1x.py (+1 -1)
  79. searx/engines/xpath.py (+3 -3)
  80. searx/engines/yacy.py (+1 -1)
  81. searx/engines/yahoo.py (+1 -1)
  82. searx/engines/yahoo_news.py (+2 -2)
  83. searx/engines/yandex.py (+1 -1)
  84. searx/engines/yggtorrent.py (+1 -1)
  85. searx/engines/youtube_api.py (+1 -1)
  86. searx/engines/youtube_noapi.py (+1 -1)
  87. searx/external_bang.py (+1 -1)
  88. searx/languages.py (+69 -69)
  89. searx/plugins/__init__.py (+2 -5)
  90. searx/plugins/https_rewrite.py (+1 -4)
  91. searx/plugins/oa_doi_rewrite.py (+1 -1)
  92. searx/plugins/tracker_url_remover.py (+1 -1)
  93. searx/preferences.py (+4 -9)
  94. searx/query.py (+5 -7)
  95. searx/results.py (+5 -8)
  96. searx/search.py (+4 -11)
  97. searx/testing.py (+2 -2)
  98. searx/url_utils.py (+0 -30)
  99. searx/utils.py (+25 -52)
  100. searx/webapp.py (+26 -45)
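
Note on entry 98: searx/url_utils.py is deleted outright (0 additions, 30 deletions). It was the compatibility module the engines and plugins imported urlencode, quote, urljoin, urlparse and similar helpers from; with Python 2 gone, the per-file diffs below import those names directly from the standard library's urllib.parse. A rough sketch of the kind of shim being removed (illustrative only, not the exact 30 deleted lines):

    # Illustrative sketch only; not the literal contents of the removed searx/url_utils.py.
    from sys import version_info

    if version_info[0] == 2:
        # Python 2 spread these helpers over two modules.
        from urllib import quote, quote_plus, unquote, urlencode
        from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse
    else:
        # Python 3 keeps them all in urllib.parse, which callers now use directly.
        from urllib.parse import (parse_qs, parse_qsl, quote, quote_plus, unquote,
                                  urlencode, urljoin, urlparse, urlunparse)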

+ 0 - 5
Makefile

@@ -213,10 +213,6 @@ gecko.driver:
 PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot
 test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot
 
-ifeq ($(PY),2)
-test.pylint:
-	@echo "LINT      skip liniting py2"
-else
 # TODO: balance linting with pylint
 
 test.pylint: pyenvinstall
@@ -225,7 +221,6 @@ test.pylint: pyenvinstall
 		searx/testing.py \
 		searx/engines/gigablast.py \
 	)
-endif
 
 # ignored rules:
 #  E402 module level import not at top of file

+ 2 - 2
manage.sh

@@ -39,7 +39,7 @@ install_geckodriver() {
         return
     fi
     GECKODRIVER_VERSION="v0.24.0"
-    PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`"
+    PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`"
     case "$PLATFORM" in
         "linux 32bit" | "linux2 32bit") ARCH="linux32";;
         "linux 64bit" | "linux2 64bit") ARCH="linux64";;
@@ -136,7 +136,7 @@ docker_build() {
     # Check consistency between the git tag and the searx/version.py file
     # /!\ HACK : parse Python file with bash /!\
     # otherwise it is not possible build the docker image without all Python dependencies ( version.py loads __init__.py )
-    # SEARX_PYTHON_VERSION=$(python -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
+    # SEARX_PYTHON_VERSION=$(python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)")
     SEARX_PYTHON_VERSION=$(cat searx/version.py | grep "\(VERSION_MAJOR\|VERSION_MINOR\|VERSION_BUILD\) =" | cut -d\= -f2 | sed -e 's/^[[:space:]]*//' | paste -sd "." -)
     if [ $(echo "$SEARX_GIT_VERSION" | cut -d- -f1) != "$SEARX_PYTHON_VERSION" ]; then
 	echo "Inconsistency between the last git tag and the searx/version.py file"

+ 2 - 6
searx/__init__.py

@@ -21,12 +21,8 @@ from os import environ
 from os.path import realpath, dirname, join, abspath, isfile
 from io import open
 from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION
-try:
-    from yaml import safe_load
-except:
-    from sys import exit, stderr
-    stderr.write('[E] install pyyaml\n')
-    exit(2)
+from yaml import safe_load
+
 
 searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))

+ 2 - 6
searx/answerers/__init__.py

@@ -1,12 +1,8 @@
 from os import listdir
 from os.path import realpath, dirname, join, isdir
-from sys import version_info
 from searx.utils import load_module
 from collections import defaultdict
 
-if version_info[0] == 3:
-    unicode = str
-
 
 answerers_dir = dirname(realpath(__file__))
 
@@ -36,10 +32,10 @@ def ask(query):
     results = []
     query_parts = list(filter(None, query.query.split()))
 
-    if query_parts[0].decode('utf-8') not in answerers_by_keywords:
+    if query_parts[0].decode() not in answerers_by_keywords:
         return results
 
-    for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]:
+    for answerer in answerers_by_keywords[query_parts[0].decode()]:
         result = answerer(query)
         if result:
             results.append(result)

+ 7 - 13
searx/answerers/random/answerer.py

@@ -1,7 +1,6 @@
 import hashlib
 import random
 import string
-import sys
 import uuid
 from flask_babel import gettext
 
@@ -10,12 +9,7 @@ from flask_babel import gettext
 keywords = ('random',)
 
 random_int_max = 2**31
-
-if sys.version_info[0] == 2:
-    random_string_letters = string.lowercase + string.digits + string.uppercase
-else:
-    unicode = str
-    random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
+random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase
 
 
 def random_characters():
@@ -24,25 +18,25 @@ def random_characters():
 
 
 def random_string():
-    return u''.join(random_characters())
+    return ''.join(random_characters())
 
 
 def random_float():
-    return unicode(random.random())
+    return str(random.random())
 
 
 def random_int():
-    return unicode(random.randint(-random_int_max, random_int_max))
+    return str(random.randint(-random_int_max, random_int_max))
 
 
 def random_sha256():
     m = hashlib.sha256()
     m.update(''.join(random_characters()).encode())
-    return unicode(m.hexdigest())
+    return str(m.hexdigest())
 
 
 def random_uuid():
-    return unicode(uuid.uuid4())
+    return str(uuid.uuid4())
 
 
 random_types = {b'string': random_string,
@@ -70,4 +64,4 @@ def answer(query):
 def self_info():
     return {'name': gettext('Random value generator'),
             'description': gettext('Generate different random values'),
-            'examples': [u'random {}'.format(x.decode('utf-8')) for x in random_types]}
+            'examples': ['random {}'.format(x.decode()) for x in random_types]}

+ 1 - 4
searx/answerers/statistics/answerer.py

@@ -1,11 +1,8 @@
-from sys import version_info
 from functools import reduce
 from operator import mul
 
 from flask_babel import gettext
 
-if version_info[0] == 3:
-    unicode = str
 
 keywords = ('min',
             'max',
@@ -44,7 +41,7 @@ def answer(query):
     if answer is None:
         return []
 
-    return [{'answer': unicode(answer)}]
+    return [{'answer': str(answer)}]
 
 
 # required answerer function
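
The three answerer diffs above show the other recurring pattern of this commit: the version check that aliased unicode to str on Python 3 is deleted, and the code calls str() directly. A minimal sketch of the idiom being dropped (hypothetical module, not one of the files in this commit):

    import sys

    if sys.version_info[0] == 3:
        unicode = str              # alias so unicode(...) still works under Python 3

    value = unicode(3.14)          # Python 2: u'3.14' (unicode), Python 3: '3.14' (str)

    # With Python 2 support removed, the check and the alias disappear:
    value = str(3.14)              # '3.14'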

+ 7 - 10
searx/autocomplete.py

@@ -16,19 +16,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 
-import sys
 from lxml import etree
 from json import loads
+from urllib.parse import urlencode
+
 from searx import settings
 from searx.languages import language_codes
 from searx.engines import (
     categories, engines, engine_shortcuts
 )
 from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode
-
-if sys.version_info[0] == 3:
-    unicode = str
 
 
 def get(*args, **kwargs):
@@ -85,22 +82,22 @@ def searx_bang(full_query):
             engine_query = full_query.getSearchQuery()[1:]
 
             for lc in language_codes:
-                lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+                lang_id, lang_name, country, english_name = map(str.lower, lc)
 
                 # check if query starts with language-id
                 if lang_id.startswith(engine_query):
                     if len(engine_query) <= 2:
-                        results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
+                        results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0]))
                     else:
-                        results.append(u':{lang_id}'.format(lang_id=lang_id))
+                        results.append(':{lang_id}'.format(lang_id=lang_id))
 
                 # check if query starts with language name
                 if lang_name.startswith(engine_query) or english_name.startswith(engine_query):
-                    results.append(u':{lang_name}'.format(lang_name=lang_name))
+                    results.append(':{lang_name}'.format(lang_name=lang_name))
 
                 # check if query starts with country
                 if country.startswith(engine_query.replace('_', ' ')):
-                    results.append(u':{country}'.format(country=country.replace(' ', '_')))
+                    results.append(':{country}'.format(country=country.replace(' ', '_')))
 
     # remove duplicates
     result_set = set(results)
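
The map(str.lower, lc) call above works because the tuples in searx/languages.py (diffed further down) are now plain str literals rather than u'' ones, so the unbound str.lower can be mapped over them directly. A quick check with one entry copied from that table:

    # Entry taken from the languages.py diff below; the assertion is illustrative only.
    lc = ("de-AT", "Deutsch", "Österreich", "German")
    lang_id, lang_name, country, english_name = map(str.lower, lc)
    assert (lang_id, country) == ("de-at", "österreich")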

+ 2 - 1
searx/engines/1337x.py

@@ -1,7 +1,8 @@
+from urllib.parse import quote, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import quote, urljoin
+
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

+ 2 - 2
searx/engines/acgsou.py

@@ -9,9 +9,9 @@
  @parse        url, title, content, seed, leech, torrentfile
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config
@@ -63,7 +63,7 @@ def response(resp):
         except:
             pass
         # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
-        content = u'Category: "{category}".'
+        content = 'Category: "{category}".'
         content = content.format(category=category)
 
         results.append({'url': href,

+ 2 - 1
searx/engines/apkmirror.py

@@ -9,9 +9,10 @@
  @parse       url, title, thumbnail_src
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
+
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/archlinux.py

@@ -11,9 +11,9 @@
  @parse        url, title
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

+ 2 - 2
searx/engines/arxiv.py

@@ -11,9 +11,9 @@
  More info on api: https://arxiv.org/help/api/user-manual
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
-from searx.url_utils import urlencode
 
 
 categories = ['science']
@@ -30,7 +30,7 @@ def request(query, params):
     # basic search
     offset = (params['pageno'] - 1) * number_of_results
 
-    string_args = dict(query=query.decode('utf-8'),
+    string_args = dict(query=query.decode(),
                        offset=offset,
                        number_of_results=number_of_results)
 

+ 1 - 1
searx/engines/base.py

@@ -13,10 +13,10 @@
  More info on api: http://base-search.net/about/download/base_interface.pdf
 """
 
+from urllib.parse import urlencode
 from lxml import etree
 from datetime import datetime
 import re
-from searx.url_utils import urlencode
 from searx.utils import searx_useragent
 
 

+ 2 - 2
searx/engines/bing.py

@@ -14,10 +14,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx import logger, utils
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import match_language, gen_useragent, eval_xpath
 
 logger = logger.getChild('bing engine')
@@ -47,7 +47,7 @@ def request(query, params):
     else:
         lang = match_language(params['language'], supported_languages, language_aliases)
 
-    query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')
+    query = 'language:{} {}'.format(lang.split('-')[0].upper(), query.decode()).encode()
 
     search_path = search_string.format(
         query=urlencode({'q': query}),

+ 2 - 2
searx/engines/bing_images.py

@@ -12,10 +12,10 @@
 
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from json import loads
 import re
-from searx.url_utils import urlencode
 from searx.utils import match_language
 
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
@@ -91,7 +91,7 @@ def response(resp):
 
             # strip 'Unicode private use area' highlighting, they render to Tux
             # the Linux penguin and a standing diamond on my machine...
-            title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
+            title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
             results.append({'template': 'images.html',
                             'url': m['purl'],
                             'thumbnail_src': m['turl'],

+ 1 - 2
searx/engines/bing_news.py

@@ -13,10 +13,9 @@
 
 from datetime import datetime
 from dateutil import parser
+from urllib.parse import urlencode, urlparse, parse_qsl
 from lxml import etree
 from searx.utils import list_get, match_language
-from searx.url_utils import urlencode, urlparse, parse_qsl
-
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 
 # engine dependent config

+ 1 - 1
searx/engines/bing_videos.py

@@ -12,7 +12,7 @@
 
 from json import loads
 from lxml import html
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language
 
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases

+ 1 - 1
searx/engines/btdigg.py

@@ -12,8 +12,8 @@
 
 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote, urljoin
 from searx.utils import get_torrent_size
 
 # engine dependent config

+ 1 - 4
searx/engines/currency_convert.py

@@ -1,14 +1,11 @@
 import json
 import re
 import os
-import sys
 import unicodedata
 
 from io import open
 from datetime import datetime
 
-if sys.version_info[0] == 3:
-    unicode = str
 
 categories = []
 url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}'
@@ -20,7 +17,7 @@ db = 1
 
 
 def normalize_name(name):
-    name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s')
+    name = name.decode().lower().replace('-', ' ').rstrip('s')
     name = re.sub(' +', ' ', name)
     return unicodedata.normalize('NFKD', name).lower()
 

+ 1 - 1
searx/engines/dailymotion.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import match_language, html_to_text
 
 # engine dependent config

+ 2 - 2
searx/engines/deezer.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['music']
@@ -50,7 +50,7 @@ def response(resp):
             if url.startswith('http://'):
                 url = 'https' + url[4:]
 
-            content = u'{} - {} - {}'.format(
+            content = '{} - {} - {}'.format(
                 result['artist']['name'],
                 result['album']['title'],
                 result['title'])

+ 2 - 1
searx/engines/deviantart.py

@@ -14,8 +14,9 @@
 
 from lxml import html
 import re
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
+
 
 # engine dependent config
 categories = ['images']

+ 3 - 3
searx/engines/dictzone.py

@@ -10,12 +10,12 @@
 """
 
 import re
+from urllib.parse import urljoin
 from lxml import html
 from searx.utils import is_valid_lang, eval_xpath
-from searx.url_utils import urljoin
 
 categories = ['general']
-url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
+url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
 weight = 100
 
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
@@ -37,7 +37,7 @@ def request(query, params):
 
     params['url'] = url.format(from_lang=from_lang[2],
                                to_lang=to_lang[2],
-                               query=query.decode('utf-8'))
+                               query=query.decode())
 
     return params
 

+ 1 - 4
searx/engines/digbt.py

@@ -10,14 +10,11 @@
  @parse       url, title, content, magnetlink
 """
 
-from sys import version_info
+from urllib.parse import urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
-from searx.url_utils import urljoin
 
-if version_info[0] == 3:
-    unicode = str
 
 categories = ['videos', 'music', 'files']
 paging = True

+ 1 - 1
searx/engines/digg.py

@@ -14,8 +14,8 @@ import random
 import string
 from dateutil import parser
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
-from searx.url_utils import urlencode
 from datetime import datetime
 
 # engine dependent config

+ 1 - 1
searx/engines/doku.py

@@ -9,10 +9,10 @@
 # @stable      yes
 # @parse       (general)    url, title, content
 
+from urllib.parse import urlencode
 from lxml.html import fromstring
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'

+ 1 - 1
searx/engines/duckduckgo.py

@@ -15,9 +15,9 @@
 
 from lxml.html import fromstring
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath
 
 # engine dependent config

+ 1 - 1
searx/engines/duckduckgo_definitions.py

@@ -10,11 +10,11 @@ DuckDuckGo (definitions)
 """
 
 import json
+from urllib.parse import urlencode
 from lxml import html
 from re import compile
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
-from searx.url_utils import urlencode
 from searx.utils import html_to_text, match_language
 
 url = 'https://api.duckduckgo.com/'\

+ 1 - 1
searx/engines/duckduckgo_images.py

@@ -14,13 +14,13 @@
 """
 
 from json import loads
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
 from searx.engines.duckduckgo import (
     _fetch_supported_languages, supported_languages_url,
     get_region_code, language_aliases
 )
 from searx.poolrequests import get
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['images']

+ 1 - 1
searx/engines/duden.py

@@ -10,9 +10,9 @@
 
 from lxml import html, etree
 import re
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import eval_xpath
-from searx.url_utils import quote, urljoin
 from searx import logger
 
 categories = ['general']

+ 1 - 1
searx/engines/etools.py

@@ -10,8 +10,8 @@
 """
 
 from lxml import html
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import eval_xpath
 
 categories = ['general']

+ 1 - 1
searx/engines/fdroid.py

@@ -9,9 +9,9 @@
  @parse        url, title, content
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['files']

+ 2 - 5
searx/engines/filecrop.py

@@ -1,9 +1,6 @@
-from searx.url_utils import urlencode
+from html.parser import HTMLParser
+from urllib.parse import urlencode
 
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
 
 url = 'http://www.filecrop.com/'
 search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa

+ 1 - 1
searx/engines/flickr.py

@@ -14,7 +14,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 categories = ['images']
 

+ 5 - 5
searx/engines/flickr_noapi.py

@@ -15,8 +15,8 @@
 from json import loads
 from time import time
 import re
+from urllib.parse import urlencode
 from searx.engines import logger
-from searx.url_utils import urlencode
 from searx.utils import ecma_unescape, html_to_text
 
 logger = logger.getChild('flickr-noapi')
@@ -117,10 +117,10 @@ def response(resp):
             'img_format': img_format,
             'template': 'images.html'
         }
-        result['author'] = author.encode('utf-8', 'ignore').decode('utf-8')
-        result['source'] = source.encode('utf-8', 'ignore').decode('utf-8')
-        result['title'] = title.encode('utf-8', 'ignore').decode('utf-8')
-        result['content'] = content.encode('utf-8', 'ignore').decode('utf-8')
+        result['author'] = author.encode(errors='ignore').decode()
+        result['source'] = source.encode(errors='ignore').decode()
+        result['title'] = title.encode(errors='ignore').decode()
+        result['content'] = content.encode(errors='ignore').decode()
         results.append(result)
 
     return results
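
The shortened calls above lean on Python 3 defaults: str.encode() and bytes.decode() use UTF-8 unless another codec is given, so dropping the explicit 'utf-8' arguments (here, and in the many query.decode() and .encode() call sites elsewhere in this commit) does not change behaviour. A small sanity check of that equivalence:

    # Sketch only: the shortened encode/decode calls match the explicit UTF-8 ones.
    title = 'Tux \U0001f427'
    assert title.encode(errors='ignore') == title.encode('utf-8', 'ignore')
    assert title.encode(errors='ignore').decode() == title.encode('utf-8', 'ignore').decode('utf-8')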

+ 2 - 5
searx/engines/framalibre.py

@@ -10,13 +10,10 @@
  @parse       url, title, content, thumbnail, img_src
 """
 
-try:
-    from cgi import escape
-except:
-    from html import escape
+from html import escape
+from urllib.parse import urljoin, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urljoin, urlencode
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/frinkiac.py

@@ -10,7 +10,7 @@ Frinkiac (Images)
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 categories = ['images']
 

+ 1 - 1
searx/engines/genius.py

@@ -11,7 +11,7 @@ Genius
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from datetime import datetime
 
 # engine dependent config

+ 2 - 2
searx/engines/gentoo.py

@@ -11,9 +11,9 @@
  @parse        url, title
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']
@@ -90,7 +90,7 @@ def request(query, params):
     # if our language is hosted on the main site, we need to add its name
     # to the query in order to narrow the results to that language
     if language in main_langs:
-        query += b' (' + (main_langs[language]).encode('utf-8') + b')'
+        query += b' (' + (main_langs[language]).encode() + b')'
 
     # prepare the request parameters
     query = urlencode({'search': query})

+ 1 - 1
searx/engines/gigablast.py

@@ -14,8 +14,8 @@
 
 import re
 from json import loads
+from urllib.parse import urlencode
 # from searx import logger
-from searx.url_utils import urlencode
 from searx.poolrequests import get
 
 # engine dependent config

+ 1 - 1
searx/engines/github.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/google.py

@@ -18,11 +18,11 @@ Definitions`_.
 
 # pylint: disable=invalid-name, missing-function-docstring
 
+from urllib.parse import urlencode, urlparse
 from lxml import html
 from flask_babel import gettext
 from searx.engines.xpath import extract_text
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import match_language, eval_xpath
 
 logger = logger.getChild('google engine')

+ 2 - 3
searx/engines/google_images.py

@@ -24,11 +24,10 @@ Definitions`_.
 
 """
 
-import urllib
+from urllib.parse import urlencode, urlparse, unquote
 from lxml import html
 from flask_babel import gettext
 from searx import logger
-from searx.url_utils import urlencode, urlparse
 from searx.utils import eval_xpath
 from searx.engines.xpath import extract_text
 
@@ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id):
         if 'gstatic.com/images' in line and data_id in line:
             url_line = _script[i + 1]
             img_url = url_line.split('"')[1]
-            img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%'))
+            img_url = unquote(img_url.replace(r'\u00', r'%'))
     return img_url
 
 

+ 1 - 1
searx/engines/google_news.py

@@ -10,9 +10,9 @@
  @parse       url, title, content, publishedDate
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language
 
 # search-url

+ 1 - 1
searx/engines/google_videos.py

@@ -12,9 +12,9 @@
 
 from datetime import date, timedelta
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 import re
 
 # engine dependent config

+ 2 - 5
searx/engines/ina.py

@@ -12,15 +12,12 @@
 # @todo        embedded (needs some md5 from video page)
 
 from json import loads
+from urllib.parse import urlencode
 from lxml import html
 from dateutil import parser
+from html.parser import HTMLParser
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
 
 # engine dependent config
 categories = ['videos']

+ 1 - 1
searx/engines/invidious.py

@@ -8,7 +8,7 @@
 # @stable      yes
 # @parse       url, title, content, publishedDate, thumbnail, embedded, author, length
 
-from searx.url_utils import quote_plus
+from urllib.parse import quote_plus
 from dateutil import parser
 import time
 

+ 1 - 6
searx/engines/json_engine.py

@@ -1,11 +1,8 @@
 from collections import Iterable
 from json import loads
-from sys import version_info
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import to_string
 
-if version_info[0] == 3:
-    unicode = str
 
 search_url = None
 url_query = None
@@ -37,8 +34,6 @@ def iterate(iterable):
 def is_iterable(obj):
     if type(obj) == str:
         return False
-    if type(obj) == unicode:
-        return False
     return isinstance(obj, Iterable)
 
 

+ 1 - 1
searx/engines/kickass.py

@@ -12,9 +12,9 @@
 
 from lxml import html
 from operator import itemgetter
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size, convert_str_to_int
-from searx.url_utils import quote, urljoin
 
 # engine dependent config
 categories = ['videos', 'music', 'files']

+ 2 - 2
searx/engines/mediawiki.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from string import Formatter
-from searx.url_utils import urlencode, quote
+from urllib.parse import urlencode, quote
 
 # engine dependent config
 categories = ['general']
@@ -79,7 +79,7 @@ def response(resp):
         if result.get('snippet', '').startswith('#REDIRECT'):
             continue
         url = base_url.format(language=resp.search_params['language']) +\
-            'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8'))
+            'wiki/' + quote(result['title'].replace(' ', '_').encode())
 
         # append result
         results.append({'url': url,

+ 1 - 2
searx/engines/microsoft_academic.py

@@ -12,8 +12,7 @@ Microsoft Academic (Science)
 from datetime import datetime
 from json import loads
 from uuid import uuid4
-
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text
 
 categories = ['images']

+ 1 - 1
searx/engines/mixcloud.py

@@ -12,7 +12,7 @@
 
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['music']

+ 1 - 1
searx/engines/nyaa.py

@@ -10,8 +10,8 @@
 """
 
 from lxml import html
+from urllib.parse import urlencode
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config

+ 4 - 4
searx/engines/openstreetmap.py

@@ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)')
 # do search-request
 def request(query, params):
 
-    params['url'] = base_url + search_string.format(query=query.decode('utf-8'))
-    params['route'] = route_re.match(query.decode('utf-8'))
+    params['url'] = base_url + search_string.format(query=query.decode())
+    params['route'] = route_re.match(query.decode())
 
     return params
 
@@ -52,7 +52,7 @@ def response(resp):
         if 'display_name' not in r:
             continue
 
-        title = r['display_name'] or u''
+        title = r['display_name'] or ''
         osm_type = r.get('osm_type', r.get('type'))
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])
@@ -64,7 +64,7 @@ def response(resp):
 
         # if no geojson is found and osm_type is a node, add geojson Point
         if not geojson and osm_type == 'node':
-            geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]}
+            geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]}
 
         address_raw = r.get('address')
         address = {}

+ 1 - 1
searx/engines/peertube.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.utils import html_to_text
 
 # engine dependent config

+ 1 - 1
searx/engines/photon.py

@@ -11,8 +11,8 @@
 """
 
 from json import loads
+from urllib.parse import urlencode
 from searx.utils import searx_useragent
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['map']

+ 5 - 3
searx/engines/piratebay.py

@@ -11,7 +11,9 @@
 from json import loads
 from datetime import datetime
 from operator import itemgetter
-from searx.url_utils import quote
+
+from urllib.parse import quote, urljoin
+from searx.engines.xpath import extract_text
 from searx.utils import get_torrent_size
 
 # engine dependent config
@@ -62,8 +64,8 @@ def response(resp):
     # parse results
     for result in search_res:
         link = url + "description.php?id=" + result["id"]
-        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \
-            "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers)
+        magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\
+                     + "&tr=" + "&tr=".join(trackers)
 
         params = {
             "url": link,

+ 1 - 1
searx/engines/pubmed.py

@@ -14,7 +14,7 @@
 from flask_babel import gettext
 from lxml import etree
 from datetime import datetime
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 from searx.poolrequests import get
 
 

+ 3 - 3
searx/engines/qwant.py

@@ -12,9 +12,9 @@
 
 from datetime import datetime
 from json import loads
-from searx.utils import html_to_text
-from searx.url_utils import urlencode
-from searx.utils import match_language
+from urllib.parse import urlencode
+from searx.utils import html_to_text, match_language
+
 
 # engine dependent config
 categories = None

+ 1 - 1
searx/engines/reddit.py

@@ -12,7 +12,7 @@
 
 import json
 from datetime import datetime
-from searx.url_utils import urlencode, urljoin, urlparse
+from urllib.parse import urlencode, urljoin, urlparse
 
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']

+ 2 - 2
searx/engines/scanr_structures.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads, dumps
-from searx.utils import html_to_text
+from urllib.parse import html_to_text
 
 # engine dependent config
 categories = ['science']
@@ -29,7 +29,7 @@ def request(query, params):
     params['url'] = search_url
     params['method'] = 'POST'
     params['headers']['Content-type'] = "application/json"
-    params['data'] = dumps({"query": query.decode('utf-8'),
+    params['data'] = dumps({"query": query.decode(),
                             "searchField": "ALL",
                             "sortDirection": "ASC",
                             "sortOrder": "RELEVANCY",

+ 1 - 1
searx/engines/searchcode_code.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 
 # engine dependent config

+ 1 - 1
searx/engines/searchcode_doc.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/seedpeer.py

@@ -11,7 +11,7 @@
 from lxml import html
 from json import loads
 from operator import itemgetter
-from searx.url_utils import quote, urljoin
+from urllib.parse import quote, urljoin
 from searx.engines.xpath import extract_text
 
 

+ 3 - 6
searx/engines/soundcloud.py

@@ -14,14 +14,11 @@ import re
 from json import loads
 from lxml import html
 from dateutil import parser
+from io import StringIO
+from urllib.parse import quote_plus, urlencode
 from searx import logger
 from searx.poolrequests import get as http_get
-from searx.url_utils import quote_plus, urlencode
 
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
 
 # engine dependent config
 categories = ['music']
@@ -61,7 +58,7 @@ def get_client_id():
             # gets app_js and searches for the clientid
             response = http_get(app_js_url)
             if response.ok:
-                cids = cid_re.search(response.content.decode("utf-8"))
+                cids = cid_re.search(response.content.decode())
                 if cids is not None and len(cids.groups()):
                     return cids.groups()[0]
     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")

+ 4 - 4
searx/engines/spotify.py

@@ -11,7 +11,7 @@
 """
 
 from json import loads
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 import requests
 import base64
 
@@ -39,8 +39,8 @@ def request(query, params):
         'https://accounts.spotify.com/api/token',
         data={'grant_type': 'client_credentials'},
         headers={'Authorization': 'Basic ' + base64.b64encode(
-            "{}:{}".format(api_client_id, api_client_secret).encode('utf-8')
-        ).decode('utf-8')}
+            "{}:{}".format(api_client_id, api_client_secret).encode()
+        ).decode()}
     )
     j = loads(r.text)
     params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))}
@@ -59,7 +59,7 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = u'{} - {} - {}'.format(
+            content = '{} - {} - {}'.format(
                 result['artists'][0]['name'],
                 result['album']['name'],
                 result['name'])

+ 1 - 1
searx/engines/stackoverflow.py

@@ -10,9 +10,9 @@
  @parse       url, title, content
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['it']

+ 1 - 1
searx/engines/tokyotoshokan.py

@@ -11,10 +11,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 from datetime import datetime
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size, int_or_zero
 
 # engine dependent config

+ 1 - 1
searx/engines/torrentz.py

@@ -12,10 +12,10 @@
 """
 
 import re
+from urllib.parse import urlencode
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode
 from searx.utils import get_torrent_size
 
 # engine dependent config

+ 4 - 4
searx/engines/translated.py

@@ -12,8 +12,8 @@ import re
 from searx.utils import is_valid_lang
 
 categories = ['general']
-url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
-web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
+url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'
+web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
 weight = 100
 
 parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
@@ -39,9 +39,9 @@ def request(query, params):
         key_form = ''
     params['url'] = url.format(from_lang=from_lang[1],
                                to_lang=to_lang[1],
-                               query=query.decode('utf-8'),
+                               query=query.decode(),
                                key=key_form)
-    params['query'] = query.decode('utf-8')
+    params['query'] = query.decode()
     params['from_lang'] = from_lang
     params['to_lang'] = to_lang
 

+ 1 - 1
searx/engines/twitter.py

@@ -12,10 +12,10 @@
  @todo        publishedDate
 """
 
+from urllib.parse import urlencode, urljoin
 from lxml import html
 from datetime import datetime
 from searx.engines.xpath import extract_text
-from searx.url_utils import urlencode, urljoin
 
 # engine dependent config
 categories = ['social media']

+ 1 - 1
searx/engines/unsplash.py

@@ -10,7 +10,7 @@
  @parse       url, title, img_src, thumbnail_src
 """
 
-from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl
+from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 from json import loads
 
 url = 'https://unsplash.com/'

+ 1 - 1
searx/engines/vimeo.py

@@ -12,9 +12,9 @@
 # @todo        rewrite to api
 # @todo        set content-parameter with correct data
 
+from urllib.parse import urlencode
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
 
 # engine dependent config
 categories = ['videos']

+ 7 - 7
searx/engines/wikidata.py

@@ -15,9 +15,9 @@ from searx import logger
 from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
-from searx.url_utils import urlencode
 from searx.utils import match_language, eval_xpath
 
+from urllib.parse import urlencode
 from json import loads
 from lxml.html import fromstring
 from lxml import etree
@@ -76,7 +76,7 @@ def request(query, params):
 def response(resp):
     results = []
     htmlparser = etree.HTMLParser()
-    html = fromstring(resp.content.decode("utf-8"), parser=htmlparser)
+    html = fromstring(resp.content.decode(), parser=htmlparser)
     search_results = eval_xpath(html, wikidata_ids_xpath)
 
     if resp.search_params['language'].split('-')[0] == 'all':
@@ -89,7 +89,7 @@ def response(resp):
         wikidata_id = search_result.split('/')[-1]
         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language}))
         htmlresponse = get(url)
-        jsonresponse = loads(htmlresponse.content.decode("utf-8"))
+        jsonresponse = loads(htmlresponse.content.decode())
         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser)
 
     return results
@@ -453,16 +453,16 @@ def get_geolink(result):
     latitude, longitude = coordinates.split(',')
 
     # convert to decimal
-    lat = int(latitude[:latitude.find(u'°')])
+    lat = int(latitude[:latitude.find('°')])
     if latitude.find('\'') >= 0:
-        lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0
+        lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0
     if latitude.find('"') >= 0:
         lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0
     if latitude.find('S') >= 0:
         lat *= -1
-    lon = int(longitude[:longitude.find(u'°')])
+    lon = int(longitude[:longitude.find('°')])
     if longitude.find('\'') >= 0:
-        lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0
+        lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0
     if longitude.find('"') >= 0:
         lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0
     if longitude.find('W') >= 0:

+ 2 - 2
searx/engines/wikipedia.py

@@ -10,13 +10,13 @@
  @parse       url, infobox
 """
 
+from urllib.parse import quote
 from json import loads
 from lxml.html import fromstring
-from searx.url_utils import quote
 from searx.utils import match_language, searx_useragent
 
 # search-url
-search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
 supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
 
 

+ 10 - 10
searx/engines/wolframalpha_api.py

@@ -9,7 +9,7 @@
 # @parse       url, infobox
 
 from lxml import etree
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
@@ -45,15 +45,15 @@ def request(query, params):
 
 # replace private user area characters to make text legible
 def replace_pua_chars(text):
-    pua_chars = {u'\uf522': u'\u2192',  # rigth arrow
-                 u'\uf7b1': u'\u2115',  # set of natural numbers
-                 u'\uf7b4': u'\u211a',  # set of rational numbers
-                 u'\uf7b5': u'\u211d',  # set of real numbers
-                 u'\uf7bd': u'\u2124',  # set of integer numbers
-                 u'\uf74c': 'd',        # differential
-                 u'\uf74d': u'\u212f',  # euler's number
-                 u'\uf74e': 'i',        # imaginary number
-                 u'\uf7d9': '='}        # equals sign
+    pua_chars = {'\uf522': '\u2192',  # rigth arrow
+                 '\uf7b1': '\u2115',  # set of natural numbers
+                 '\uf7b4': '\u211a',  # set of rational numbers
+                 '\uf7b5': '\u211d',  # set of real numbers
+                 '\uf7bd': '\u2124',  # set of integer numbers
+                 '\uf74c': 'd',       # differential
+                 '\uf74d': '\u212f',  # euler's number
+                 '\uf74e': 'i',       # imaginary number
+                 '\uf7d9': '='}       # equals sign
 
     for k, v in pua_chars.items():
         text = text.replace(k, v)

+ 1 - 1
searx/engines/wolframalpha_noapi.py

@@ -10,9 +10,9 @@
 
 from json import loads
 from time import time
+from urllib.parse import urlencode
 
 from searx.poolrequests import get as http_get
-from searx.url_utils import urlencode
 
 # search-url
 url = 'https://www.wolframalpha.com/'

+ 1 - 1
searx/engines/www1x.py

@@ -11,7 +11,7 @@
 """
 
 from lxml import html
-from searx.url_utils import urlencode, urljoin
+from urllib.parse import urlencode, urljoin
 from searx.engines.xpath import extract_text
 
 # engine dependent config

+ 3 - 3
searx/engines/xpath.py

@@ -1,7 +1,7 @@
+from urllib.parse import unquote, urlencode, urljoin, urlparse
 from lxml import html
 from lxml.etree import _ElementStringResult, _ElementUnicodeResult
 from searx.utils import html_to_text, eval_xpath
-from searx.url_utils import unquote, urlencode, urljoin, urlparse
 
 search_url = None
 url_xpath = None
@@ -56,7 +56,7 @@ def extract_url(xpath_results, search_url):
     if url.startswith('//'):
         # add http or https to this kind of url //example.com/
         parsed_search_url = urlparse(search_url)
-        url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
+        url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url)
     elif url.startswith('/'):
         # fix relative url to the search engine
         url = urljoin(search_url, url)
@@ -86,7 +86,7 @@ def normalize_url(url):
         p = parsed_url.path
         mark = p.find('/**')
         if mark != -1:
-            return unquote(p[mark + 3:]).decode('utf-8')
+            return unquote(p[mark + 3:]).decode()
 
     return url
 

+ 1 - 1
searx/engines/yacy.py

@@ -14,7 +14,7 @@
 
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 from searx.utils import html_to_text
 

+ 1 - 1
searx/engines/yahoo.py

@@ -11,9 +11,9 @@
  @parse       url, title, content, suggestion
 """
 
+from urllib.parse import unquote, urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.url_utils import unquote, urlencode
 from searx.utils import match_language, eval_xpath
 
 # engine dependent config

+ 2 - 2
searx/engines/yahoo_news.py

@@ -11,13 +11,13 @@
 
 import re
 from datetime import datetime, timedelta
+from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.engines.yahoo import (
     parse_url, _fetch_supported_languages, supported_languages_url, language_aliases
 )
 from dateutil import parser
-from searx.url_utils import urlencode
 from searx.utils import match_language
 
 # engine dependent config
@@ -58,7 +58,7 @@ def request(query, params):
 
 def sanitize_url(url):
     if ".yahoo.com/" in url:
-        return re.sub(u"\\;\\_ylt\\=.+$", "", url)
+        return re.sub("\\;\\_ylt\\=.+$", "", url)
     else:
         return url
 

+ 1 - 1
searx/engines/yandex.py

@@ -9,9 +9,9 @@
  @parse       url, title, content
 """
 
+from urllib.parse import urlencode
 from lxml import html
 from searx import logger
-from searx.url_utils import urlencode
 
 logger = logger.getChild('yandex engine')
 

+ 1 - 1
searx/engines/yggtorrent.py

@@ -11,8 +11,8 @@
 from lxml import html
 from operator import itemgetter
 from datetime import datetime
+from urllib.parse import quote
 from searx.engines.xpath import extract_text
-from searx.url_utils import quote
 from searx.utils import get_torrent_size
 from searx.poolrequests import get as http_get
 

+ 1 - 1
searx/engines/youtube_api.py

@@ -10,7 +10,7 @@
 
 from json import loads
 from dateutil import parser
-from searx.url_utils import urlencode
+from urllib.parse import urlencode
 
 # engine dependent config
 categories = ['videos', 'music']

+ 1 - 1
searx/engines/youtube_noapi.py

@@ -10,9 +10,9 @@
 
 from functools import reduce
 from json import loads
+from urllib.parse import quote_plus
 from searx.engines.xpath import extract_text
 from searx.utils import list_get
-from searx.url_utils import quote_plus
 
 # engine dependent config
 categories = ['videos', 'music']

+ 1 - 1
searx/external_bang.py

@@ -23,7 +23,7 @@ def get_bang_url(search_query):
     """
 
     if search_query.external_bang:
-        query = search_query.query.decode('utf-8', 'ignore')
+        query = search_query.query.decode(errors='ignore')
         bang = _get_bang(search_query.external_bang)
 
         if bang and query:

+ 69 - 69
searx/languages.py

@@ -3,73 +3,73 @@
 # this file is generated automatically by utils/update_search_languages.py
 
 language_codes = (
-    (u"af-NA", u"Afrikaans", u"", u"Afrikaans"),
-    (u"ar-SA", u"العربية", u"", u"Arabic"),
-    (u"be-BY", u"Беларуская", u"", u"Belarusian"),
-    (u"bg-BG", u"Български", u"", u"Bulgarian"),
-    (u"ca-AD", u"Català", u"", u"Catalan"),
-    (u"cs-CZ", u"Čeština", u"", u"Czech"),
-    (u"da-DK", u"Dansk", u"", u"Danish"),
-    (u"de", u"Deutsch", u"", u"German"),
-    (u"de-AT", u"Deutsch", u"Österreich", u"German"),
-    (u"de-CH", u"Deutsch", u"Schweiz", u"German"),
-    (u"de-DE", u"Deutsch", u"Deutschland", u"German"),
-    (u"el-GR", u"Ελληνικά", u"", u"Greek"),
-    (u"en", u"English", u"", u"English"),
-    (u"en-AU", u"English", u"Australia", u"English"),
-    (u"en-CA", u"English", u"Canada", u"English"),
-    (u"en-GB", u"English", u"United Kingdom", u"English"),
-    (u"en-IE", u"English", u"Ireland", u"English"),
-    (u"en-IN", u"English", u"India", u"English"),
-    (u"en-NZ", u"English", u"New Zealand", u"English"),
-    (u"en-PH", u"English", u"Philippines", u"English"),
-    (u"en-SG", u"English", u"Singapore", u"English"),
-    (u"en-US", u"English", u"United States", u"English"),
-    (u"es", u"Español", u"", u"Spanish"),
-    (u"es-AR", u"Español", u"Argentina", u"Spanish"),
-    (u"es-CL", u"Español", u"Chile", u"Spanish"),
-    (u"es-ES", u"Español", u"España", u"Spanish"),
-    (u"es-MX", u"Español", u"México", u"Spanish"),
-    (u"et-EE", u"Eesti", u"", u"Estonian"),
-    (u"fa-IR", u"فارسی", u"", u"Persian"),
-    (u"fi-FI", u"Suomi", u"", u"Finnish"),
-    (u"fr", u"Français", u"", u"French"),
-    (u"fr-BE", u"Français", u"Belgique", u"French"),
-    (u"fr-CA", u"Français", u"Canada", u"French"),
-    (u"fr-CH", u"Français", u"Suisse", u"French"),
-    (u"fr-FR", u"Français", u"France", u"French"),
-    (u"he-IL", u"עברית", u"", u"Hebrew"),
-    (u"hr-HR", u"Hrvatski", u"", u"Croatian"),
-    (u"hu-HU", u"Magyar", u"", u"Hungarian"),
-    (u"hy-AM", u"Հայերեն", u"", u"Armenian"),
-    (u"id-ID", u"Indonesia", u"", u"Indonesian"),
-    (u"is-IS", u"Íslenska", u"", u"Icelandic"),
-    (u"it-IT", u"Italiano", u"", u"Italian"),
-    (u"ja-JP", u"日本語", u"", u"Japanese"),
-    (u"ko-KR", u"한국어", u"", u"Korean"),
-    (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"),
-    (u"lv-LV", u"Latviešu", u"", u"Latvian"),
-    (u"ms-MY", u"Melayu", u"", u"Malay"),
-    (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"),
-    (u"nl", u"Nederlands", u"", u"Dutch"),
-    (u"nl-BE", u"Nederlands", u"België", u"Dutch"),
-    (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"),
-    (u"pl-PL", u"Polski", u"", u"Polish"),
-    (u"pt", u"Português", u"", u"Portuguese"),
-    (u"pt-BR", u"Português", u"Brasil", u"Portuguese"),
-    (u"pt-PT", u"Português", u"Portugal", u"Portuguese"),
-    (u"ro-RO", u"Română", u"", u"Romanian"),
-    (u"ru-RU", u"Русский", u"", u"Russian"),
-    (u"sk-SK", u"Slovenčina", u"", u"Slovak"),
-    (u"sl-SI", u"Slovenščina", u"", u"Slovenian"),
-    (u"sr-RS", u"Srpski", u"", u"Serbian"),
-    (u"sv-SE", u"Svenska", u"", u"Swedish"),
-    (u"sw-KE", u"Kiswahili", u"", u"Swahili"),
-    (u"th-TH", u"ไทย", u"", u"Thai"),
-    (u"tr-TR", u"Türkçe", u"", u"Turkish"),
-    (u"uk-UA", u"Українська", u"", u"Ukrainian"),
-    (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"),
-    (u"zh", u"中文", u"", u"Chinese"),
-    (u"zh-CN", u"中文", u"中国", u"Chinese"),
-    (u"zh-TW", u"中文", u"台灣", u"Chinese")
+    ("af-NA", "Afrikaans", "", "Afrikaans"),
+    ("ar-SA", "العربية", "", "Arabic"),
+    ("be-BY", "Беларуская", "", "Belarusian"),
+    ("bg-BG", "Български", "", "Bulgarian"),
+    ("ca-AD", "Català", "", "Catalan"),
+    ("cs-CZ", "Čeština", "", "Czech"),
+    ("da-DK", "Dansk", "", "Danish"),
+    ("de", "Deutsch", "", "German"),
+    ("de-AT", "Deutsch", "Österreich", "German"),
+    ("de-CH", "Deutsch", "Schweiz", "German"),
+    ("de-DE", "Deutsch", "Deutschland", "German"),
+    ("el-GR", "Ελληνικά", "", "Greek"),
+    ("en", "English", "", "English"),
+    ("en-AU", "English", "Australia", "English"),
+    ("en-CA", "English", "Canada", "English"),
+    ("en-GB", "English", "United Kingdom", "English"),
+    ("en-IE", "English", "Ireland", "English"),
+    ("en-IN", "English", "India", "English"),
+    ("en-NZ", "English", "New Zealand", "English"),
+    ("en-PH", "English", "Philippines", "English"),
+    ("en-SG", "English", "Singapore", "English"),
+    ("en-US", "English", "United States", "English"),
+    ("es", "Español", "", "Spanish"),
+    ("es-AR", "Español", "Argentina", "Spanish"),
+    ("es-CL", "Español", "Chile", "Spanish"),
+    ("es-ES", "Español", "España", "Spanish"),
+    ("es-MX", "Español", "México", "Spanish"),
+    ("et-EE", "Eesti", "", "Estonian"),
+    ("fa-IR", "فارسی", "", "Persian"),
+    ("fi-FI", "Suomi", "", "Finnish"),
+    ("fr", "Français", "", "French"),
+    ("fr-BE", "Français", "Belgique", "French"),
+    ("fr-CA", "Français", "Canada", "French"),
+    ("fr-CH", "Français", "Suisse", "French"),
+    ("fr-FR", "Français", "France", "French"),
+    ("he-IL", "עברית", "", "Hebrew"),
+    ("hr-HR", "Hrvatski", "", "Croatian"),
+    ("hu-HU", "Magyar", "", "Hungarian"),
+    ("hy-AM", "Հայերեն", "", "Armenian"),
+    ("id-ID", "Indonesia", "", "Indonesian"),
+    ("is-IS", "Íslenska", "", "Icelandic"),
+    ("it-IT", "Italiano", "", "Italian"),
+    ("ja-JP", "日本語", "", "Japanese"),
+    ("ko-KR", "한국어", "", "Korean"),
+    ("lt-LT", "Lietuvių", "", "Lithuanian"),
+    ("lv-LV", "Latviešu", "", "Latvian"),
+    ("ms-MY", "Melayu", "", "Malay"),
+    ("nb-NO", "Norsk Bokmål", "", "Norwegian Bokmål"),
+    ("nl", "Nederlands", "", "Dutch"),
+    ("nl-BE", "Nederlands", "België", "Dutch"),
+    ("nl-NL", "Nederlands", "Nederland", "Dutch"),
+    ("pl-PL", "Polski", "", "Polish"),
+    ("pt", "Português", "", "Portuguese"),
+    ("pt-BR", "Português", "Brasil", "Portuguese"),
+    ("pt-PT", "Português", "Portugal", "Portuguese"),
+    ("ro-RO", "Română", "", "Romanian"),
+    ("ru-RU", "Русский", "", "Russian"),
+    ("sk-SK", "Slovenčina", "", "Slovak"),
+    ("sl-SI", "Slovenščina", "", "Slovenian"),
+    ("sr-RS", "Srpski", "", "Serbian"),
+    ("sv-SE", "Svenska", "", "Swedish"),
+    ("sw-KE", "Kiswahili", "", "Swahili"),
+    ("th-TH", "ไทย", "", "Thai"),
+    ("tr-TR", "Türkçe", "", "Turkish"),
+    ("uk-UA", "Українська", "", "Ukrainian"),
+    ("vi-VN", "Tiếng Việt", "", "Vietnamese"),
+    ("zh", "中文", "", "Chinese"),
+    ("zh-CN", "中文", "中国", "Chinese"),
+    ("zh-TW", "中文", "台灣", "Chinese")
 )

+ 2 - 5
searx/plugins/__init__.py

@@ -20,13 +20,10 @@ from importlib import import_module
 from os import listdir, makedirs, remove, stat, utime
 from os.path import abspath, basename, dirname, exists, join
 from shutil import copyfile
-from sys import version_info
 from traceback import print_exc
 
 from searx import logger, settings, static_path
 
-if version_info[0] == 3:
-    unicode = str
 
 logger = logger.getChild('plugins')
 
@@ -38,8 +35,8 @@ from searx.plugins import (oa_doi_rewrite,
                            tracker_url_remover,
                            vim_hotkeys)
 
-required_attrs = (('name', (str, unicode)),
-                  ('description', (str, unicode)),
+required_attrs = (('name', str),
+                  ('description', str),
                   ('default_on', bool))
 
 optional_attrs = (('js_dependencies', tuple),
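
With the unicode = str shim gone, plugin attribute validation checks against str alone. A minimal sketch of how the required_attrs tuples are meant to be used; ExamplePlugin is a hypothetical stand-in for an imported plugin module, not part of the codebase:

    required_attrs = (('name', str),
                      ('description', str),
                      ('default_on', bool))

    class ExamplePlugin:
        # hypothetical plugin; real plugins are modules under searx/plugins/
        name = 'Example'
        description = 'Does nothing useful'
        default_on = False

    for attr_name, attr_type in required_attrs:
        assert isinstance(getattr(ExamplePlugin, attr_name), attr_type)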

+ 1 - 4
searx/plugins/https_rewrite.py

@@ -16,17 +16,14 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 '''
 
 import re
-import sys
+from urllib.parse import urlparse
 from lxml import etree
 from os import listdir, environ
 from os.path import isfile, isdir, join
 from searx.plugins import logger
 from flask_babel import gettext
 from searx import searx_dir
-from searx.url_utils import urlparse
 
-if sys.version_info[0] == 3:
-    unicode = str
 
 name = "HTTPS rewrite"
 description = gettext('Rewrite HTTP links to HTTPS if possible')

+ 1 - 1
searx/plugins/oa_doi_rewrite.py

@@ -1,6 +1,6 @@
+from urllib.parse import urlparse, parse_qsl
 from flask_babel import gettext
 import re
-from searx.url_utils import urlparse, parse_qsl
 from searx import settings
 
 

+ 1 - 1
searx/plugins/tracker_url_remover.py

@@ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 
 from flask_babel import gettext
 import re
-from searx.url_utils import urlunparse, parse_qsl, urlencode
+from urllib.parse import urlunparse, parse_qsl, urlencode
 
 regexes = {re.compile(r'utm_[^&]+'),
            re.compile(r'(wkey|wemail)[^&]*'),
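
The plugin's URL handling now imports urlunparse, parse_qsl and urlencode straight from urllib.parse. A simplified sketch of the same idea, filtering tracking parameters by prefix rather than with the plugin's full regex set:

    from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

    url = 'https://example.org/page?id=42&utm_source=newsletter'
    parsed = urlparse(url)
    # keep only query parameters that are not utm_* trackers
    query = [(k, v) for k, v in parse_qsl(parsed.query) if not k.startswith('utm_')]
    cleaned = urlunparse(parsed._replace(query=urlencode(query)))
    assert cleaned == 'https://example.org/page?id=42'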

+ 4 - 9
searx/preferences.py

@@ -6,16 +6,11 @@
 
 from base64 import urlsafe_b64encode, urlsafe_b64decode
 from zlib import compress, decompress
-from sys import version
+from urllib.parse import parse_qs, urlencode
 
 from searx import settings, autocomplete
 from searx.languages import language_codes as languages
 from searx.utils import match_language
-from searx.url_utils import parse_qs, urlencode
-
-if version[0] == '3':
-    # pylint: disable=invalid-name
-    unicode = str
 
 
 COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5  # 5 years
@@ -402,14 +397,14 @@ class Preferences(object):
 
         settings_kv['tokens'] = ','.join(self.tokens.values)
 
-        return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8')
+        return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()
 
     def parse_encoded_data(self, input_data):
         """parse (base64) preferences from request (``flask.request.form['preferences']``)"""
-        decoded_data = decompress(urlsafe_b64decode(input_data.encode('utf-8')))
+        decoded_data = decompress(urlsafe_b64decode(input_data.encode()))
         dict_data = {}
         for x, y in parse_qs(decoded_data).items():
-            dict_data[x.decode('utf8')] = y[0].decode('utf8')
+            dict_data[x.decode()] = y[0].decode()
         self.parse_dict(dict_data)
 
     def parse_dict(self, input_data):
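
In Python 3, str.encode() and bytes.decode() default to UTF-8, so the explicit 'utf-8' arguments can be dropped. A self-contained sketch of the same round-trip outside the Preferences class; settings_kv is an illustrative dict, not the real preference set:

    from base64 import urlsafe_b64encode, urlsafe_b64decode
    from urllib.parse import urlencode, parse_qs
    from zlib import compress, decompress

    settings_kv = {'language': 'en-US', 'theme': 'oscar'}

    # query string -> bytes -> zlib -> URL-safe base64 -> str
    encoded = urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode()

    # parse_qs on bytes yields bytes keys and values, hence the decode() calls
    raw = parse_qs(decompress(urlsafe_b64decode(encoded.encode())))
    decoded = {k.decode(): v[0].decode() for k, v in raw.items()}
    assert decoded == settings_kv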

+ 5 - 7
searx/query.py

@@ -17,15 +17,13 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 (C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at>
 '''
 
+import re
+
 from searx.languages import language_codes
 from searx.engines import (
     categories, engines, engine_shortcuts
 )
-import re
-import sys
 
-if sys.version_info[0] == 3:
-    unicode = str
 
 VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$')
 
@@ -93,7 +91,7 @@ class RawTextQuery(object):
                 # check if any language-code is equal with
                 # declared language-codes
                 for lc in language_codes:
-                    lang_id, lang_name, country, english_name = map(unicode.lower, lc)
+                    lang_id, lang_name, country, english_name = map(str.lower, lc)
 
                     # if correct language-code is found
                     # set it as new search-language
@@ -177,7 +175,7 @@ class RawTextQuery(object):
 
     def getFullQuery(self):
         # get full querry including whitespaces
-        return u''.join(self.query_parts)
+        return ''.join(self.query_parts)
 
 
 class SearchQuery(object):
@@ -185,7 +183,7 @@ class SearchQuery(object):
 
     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range,
                  timeout_limit=None, preferences=None, external_bang=None):
-        self.query = query.encode('utf-8')
+        self.query = query.encode()
         self.engines = engines
         self.categories = categories
         self.lang = lang
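
With only one string type left, the language-detection loop can map the built-in str.lower over each language tuple directly. A reduced example with a single illustrative entry standing in for the full language_codes table:

    language_codes = (('en-US', 'English', 'United States', 'English'),)

    for lc in language_codes:
        lang_id, lang_name, country, english_name = map(str.lower, lc)
    assert (lang_id, country) == ('en-us', 'united states')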

+ 5 - 8
searx/results.py

@@ -1,14 +1,11 @@
 import re
-import sys
 from collections import defaultdict
 from operator import itemgetter
 from threading import RLock
+from urllib.parse import urlparse, unquote
 from searx import logger
 from searx.engines import engines
-from searx.url_utils import urlparse, unquote
 
-if sys.version_info[0] == 3:
-    basestring = str
 
 CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)
 WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
@@ -16,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U)
 
 # return the meaningful length of the content for a result
 def result_content_len(content):
-    if isinstance(content, basestring):
+    if isinstance(content, str):
         return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
     else:
         return 0
@@ -161,11 +158,11 @@ class ResultContainer(object):
                 self._number_of_results.append(result['number_of_results'])
             else:
                 # standard result (url, title, content)
-                if 'url' in result and not isinstance(result['url'], basestring):
+                if 'url' in result and not isinstance(result['url'], str):
                     logger.debug('result: invalid URL: %s', str(result))
-                elif 'title' in result and not isinstance(result['title'], basestring):
+                elif 'title' in result and not isinstance(result['title'], str):
                     logger.debug('result: invalid title: %s', str(result))
-                elif 'content' in result and not isinstance(result['content'], basestring):
+                elif 'content' in result and not isinstance(result['content'], str):
                     logger.debug('result: invalid content: %s', str(result))
                 else:
                     self._merge_result(result, standard_result_count + 1)
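
isinstance(..., str) now covers everything basestring handled on Python 2. A standalone sketch of the content-length helper as it behaves after the change; the regex is copied from the module above:

    import re

    CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U)

    def result_content_len(content):
        # only plain str counts; anything else (None, bytes, numbers) is length 0
        if isinstance(content, str):
            return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content))
        return 0

    assert result_content_len('foo, bar') == 6
    assert result_content_len(None) == 0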

+ 4 - 11
searx/search.py

@@ -20,8 +20,8 @@ import sys
 import threading
 from time import time
 from uuid import uuid4
+from _thread import start_new_thread
 
-import six
 from flask_babel import gettext
 import requests.exceptions
 import searx.poolrequests as requests_lib
@@ -37,13 +37,6 @@ from searx import logger
 from searx.plugins import plugins
 from searx.exceptions import SearxParameterException
 
-try:
-    from thread import start_new_thread
-except:
-    from _thread import start_new_thread
-
-if sys.version_info[0] == 3:
-    unicode = str
 
 logger = logger.getChild('search')
 
@@ -355,11 +348,11 @@ def get_search_query_from_webapp(preferences, form):
         load_default_categories = True
         for pd_name, pd in form.items():
             if pd_name == 'categories':
-                query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories)
+                query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories)
             elif pd_name == 'engines':
                 pd_engines = [{'category': engines[engine].categories[0],
                                'name': engine}
-                              for engine in map(unicode.strip, pd.split(',')) if engine in engines]
+                              for engine in map(str.strip, pd.split(',')) if engine in engines]
                 if pd_engines:
                     query_engines.extend(pd_engines)
                     load_default_categories = False
@@ -434,7 +427,7 @@ class Search(object):
 
             # This means there was a valid bang and the
             # rest of the search does not need to be continued
-            if isinstance(self.result_container.redirect_url, six.string_types):
+            if isinstance(self.result_container.redirect_url, str):
                 return self.result_container
         # start time
         start_time = time()
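
Form fields such as 'categories' and 'engines' arrive as comma-separated strings, and the cleanup now uses the plain built-in str.strip. A small illustration with a hypothetical form value, without the membership check against the real categories dict:

    form_value = ' general , images ,videos '
    selected = [c for c in map(str.strip, form_value.split(',')) if c]
    assert selected == ['general', 'images', 'videos']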

+ 2 - 2
searx/testing.py

@@ -17,7 +17,7 @@ from unittest2 import TestCase
 class SearxTestLayer:
     """Base layer for non-robot tests."""
 
-    __name__ = u'SearxTestLayer'
+    __name__ = 'SearxTestLayer'
 
     @classmethod
     def setUp(cls):
@@ -66,7 +66,7 @@ class SearxRobotLayer():
             stderr=subprocess.STDOUT
         )
         if hasattr(self.server.stdout, 'read1'):
-            print(self.server.stdout.read1(1024).decode('utf-8'))
+            print(self.server.stdout.read1(1024).decode())
 
     def tearDown(self):
         os.kill(self.server.pid, 9)

+ 0 - 30
searx/url_utils.py

@@ -1,30 +0,0 @@
-from sys import version_info
-
-if version_info[0] == 2:
-    from urllib import quote, quote_plus, unquote, urlencode
-    from urlparse import parse_qs, parse_qsl, urljoin, urlparse, urlunparse, ParseResult
-else:
-    from urllib.parse import (
-        parse_qs,
-        parse_qsl,
-        quote,
-        quote_plus,
-        unquote,
-        urlencode,
-        urljoin,
-        urlparse,
-        urlunparse,
-        ParseResult
-    )
-
-
-__export__ = (parse_qs,
-              parse_qsl,
-              quote,
-              quote_plus,
-              unquote,
-              urlencode,
-              urljoin,
-              urlparse,
-              urlunparse,
-              ParseResult)
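
With the compatibility shim deleted, every caller imports from urllib.parse directly. A quick sanity check of the replacement imports, using an illustrative URL:

    from urllib.parse import urlencode, urljoin, urlparse

    base = 'https://example.org/search'
    url = urljoin(base, '?' + urlencode({'q': 'searx'}))
    assert url == 'https://example.org/search?q=searx'
    assert urlparse(url).query == 'q=searx'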

+ 25 - 52
searx/utils.py

@@ -1,21 +1,22 @@
 # -*- coding: utf-8 -*-
+import os
+import sys
 import csv
 import hashlib
 import hmac
-import os
 import re
+import json
 
-from babel.core import get_global
-from babel.dates import format_date
 from codecs import getincrementalencoder
 from imp import load_source
 from numbers import Number
 from os.path import splitext, join
-from io import open
+from io import open, StringIO
 from random import choice
+from html.parser import HTMLParser
 from lxml.etree import XPath
-import sys
-import json
+from babel.core import get_global
+from babel.dates import format_date
 
 from searx import settings
 from searx.version import VERSION_STRING
@@ -23,23 +24,6 @@ from searx.languages import language_codes
 from searx import settings
 from searx import logger
 
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
-
-try:
-    from HTMLParser import HTMLParser
-except:
-    from html.parser import HTMLParser
-
-if sys.version_info[0] == 3:
-    unichr = chr
-    unicode = str
-    IS_PY2 = False
-    basestring = str
-else:
-    IS_PY2 = True
 
 logger = logger.getChild('utils')
 
@@ -75,19 +59,19 @@ def highlight_content(content, query):
     if content.find('<') != -1:
         return content
 
-    query = query.decode('utf-8')
+    query = query.decode()
     if content.lower().find(query.lower()) > -1:
-        query_regex = u'({0})'.format(re.escape(query))
+        query_regex = '({0})'.format(re.escape(query))
         content = re.sub(query_regex, '<span class="highlight">\\1</span>',
                          content, flags=re.I | re.U)
     else:
         regex_parts = []
         for chunk in query.split():
             if len(chunk) == 1:
-                regex_parts.append(u'\\W+{0}\\W+'.format(re.escape(chunk)))
+                regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
             else:
-                regex_parts.append(u'{0}'.format(re.escape(chunk)))
-        query_regex = u'({0})'.format('|'.join(regex_parts))
+                regex_parts.append('{0}'.format(re.escape(chunk)))
+        query_regex = '({0})'.format('|'.join(regex_parts))
         content = re.sub(query_regex, '<span class="highlight">\\1</span>',
                          content, flags=re.I | re.U)
 
@@ -124,21 +108,21 @@ class HTMLTextExtractor(HTMLParser):
     def handle_charref(self, number):
         if not self.is_valid_tag():
             return
-        if number[0] in (u'x', u'X'):
+        if number[0] in ('x', 'X'):
             codepoint = int(number[1:], 16)
         else:
             codepoint = int(number)
-        self.result.append(unichr(codepoint))
+        self.result.append(chr(codepoint))
 
     def handle_entityref(self, name):
         if not self.is_valid_tag():
             return
         # codepoint = htmlentitydefs.name2codepoint[name]
-        # self.result.append(unichr(codepoint))
+        # self.result.append(chr(codepoint))
         self.result.append(name)
 
     def get_text(self):
-        return u''.join(self.result).strip()
+        return ''.join(self.result).strip()
 
 
 def html_to_text(html):
@@ -163,22 +147,14 @@ class UnicodeWriter:
         self.encoder = getincrementalencoder(encoding)()
 
     def writerow(self, row):
-        if IS_PY2:
-            row = [s.encode("utf-8") if hasattr(s, 'encode') else s for s in row]
         self.writer.writerow(row)
         # Fetch UTF-8 output from the queue ...
         data = self.queue.getvalue()
-        if IS_PY2:
-            data = data.decode("utf-8")
-        else:
-            data = data.strip('\x00')
+        data = data.strip('\x00')
         # ... and reencode it into the target encoding
         data = self.encoder.encode(data)
         # write to the target stream
-        if IS_PY2:
-            self.stream.write(data)
-        else:
-            self.stream.write(data.decode("utf-8"))
+        self.stream.write(data.decode())
         # empty queue
         self.queue.truncate(0)
 
@@ -253,7 +229,7 @@ def dict_subset(d, properties):
 def prettify_url(url, max_length=74):
     if len(url) > max_length:
         chunk_len = int(max_length / 2 + 1)
-        return u'{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
+        return '{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
     else:
         return url
 
@@ -310,7 +286,7 @@ def int_or_zero(num):
 
 def is_valid_lang(lang):
     is_abbr = (len(lang) == 2)
-    lang = lang.lower().decode('utf-8')
+    lang = lang.lower().decode()
     if is_abbr:
         for l in language_codes:
             if l[0][:2] == lang:
@@ -407,17 +383,14 @@ def new_hmac(secret_key, url):
             secret_key_bytes = secret_key
         else:
             raise err
-    if sys.version_info[0] == 2:
-        return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest()
-    else:
-        return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
+    return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
 
 
 def to_string(obj):
-    if isinstance(obj, basestring):
+    if isinstance(obj, str):
         return obj
     if isinstance(obj, Number):
-        return unicode(obj)
+        return str(obj)
     if hasattr(obj, '__str__'):
         return obj.__str__()
     if hasattr(obj, '__repr__'):
@@ -433,9 +406,9 @@ def ecma_unescape(s):
     """
     # s = unicode(s)
     # "%u5409" becomes "吉"
-    s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+    s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
     # "%20" becomes " ", "%F3" becomes "ó"
-    s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+    s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)
     return s
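
chr() in Python 3 already returns a one-character str for any code point, which is what unichr() did on Python 2. A self-contained sketch of the ecma_unescape behaviour; the two regex patterns are assumed here, since their definitions sit outside this hunk:

    import re

    # assumed patterns: "%uXXXX" and "%XX" escapes
    ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
    ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)

    def ecma_unescape(s):
        # "%u5409" becomes "吉", "%20" becomes " "
        s = ecma_unescape4_re.sub(lambda e: chr(int(e.group(1), 16)), s)
        s = ecma_unescape2_re.sub(lambda e: chr(int(e.group(1), 16)), s)
        return s

    assert ecma_unescape('%u5409%20x') == '吉 x'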
 
 

+ 26 - 45
searx/webapp.py

@@ -17,37 +17,35 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
 '''
 
+import sys
+if sys.version_info[0] < 3:
+    print('\033[1;31m Python2 is no longer supported\033[0m')
+    exit(1)
+
 if __name__ == '__main__':
-    from sys import path
     from os.path import realpath, dirname
-    path.append(realpath(dirname(realpath(__file__)) + '/../'))
+    sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
 
 import hashlib
 import hmac
 import json
 import os
-import sys
 
 import requests
 
 from searx import logger
 logger = logger.getChild('webapp')
 
-try:
-    from pygments import highlight
-    from pygments.lexers import get_lexer_by_name
-    from pygments.formatters import HtmlFormatter
-except:
-    logger.critical("cannot import dependency: pygments")
-    from sys import exit
-    exit(1)
-try:
-    from cgi import escape
-except:
-    from html import escape
-from six import next
 from datetime import datetime, timedelta
 from time import time
+from html import escape
+from io import StringIO
+from urllib.parse import urlencode, urlparse, urljoin
+
+from pygments import highlight
+from pygments.lexers import get_lexer_by_name
+from pygments.formatters import HtmlFormatter
+
 from werkzeug.middleware.proxy_fix import ProxyFix
 from flask import (
     Flask, request, render_template, url_for, Response, make_response,
@@ -78,7 +76,6 @@ from searx.plugins import plugins
 from searx.plugins.oa_doi_rewrite import get_doi_resolver
 from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
-from searx.url_utils import urlencode, urlparse, urljoin
 from searx.utils import new_hmac
 
 # check if the pyopenssl package is installed.
@@ -89,19 +86,6 @@ except ImportError:
     logger.critical("The pyopenssl package has to be installed.\n"
                     "Some HTTPS connections will fail")
 
-try:
-    from cStringIO import StringIO
-except:
-    from io import StringIO
-
-
-if sys.version_info[0] == 3:
-    unicode = str
-    PY3 = True
-else:
-    logger.warning('\033[1;31m Python2 is no longer supported\033[0m')
-    exit(1)
-
 # serve pages with HTTP/1.1
 from werkzeug.serving import WSGIRequestHandler
 WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0'))
@@ -315,11 +299,11 @@ def proxify(url):
     if not settings.get('result_proxy'):
         return url
 
-    url_params = dict(mortyurl=url.encode('utf-8'))
+    url_params = dict(mortyurl=url.encode())
 
     if settings['result_proxy'].get('key'):
         url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'],
-                                           url.encode('utf-8'),
+                                           url.encode(),
                                            hashlib.sha256).hexdigest()
 
     return '{0}?{1}'.format(settings['result_proxy']['url'],
@@ -347,10 +331,10 @@ def image_proxify(url):
     if settings.get('result_proxy'):
         return proxify(url)
 
-    h = new_hmac(settings['server']['secret_key'], url.encode('utf-8'))
+    h = new_hmac(settings['server']['secret_key'], url.encode())
 
     return '{0}?{1}'.format(url_for('image_proxy'),
-                            urlencode(dict(url=url.encode('utf-8'), h=h)))
+                            urlencode(dict(url=url.encode(), h=h)))
 
 
 def render(template_name, override_theme=None, **kwargs):
@@ -424,7 +408,7 @@ def render(template_name, override_theme=None, **kwargs):
 
     kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab')
 
-    kwargs['unicode'] = unicode
+    kwargs['unicode'] = str
 
     kwargs['preferences'] = request.preferences
 
@@ -612,7 +596,7 @@ def index():
             if 'content' in result and result['content']:
                 result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query)
             if 'title' in result and result['title']:
-                result['title'] = highlight_content(escape(result['title'] or u''), search_query.query)
+                result['title'] = highlight_content(escape(result['title'] or ''), search_query.query)
         else:
             if result.get('content'):
                 result['content'] = html_to_text(result['content']).strip()
@@ -634,14 +618,14 @@ def index():
                     minutes = int((timedifference.seconds / 60) % 60)
                     hours = int(timedifference.seconds / 60 / 60)
                     if hours == 0:
-                        result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes)
+                        result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes)
                     else:
-                        result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
+                        result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes)  # noqa
                 else:
                     result['publishedDate'] = format_date(result['publishedDate'])
 
     if output_format == 'json':
-        return Response(json.dumps({'query': search_query.query.decode('utf-8'),
+        return Response(json.dumps({'query': search_query.query.decode(),
                                     'number_of_results': number_of_results,
                                     'results': results,
                                     'answers': list(result_container.answers),
@@ -670,7 +654,7 @@ def index():
             csv.writerow([row.get(key, '') for key in keys])
         csv.stream.seek(0)
         response = Response(csv.stream.read(), mimetype='application/csv')
-        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode('utf-8'))
+        cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query.decode())
         response.headers.add('Content-Disposition', cont_disp)
         return response
 
@@ -754,10 +738,7 @@ def autocompleter():
     disabled_engines = request.preferences.engines.get_disabled()
 
     # parse query
-    if PY3:
-        raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines)
-    else:
-        raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines)
+    raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines)
     raw_text_query.parse_query()
 
     # check if search query is set
@@ -879,7 +860,7 @@ def _is_selected_language_supported(engine, preferences):
 
 @app.route('/image_proxy', methods=['GET'])
 def image_proxy():
-    url = request.args.get('url').encode('utf-8')
+    url = request.args.get('url').encode()
 
     if not url:
         return '', 400
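
proxify() and image_proxify() keep signing URLs with HMAC-SHA256; only the encode() calls lose their redundant 'utf-8' argument. A minimal sketch of the signing step with stand-in values for the secret key and URL:

    import hashlib
    import hmac

    secret_key = 'ultrasecretkey'          # stand-in for settings['server']['secret_key']
    url = 'https://example.org/image.png'

    # hmac.new() needs bytes for both the key and the message on Python 3
    h = hmac.new(secret_key.encode(), url.encode(), hashlib.sha256).hexdigest()
    assert len(h) == 64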

Some files were not shown because too many files changed in this diff