webutils.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import csv
  4. import hashlib
  5. import hmac
  6. import re
  7. from io import StringIO
  8. from codecs import getincrementalencoder
  9. from searx import logger
  10. logger = logger.getChild('webutils')
  11. class UnicodeWriter:
  12. """
  13. A CSV writer which will write rows to CSV file "f",
  14. which is encoded in the given encoding.
  15. """
  16. def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
  17. # Redirect output to a queue
  18. self.queue = StringIO()
  19. self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
  20. self.stream = f
  21. self.encoder = getincrementalencoder(encoding)()
  22. def writerow(self, row):
  23. self.writer.writerow(row)
  24. # Fetch UTF-8 output from the queue ...
  25. data = self.queue.getvalue()
  26. data = data.strip('\x00')
  27. # ... and reencode it into the target encoding
  28. data = self.encoder.encode(data)
  29. # write to the target stream
  30. self.stream.write(data.decode())
  31. # empty queue
  32. self.queue.truncate(0)
  33. def writerows(self, rows):
  34. for row in rows:
  35. self.writerow(row)
  36. def get_resources_directory(searx_directory, subdirectory, resources_directory):
  37. if not resources_directory:
  38. resources_directory = os.path.join(searx_directory, subdirectory)
  39. if not os.path.isdir(resources_directory):
  40. raise Exception(resources_directory + " is not a directory")
  41. return resources_directory
  42. def get_themes(templates_path):
  43. """Returns available themes list."""
  44. themes = os.listdir(templates_path)
  45. if '__common__' in themes:
  46. themes.remove('__common__')
  47. return themes
  48. def get_static_files(static_path):
  49. static_files = set()
  50. static_path_length = len(static_path) + 1
  51. for directory, _, files in os.walk(static_path):
  52. for filename in files:
  53. f = os.path.join(directory[static_path_length:], filename)
  54. static_files.add(f)
  55. return static_files
  56. def get_result_templates(templates_path):
  57. result_templates = set()
  58. templates_path_length = len(templates_path) + 1
  59. for directory, _, files in os.walk(templates_path):
  60. if directory.endswith('result_templates'):
  61. for filename in files:
  62. f = os.path.join(directory[templates_path_length:], filename)
  63. result_templates.add(f)
  64. return result_templates
  65. def new_hmac(secret_key, url):
  66. try:
  67. secret_key_bytes = bytes(secret_key, 'utf-8')
  68. except TypeError as err:
  69. if isinstance(secret_key, bytes):
  70. secret_key_bytes = secret_key
  71. else:
  72. raise err
  73. return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
  74. def prettify_url(url, max_length=74):
  75. if len(url) > max_length:
  76. chunk_len = int(max_length / 2 + 1)
  77. return '{0}[...]{1}'.format(url[:chunk_len], url[-chunk_len:])
  78. else:
  79. return url
  80. def highlight_content(content, query):
  81. if not content:
  82. return None
  83. # ignoring html contents
  84. # TODO better html content detection
  85. if content.find('<') != -1:
  86. return content
  87. if content.lower().find(query.lower()) > -1:
  88. query_regex = '({0})'.format(re.escape(query))
  89. content = re.sub(query_regex, '<span class="highlight">\\1</span>',
  90. content, flags=re.I | re.U)
  91. else:
  92. regex_parts = []
  93. for chunk in query.split():
  94. if len(chunk) == 1:
  95. regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk)))
  96. else:
  97. regex_parts.append('{0}'.format(re.escape(chunk)))
  98. query_regex = '({0})'.format('|'.join(regex_parts))
  99. content = re.sub(query_regex, '<span class="highlight">\\1</span>',
  100. content, flags=re.I | re.U)
  101. return content