Browse Source

Replace every run of whitespace with a single space in HTML text

Cqoicebordel 10 years ago
parent
commit
52a57ee045
1 changed file with 2 additions and 0 deletions
  1. 2 0
      searx/utils.py

+ 2 - 0
searx/utils.py

@@ -119,6 +119,8 @@ class HTMLTextExtractor(HTMLParser):
 
 
 def html_to_text(html):
     """Return the text extracted from an HTML string, whitespace-collapsed.

     Every run of whitespace in ``html`` (newlines included) is reduced to a
     single space and leading/trailing whitespace is dropped. The normalized
     markup is then fed to an ``HTMLTextExtractor`` and the result of its
     ``get_text()`` is returned.

     The collapsing is applied to the raw markup *before* parsing, so it is
     not limited to text nodes: whitespace inside tags is normalized too.
     """
+    html = html.replace('\n', ' ')
     # NOTE(review): the replace() above looks redundant -- split() with no
     # arguments already treats newlines as whitespace; confirm and drop.
+    html = ' '.join(html.split())
     s = HTMLTextExtractor()
     s.feed(html)
     return s.get_text()