Browse Source

[fix] baidu engine: properly decode HTML escape codes

Aadniz 1 month ago
parent
commit
02f5002a5f
1 changed file with 7 additions and 2 deletions
  1. 7 2
      searx/engines/baidu.py

+ 7 - 2
searx/engines/baidu.py

@@ -9,6 +9,7 @@
 
 from urllib.parse import urlencode
 from datetime import datetime
+from html import unescape
 import time
 import json
 
@@ -119,11 +120,15 @@ def parse_general(data):
             except (ValueError, TypeError):
                 published_date = None
 
+        # title and content sometimes contain HTML escape codes such as &amp; &#39; &quot; etc.
+        title = unescape(entry["title"])
+        content = unescape(entry.get("abs", ""))
+
         results.append(
             {
-                "title": entry["title"],
+                "title": title,
                 "url": entry["url"],
-                "content": entry.get("abs", ""),
+                "content": content,
                 "publishedDate": published_date,
             }
         )