Browse Source

[fix] fix invalid escape error in Baidu Images & default config typo

Zhijie He 1 month ago
parent
commit
38caa49540
2 changed files with 27 additions and 16 deletions
  1. 24 13
      searx/engines/baidu.py
  2. 3 3
      searx/settings.yml

+ 24 - 13
searx/engines/baidu.py

@@ -11,6 +11,7 @@ from urllib.parse import urlencode
 from datetime import datetime
 import time
 import json
+import re
 
 from searx.exceptions import SearxEngineAPIException
 from searx.utils import html_to_text
@@ -92,11 +93,12 @@ def request(query, params):
 
 
 def response(resp):
-    try:
-        data = json.loads(resp.text, strict=False)
-    except Exception as e:
-        raise SearxEngineAPIException(f"Invalid response: {e}") from e
 
+    text = resp.text
+    if baidu_category == 'images':
+        # Baidu's JSON encoder escapes / and ' as \/ and \'; \' is not a valid JSON escape, so unescape both
+        text = text.replace(r"\/", "/").replace(r"\'", "'")
+    data = json.loads(text, strict=False)
     parsers = {'general': parse_general, 'images': parse_images, 'it': parse_it}
 
     return parsers[baidu_category](data)
@@ -133,19 +135,28 @@ def parse_images(data):
     results = []
     if "data" in data:
         for item in data["data"]:
+            if not item:
+                # the last item in the JSON list is empty: the JSON string ends with "}, {}]"
+                continue
             replace_url = item.get("replaceUrl", [{}])[0]
-            from_url = replace_url.get("FromURL", "").replace("\\/", "/")
-            img_src = replace_url.get("ObjURL", "").replace("\\/", "/")
-
+            width = item.get("width")
+            height = item.get("height")
+            img_date = item.get("bdImgnewsDate")
+            publishedDate = None
+            if img_date:
+                publishedDate = datetime.strptime(img_date, "%Y-%m-%d %H:%M")
             results.append(
                 {
                     "template": "images.html",
-                    "url": from_url,
-                    "thumbnail_src": item.get("thumbURL", ""),
-                    "img_src": img_src,
-                    "content": html_to_text(item.get("fromPageTitleEnc", "")),
-                    "title": html_to_text(item.get("fromPageTitle", "")),
-                    "source": item.get("fromURLHost", ""),
+                    "url": replace_url.get("FromURL"),
+                    "thumbnail_src": item.get("thumbURL"),
+                    "img_src": replace_url.get("ObjURL"),
+                    "title": html_to_text(item.get("fromPageTitle")),
+                    "source": item.get("fromURLHost"),
+                    "resolution": f"{width} x {height}",
+                    "img_format": item.get("type"),
+                    "filesize": item.get("filesize"),
+                    "publishedDate": publishedDate,
                 }
             )
     return results

+ 3 - 3
searx/settings.yml

@@ -502,21 +502,21 @@ engines:
     categories: music
 
   - name: baidu
-    baidu_categories: general
+    baidu_category: general
     categories: [general]
     engine: baidu
     shortcut: bd
     disabled: true
 
   - name: baidu images
-    baidu_categories: images
+    baidu_category: images
     categories: [images]
     engine: baidu
     shortcut: bdi
     disabled: true
 
   - name: baidu kaifa
-    baidu_categories: it
+    baidu_category: it
     categories: [it]
     engine: baidu
     shortcut: bdk