Browse Source

[fix] engines bing.images & brave.videos - fix parse data string

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 2 months ago
parent
commit
4994fbb5af
1 changed file with 29 additions and 4 deletions
  1. 29 4
      searx/engines/brave.py

+ 29 - 4
searx/engines/brave.py

@@ -131,7 +131,6 @@ from lxml import html
 from searx import locales
 from searx import locales
 from searx.utils import (
 from searx.utils import (
     extract_text,
     extract_text,
-    extr,
     eval_xpath,
     eval_xpath,
     eval_xpath_list,
     eval_xpath_list,
     eval_xpath_getindex,
     eval_xpath_getindex,
@@ -249,6 +248,33 @@ def _extract_published_date(published_date_raw):
         return None
         return None
 
 
 
 
+def parse_data_string(resp):
+    """Extract and parse the ``data`` array passed to ``kit.start(...)``.
+
+    The payload is embedded in the response text like this::
+
+        kit.start(app, element, {
+           node_ids: [0, 19],
+           data: [{"type":"data","data" .... ["q","goggles_id"],"route":1,"url":1}}]
+                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+    The marked span (from the opening ``[`` up to and including its matching
+    ``]``) is cut out of the text and handed to ``js_variable_to_python``.
+
+    Brackets that occur inside string literals are ignored while searching
+    for the matching ``]``, so a value such as ``"foo ] bar"`` does not
+    terminate the span prematurely.
+
+    :param resp: response object; only its ``text`` attribute is read.
+    :return: the result of ``js_variable_to_python`` for the extracted span.
+    :raises ValueError: if one of the start markers is missing or the array
+        is never closed.
+    """
+    text = resp.text
+    kit_start = text.index("kit.start(app,")
+    # searching with a start offset avoids copying the tail of the document
+    start = text.index('data: [{"type":"data"', kit_start) + len('data: ')
+
+    lev = 0
+    quote = None  # quote character of the string literal we are inside of
+    escaped = False  # previous character was a backslash inside a string
+    for pos in range(start, len(text)):
+        c = text[pos]
+        if quote is not None:
+            # inside a string literal: only look for its (unescaped) end
+            if escaped:
+                escaped = False
+            elif c == "\\":
+                escaped = True
+            elif c == quote:
+                quote = None
+            continue
+        if c in ('"', "'"):
+            quote = c
+        elif c == "[":
+            lev += 1
+        elif c == "]":
+            lev -= 1
+            if lev == 0:
+                # pos is the index of the bracket closing the outer array
+                return js_variable_to_python(text[start : pos + 1])
+
+    raise ValueError("unbalanced brackets in kit.start(..) data string")
+
+
 def response(resp) -> EngineResults:
 def response(resp) -> EngineResults:
 
 
     if brave_category in ('search', 'goggles'):
     if brave_category in ('search', 'goggles'):
@@ -257,9 +283,8 @@ def response(resp) -> EngineResults:
     if brave_category in ('news'):
     if brave_category in ('news'):
         return _parse_news(resp)
         return _parse_news(resp)
 
 
-    datastr = extr(resp.text, "const data = ", ";\n").strip()
-
-    json_data = js_variable_to_python(datastr)
+    json_data = parse_data_string(resp)
+    # json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
     json_resp = json_data[1]['data']['body']['response']
     json_resp = json_data[1]['data']['body']['response']
 
 
     if brave_category == 'images':
     if brave_category == 'images':