|
@@ -131,7 +131,6 @@ from lxml import html
|
|
from searx import locales
|
|
from searx import locales
|
|
from searx.utils import (
|
|
from searx.utils import (
|
|
extract_text,
|
|
extract_text,
|
|
- extr,
|
|
|
|
eval_xpath,
|
|
eval_xpath,
|
|
eval_xpath_list,
|
|
eval_xpath_list,
|
|
eval_xpath_getindex,
|
|
eval_xpath_getindex,
|
|
@@ -249,6 +248,33 @@ def _extract_published_date(published_date_raw):
|
|
return None
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def parse_data_string(resp):
    """Extract and parse the ``data: [...]`` array passed to ``kit.start(app, ...)``.

    The response text is expected to contain a call of this shape::

        kit.start(app, element, {
                   node_ids: [0, 19],
                   data: [{"type":"data","data" .... ["q","goggles_id"],"route":1,"url":1}}]
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

    The marked span (from the opening ``[`` up to and including its matching
    ``]``) is cut out of the text and handed to ``js_variable_to_python``.

    :param resp: response object; only its ``text`` attribute is read.
    :returns: whatever ``js_variable_to_python`` returns for the extracted
        array literal.
    :raises ValueError: if ``kit.start(app,`` or the ``data: [{"type":"data"``
        marker is not found, or if the array's closing bracket is missing.
    """
    # Read the attribute once; every lookup below works on the same string.
    text = resp.text

    kit_start = text.index("kit.start(app,")
    # Search from kit_start onwards instead of slicing a copy of the text.
    start = text.index('data: [{"type":"data"', kit_start) + len('data: ')

    depth = 0  # nesting level of square brackets outside of string literals
    quote = None  # delimiter of the string literal currently being scanned
    escaped = False  # True when the previous character was a backslash
    end = None

    for pos in range(start, len(text)):
        c = text[pos]

        if quote is not None:
            # Inside a string literal: brackets are plain text here and must
            # not influence the nesting level (e.g. a title containing "]").
            if escaped:
                escaped = False
            elif c == "\\":
                escaped = True
            elif c == quote:
                quote = None
            continue

        if c in "\"'`":
            quote = c
        elif c == "[":
            depth += 1
        elif c == "]":
            depth -= 1
            if depth == 0:
                # Matching bracket of the opening "[" found; include it.
                end = pos + 1
                break

    if end is None:
        raise ValueError("unbalanced brackets in the data array of kit.start(app, ...)")

    return js_variable_to_python(text[start:end])
|
|
|
|
+
|
|
|
|
+
|
|
def response(resp) -> EngineResults:
|
|
def response(resp) -> EngineResults:
|
|
|
|
|
|
if brave_category in ('search', 'goggles'):
|
|
if brave_category in ('search', 'goggles'):
|
|
@@ -257,9 +283,8 @@ def response(resp) -> EngineResults:
|
|
if brave_category in ('news'):
|
|
if brave_category in ('news'):
|
|
return _parse_news(resp)
|
|
return _parse_news(resp)
|
|
|
|
|
|
- datastr = extr(resp.text, "const data = ", ";\n").strip()
|
|
|
|
-
|
|
|
|
- json_data = js_variable_to_python(datastr)
|
|
|
|
|
|
+ json_data = parse_data_string(resp)
|
|
|
|
+ # json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
|
|
json_resp = json_data[1]['data']['body']['response']
|
|
json_resp = json_data[1]['data']['body']['response']
|
|
|
|
|
|
if brave_category == 'images':
|
|
if brave_category == 'images':
|