|
@@ -129,6 +129,7 @@ from lxml import html
|
|
|
|
|
|
from searx import locales
|
|
|
from searx.utils import (
|
|
|
+ extr,
|
|
|
extract_text,
|
|
|
eval_xpath,
|
|
|
eval_xpath_list,
|
|
@@ -253,33 +254,6 @@ def _extract_published_date(published_date_raw):
|
|
|
return None
|
|
|
|
|
|
|
|
|
-def parse_data_string(resp):
|
|
|
- # kit.start(app, element, {
|
|
|
- # node_ids: [0, 19],
|
|
|
- # data: [{"type":"data","data" .... ["q","goggles_id"],"route":1,"url":1}}]
|
|
|
- # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
|
- kit_start = resp.text.index("kit.start(app,")
|
|
|
- start = resp.text[kit_start:].index('data: [{"type":"data"')
|
|
|
- start = kit_start + start + len('data: ')
|
|
|
-
|
|
|
- lev = 0
|
|
|
- end = start
|
|
|
- inner = False
|
|
|
- for c in resp.text[start:]:
|
|
|
- if inner and lev == 0:
|
|
|
- break
|
|
|
- end += 1
|
|
|
- if c == "[":
|
|
|
- lev += 1
|
|
|
- inner = True
|
|
|
- continue
|
|
|
- if c == "]":
|
|
|
- lev -= 1
|
|
|
-
|
|
|
- json_data = js_variable_to_python(resp.text[start:end])
|
|
|
- return json_data
|
|
|
-
|
|
|
-
|
|
|
def response(resp) -> EngineResults:
|
|
|
|
|
|
if brave_category in ('search', 'goggles'):
|
|
@@ -288,7 +262,15 @@ def response(resp) -> EngineResults:
|
|
|
if brave_category in ('news'):
|
|
|
return _parse_news(resp)
|
|
|
|
|
|
- json_data = parse_data_string(resp)
|
|
|
+ # Example script source containing the data:
|
|
|
+ #
|
|
|
+ # kit.start(app, element, {
|
|
|
+ # node_ids: [0, 19],
|
|
|
+ # data: [{type:"data",data: .... ["q","goggles_id"],route:1,url:1}}]
|
|
|
+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
|
+ js_object = "[{" + extr(resp.text, "data: [{", "}}],") + "}}]"
|
|
|
+ json_data = js_variable_to_python(js_object)
|
|
|
+
|
|
|
# json_data is a list and at the second position (0,1) in this list we find the "response" data we need ..
|
|
|
json_resp = json_data[1]['data']['body']['response']
|
|
|
|