Browse Source

[fix] engine qwant (web-lite) - ignore advertising ads

Closes: https://github.com/searxng/searxng/issues/2812
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
Markus Heiser 1 year ago
parent
commit
043dcbf7c5
1 changed file with 4 additions and 1 deletion
  1. 4 1
      searx/engines/qwant.py

+ 4 - 1
searx/engines/qwant.py

@@ -159,9 +159,12 @@ def parse_web_lite(resp):
     dom = lxml.html.fromstring(resp.text)
 
     for item in eval_xpath_list(dom, '//section/article'):
+        if eval_xpath(item, "./span[contains(@class, 'tooltip')]"):
+            # ignore randomly interspersed advertising adds
+            continue
         results.append(
             {
-                'url': extract_text(eval_xpath(item, './span')),
+                'url': extract_text(eval_xpath(item, "./span[contains(@class, 'url partner')]")),
                 'title': extract_text(eval_xpath(item, './h2/a')),
                 'content': extract_text(eval_xpath(item, './p')),
             }