Browse Source

Merge pull request #781 from return42/fix-google

[fix] google engine: remove ads and fix mobile_ui selector
Markus Heiser 3 years ago
parent
commit
db6f617c0f
1 changed file with 15 additions and 7 deletions
  1. 15 7
      searx/engines/google.py

+ 15 - 7
searx/engines/google.py

@@ -112,7 +112,8 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'}
 # ------------------------
 # ------------------------
 
 
 # google results are grouped into <div class="g ..." ../>
 # google results are grouped into <div class="g ..." ../>
-results_xpath = '//div[contains(@class, "g")]'
+results_xpath = '//div[@id="search"]//div[contains(@class, "g ")]'
+results_xpath_mobile_ui = '//div[contains(@class, "g ")]'
 
 
 # google *sections* are no usual *results*, we ignore them
 # google *sections* are no usual *results*, we ignore them
 g_section_with_header = './g-section-with-header'
 g_section_with_header = './g-section-with-header'
@@ -336,7 +337,12 @@ def response(resp):
                 logger.error(e, exc_info=True)
                 logger.error(e, exc_info=True)
 
 
     # parse results
     # parse results
-    for result in eval_xpath_list(dom, results_xpath):
+
+    _results_xpath = results_xpath
+    if use_mobile_ui:
+        _results_xpath = results_xpath_mobile_ui
+
+    for result in eval_xpath_list(dom, _results_xpath):
 
 
         # google *sections*
         # google *sections*
         if extract_text(eval_xpath(result, g_section_with_header)):
         if extract_text(eval_xpath(result, g_section_with_header)):
@@ -347,20 +353,22 @@ def response(resp):
             title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
             title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
             if title_tag is None:
             if title_tag is None:
                 # this not one of the common google results *section*
                 # this not one of the common google results *section*
-                logger.debug('ingoring <div class="g" ../> section: missing title')
+                logger.debug('ingoring item from the result_xpath list: missing title')
                 continue
                 continue
             title = extract_text(title_tag)
             title = extract_text(title_tag)
             url = eval_xpath_getindex(result, href_xpath, 0, None)
             url = eval_xpath_getindex(result, href_xpath, 0, None)
             if url is None:
             if url is None:
                 continue
                 continue
             content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
             content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
+            if content is None:
+                logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title)
+                continue
+
+            logger.debug('add link to results: %s', title)
             results.append({'url': url, 'title': title, 'content': content})
             results.append({'url': url, 'title': title, 'content': content})
+
         except Exception as e:  # pylint: disable=broad-except
         except Exception as e:  # pylint: disable=broad-except
             logger.error(e, exc_info=True)
             logger.error(e, exc_info=True)
-            # from lxml import etree
-            # logger.debug(etree.tostring(result, pretty_print=True))
-            # import pdb
-            # pdb.set_trace()
             continue
             continue
 
 
     # parse suggestion
     # parse suggestion