5 years ago · 52eba0c721
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -99,9 +99,9 @@ time_range_dict = {
 
				 
			
 
				 # Filter results. 0: None, 1: Moderate, 2: Strict
			
 
				 filter_mapping = {
			
 
				-    0 : 'off',
			
 
				-    1 : 'medium',
			
 
				-    2 : 'high'
			
 
				+    0: 'off',
			
 
				+    1: 'medium',
			
 
				+    2: 'high'
			
 
				 }
			
 
				 
			
 
				 # specific xpath variables
			
@@ -111,7 +111,7 @@ filter_mapping = {
 
				 results_xpath = '//div[@class="g"]'
			
 
				 
			
 
				 # google *sections* are not regular *results*, so we ignore them
			
 
				-g_section_with_header='./g-section-with-header'
			
 
				+g_section_with_header = './g-section-with-header'
			
 
				 
			
 
				 # the title is a h3 tag relative to the result group
			
 
				 title_xpath = './/h3[1]'
			
@@ -131,6 +131,7 @@ suggestion_xpath = '//div[contains(@class, "card-section")]//a'
 
				 # *spelling suggestions*, we use them anyway.
			
 
				 spelling_suggestion_xpath = '//div[@class="med"]/p/a'
			
 
				 
			
 
				+
			
 
				 def extract_text_from_dom(result, xpath):
			
 
				     """returns extract_text on the first result selected by the xpath or None"""
			
 
				     r = eval_xpath(result, xpath)
			
@@ -138,6 +139,7 @@ def extract_text_from_dom(result, xpath):
 
				         return extract_text(r[0])
			
 
				     return None
			
 
				 
			
 
				+
			
 
				 def get_lang_country(params, lang_list, custom_aliases):
			
 
				     """Returns a tuple with *language* on its first and *country* on its second
			
 
				     position."""
			
@@ -159,6 +161,7 @@ def get_lang_country(params, lang_list, custom_aliases):
 
				 
			
 
				     return language, country, lang_country
			
 
				 
			
 
				+
			
 
				 def request(query, params):
			
 
				     """Google search request"""
			
 
				 
			
@@ -170,7 +173,7 @@ def request(query, params):
 
				     subdomain = 'www.' + google_domains.get(country.upper(), 'google.com')
			
 
				 
			
 
				     # https://www.google.de/search?q=corona&hl=de-DE&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium
			
 
				-    query_url = 'https://'+ subdomain + '/search' + "?" + urlencode({
			
 
				+    query_url = 'https://' + subdomain + '/search' + "?" + urlencode({
			
 
				         'q': query,
			
 
				         'hl': lang_country,
			
 
				         'lr': "lang_" + language,
			
@@ -190,16 +193,17 @@ def request(query, params):
 
				     # en-US,en;q=0.8,en;q=0.5
			
 
				     params['headers']['Accept-Language'] = (
			
 
				         lang_country + ',' + language + ';q=0.8,' + language + ';q=0.5'
			
 
				-        )
			
 
				+    )
			
 
				     logger.debug("HTTP header Accept-Language --> %s",
			
 
				                  params['headers']['Accept-Language'])
			
 
				     params['headers']['Accept'] = (
			
 
				         'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
			
 
				-        )
			
 
				-    #params['google_subdomain'] = subdomain
			
 
				+    )
			
 
				+    # params['google_subdomain'] = subdomain
			
 
				 
			
 
				     return params
			
 
				 
			
 
				+
			
 
				 def response(resp):
			
 
				     """Get response from google's search request"""
			
 
				     results = []
			
@@ -249,16 +253,16 @@ def response(resp):
 
				             url = eval_xpath(result, href_xpath)[0]
			
 
				             content = extract_text_from_dom(result, content_xpath)
			
 
				             results.append({
			
 
				-                'url':      url,
			
 
				-                'title':    title,
			
 
				-                'content':  content
			
 
				-                })
			
 
				+                'url': url,
			
 
				+                'title': title,
			
 
				+                'content': content
			
 
				+            })
			
 
				         except Exception as e:  # pylint: disable=broad-except
			
 
				             logger.error(e, exc_info=True)
			
 
				-            #from lxml import etree
			
 
				-            #logger.debug(etree.tostring(result, pretty_print=True))
			
 
				-            #import pdb
			
 
				-            #pdb.set_trace()
			
 
				+            # from lxml import etree
			
 
				+            # logger.debug(etree.tostring(result, pretty_print=True))
			
 
				+            # import pdb
			
 
				+            # pdb.set_trace()
			
 
				             continue
			
 
				 
			
 
				     # parse suggestion
			
@@ -272,6 +276,7 @@ def response(resp):
 
				     # return results
			
 
				     return results
			
 
				 
			
 
				+
			
 
				 # get supported languages from their site
			
 
				 def _fetch_supported_languages(resp):
			
 
				     ret_val = {}
			
--- a/searx/engines/google_images.py
+++ b/searx/engines/google_images.py
@@ -33,15 +33,15 @@ from searx.engines.xpath import extract_text
 
				 
			
 
				 # pylint: disable=unused-import
			
 
				 from searx.engines.google import (
			
 
				-    supported_languages_url
			
 
				-    ,  _fetch_supported_languages
			
 
				+    supported_languages_url,
			
 
				+    _fetch_supported_languages,
			
 
				 )
			
 
				 # pylint: enable=unused-import
			
 
				 
			
 
				 from searx.engines.google import (
			
 
				-    get_lang_country
			
 
				-    , google_domains
			
 
				-    , time_range_dict
			
 
				+    get_lang_country,
			
 
				+    google_domains,
			
 
				+    time_range_dict,
			
 
				 )
			
 
				 
			
 
				 logger = logger.getChild('google images')
			
@@ -56,11 +56,12 @@ time_range_support = True
 
				 safesearch = True
			
 
				 
			
 
				 filter_mapping = {
			
 
				-    0 : 'images',
			
 
				-    1 : 'active',
			
 
				-    2 : 'active'
			
 
				+    0: 'images',
			
 
				+    1: 'active',
			
 
				+    2: 'active'
			
 
				 }
			
 
				 
			
 
				+
			
 
				 def scrap_out_thumbs(dom):
			
 
				     """Scrap out thumbnail data from <script> tags.
			
 
				     """
			
@@ -68,13 +69,14 @@ def scrap_out_thumbs(dom):
 
				     for script in eval_xpath(dom, '//script[contains(., "_setImgSrc(")]'):
			
 
				         _script = script.text
			
 
				         # _setImgSrc('0','data:image\/jpeg;base64,\/9j\/4AAQSkZJR ....');
			
 
				-        _thumb_no, _img_data = _script[len("_setImgSrc("):-2].split(",",1)
			
 
				-        _thumb_no = _thumb_no.replace("'","")
			
 
				-        _img_data = _img_data.replace("'","")
			
 
				+        _thumb_no, _img_data = _script[len("_setImgSrc("):-2].split(",", 1)
			
 
				+        _thumb_no = _thumb_no.replace("'", "")
			
 
				+        _img_data = _img_data.replace("'", "")
			
 
				         _img_data = _img_data.replace(r"\/", r"/")
			
 
				         ret_val[_thumb_no] = _img_data.replace(r"\x3d", "=")
			
 
				     return ret_val
			
 
				 
			
 
				+
			
 
				 def request(query, params):
			
 
				     """Google-Image search request"""
			
 
				 
			
@@ -84,10 +86,10 @@ def request(query, params):
 
				     )
			
 
				     subdomain = 'www.' + google_domains.get(country.upper(), 'google.com')
			
 
				 
			
 
				-    query_url = 'https://'+ subdomain + '/search' + "?" + urlencode({
			
 
				-        'q':   query,
			
 
				+    query_url = 'https://' + subdomain + '/search' + "?" + urlencode({
			
 
				+        'q': query,
			
 
				         'tbm': "isch",
			
 
				-        'hl':  lang_country,
			
 
				+        'hl': lang_country,
			
 
				         'lr': "lang_" + language,
			
 
				         'ie': "utf8",
			
 
				         'oe': "utf8",
			
@@ -108,8 +110,8 @@ def request(query, params):
 
				         "HTTP Accept-Language --> %s", params['headers']['Accept-Language'])
			
 
				     params['headers']['Accept'] = (
			
 
				         'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
			
 
				-        )
			
 
				-    #params['google_subdomain'] = subdomain
			
 
				+    )
			
 
				+    # params['google_subdomain'] = subdomain
			
 
				     return params
			
 
				 
			
 
				 
			
@@ -196,10 +198,10 @@ def response(resp):
 
				             })
			
 
				         except Exception as e:  # pylint: disable=broad-except
			
 
				             logger.error(e, exc_info=True)
			
 
				-            #from lxml import etree
			
 
				-            #logger.debug(etree.tostring(img_node, pretty_print=True))
			
 
				-            #import pdb
			
 
				-            #pdb.set_trace()
			
 
				+            # from lxml import etree
			
 
				+            # logger.debug(etree.tostring(img_node, pretty_print=True))
			
 
				+            # import pdb
			
 
				+            # pdb.set_trace()
			
 
				             continue
			
 
				 
			
 
				     return results