|  | @@ -31,13 +31,9 @@ from searx.engines.google import (
 | 
	
		
			
				|  |  |      get_lang_info,
 | 
	
		
			
				|  |  |      time_range_dict,
 | 
	
		
			
				|  |  |      filter_mapping,
 | 
	
		
			
				|  |  | -    results_xpath,
 | 
	
		
			
				|  |  |      g_section_with_header,
 | 
	
		
			
				|  |  |      title_xpath,
 | 
	
		
			
				|  |  | -    href_xpath,
 | 
	
		
			
				|  |  | -    content_xpath,
 | 
	
		
			
				|  |  |      suggestion_xpath,
 | 
	
		
			
				|  |  | -    spelling_suggestion_xpath,
 | 
	
		
			
				|  |  |      detect_google_sorry,
 | 
	
		
			
				|  |  |  )
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -74,11 +70,27 @@ def _re(regexpr):
 | 
	
		
			
				|  |  |      RE_CACHE[regexpr] = RE_CACHE.get(regexpr, re.compile(regexpr))
 | 
	
		
			
				|  |  |      return RE_CACHE[regexpr]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def scrap_out_thumbs_src(dom):
 | 
	
		
			
				|  |  | +    ret_val = {}
 | 
	
		
			
				|  |  | +    thumb_name = 'dimg_'
 | 
	
		
			
				|  |  | +    for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
 | 
	
		
			
				|  |  | +        _script = script.text
 | 
	
		
			
				|  |  | +        # "dimg_35":"https://i.ytimg.c....",
 | 
	
		
			
				|  |  | +        _dimurl = _re("s='([^']*)").findall( _script)
 | 
	
		
			
				|  |  | +        for k,v in _re('(' + thumb_name + '[0-9]*)":"(http[^"]*)' ).findall(_script):
 | 
	
		
			
				|  |  | +            v = v.replace(r'\u003d','=')
 | 
	
		
			
				|  |  | +            v = v.replace(r'\u0026','&')
 | 
	
		
			
				|  |  | +            ret_val[k] = v
 | 
	
		
			
				|  |  | +    logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
 | 
	
		
			
				|  |  | +    return ret_val
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  def scrap_out_thumbs(dom):
 | 
	
		
			
				|  |  |      """Scrap out thumbnail data from <script> tags.
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      ret_val = {}
 | 
	
		
			
				|  |  | -    thumb_name = 'vidthumb'
 | 
	
		
			
				|  |  | +    thumb_name = 'dimg_'
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      for script in eval_xpath_list(dom, '//script[contains(., "_setImagesSrc")]'):
 | 
	
		
			
				|  |  |          _script = script.text
 | 
	
	
		
			
				|  | @@ -88,20 +100,11 @@ def scrap_out_thumbs(dom):
 | 
	
		
			
				|  |  |          if not _imgdata:
 | 
	
		
			
				|  |  |              continue
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        # var ii=['vidthumb4','vidthumb7']
 | 
	
		
			
				|  |  | +        # var ii=['dimg_17']
 | 
	
		
			
				|  |  |          for _vidthumb in _re(r"(%s\d+)" % thumb_name).findall(_script):
 | 
	
		
			
				|  |  |              # At least the equal sign in the URL needs to be decoded
 | 
	
		
			
				|  |  |              ret_val[_vidthumb] = _imgdata[0].replace(r"\x3d", "=")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    # {google.ldidly=-1;google.ldi={"vidthumb8":"https://...
 | 
	
		
			
				|  |  | -    for script in eval_xpath_list(dom, '//script[contains(., "google.ldi={")]'):
 | 
	
		
			
				|  |  | -        _script = script.text
 | 
	
		
			
				|  |  | -        for key_val in _re(r'"%s\d+\":\"[^\"]*"' % thumb_name).findall( _script) :
 | 
	
		
			
				|  |  | -            match = _re(r'"(%s\d+)":"(.*)"' % thumb_name).search(key_val)
 | 
	
		
			
				|  |  | -            if match:
 | 
	
		
			
				|  |  | -                # At least the equal sign in the URL needs to be decoded
 | 
	
		
			
				|  |  | -                ret_val[match.group(1)] = match.group(2).replace(r"\u003d", "=")
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      logger.debug("found %s imgdata for: %s", thumb_name, ret_val.keys())
 | 
	
		
			
				|  |  |      return ret_val
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -145,9 +148,11 @@ def response(resp):
 | 
	
		
			
				|  |  |      # convert the text to dom
 | 
	
		
			
				|  |  |      dom = html.fromstring(resp.text)
 | 
	
		
			
				|  |  |      vidthumb_imgdata = scrap_out_thumbs(dom)
 | 
	
		
			
				|  |  | +    thumbs_src = scrap_out_thumbs_src(dom)
 | 
	
		
			
				|  |  | +    logger.debug(str(thumbs_src))
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      # parse results
 | 
	
		
			
				|  |  | -    for result in eval_xpath_list(dom, results_xpath):
 | 
	
		
			
				|  |  | +    for result in eval_xpath_list(dom, '//div[contains(@class, "g ")]'):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          # google *sections*
 | 
	
		
			
				|  |  |          if extract_text(eval_xpath(result, g_section_with_header)):
 | 
	
	
		
			
				|  | @@ -155,21 +160,24 @@ def response(resp):
 | 
	
		
			
				|  |  |              continue
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          title = extract_text(eval_xpath_getindex(result, title_xpath, 0))
 | 
	
		
			
				|  |  | -        url = eval_xpath_getindex(result, href_xpath, 0)
 | 
	
		
			
				|  |  | -        c_node = eval_xpath_getindex(result, content_xpath, 0)
 | 
	
		
			
				|  |  | +        url = eval_xpath_getindex(result, './/div[@class="dXiKIc"]//a/@href', 0)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          # <img id="vidthumb1" ...>
 | 
	
		
			
				|  |  | -        img_id = eval_xpath_getindex(c_node, './div[1]//a/g-img/img/@id', 0, default=None)
 | 
	
		
			
				|  |  | +        img_id = eval_xpath_getindex(result, './/g-img/img/@id', 0, default=None)
 | 
	
		
			
				|  |  |          if img_id is None:
 | 
	
		
			
				|  |  | +            logger.error("no img_id for: %s" % result)
 | 
	
		
			
				|  |  |              continue
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |          img_src = vidthumb_imgdata.get(img_id, None)
 | 
	
		
			
				|  |  |          if not img_src:
 | 
	
		
			
				|  |  |              logger.error("no vidthumb imgdata for: %s" % img_id)
 | 
	
		
			
				|  |  | -            img_src = eval_xpath_getindex(c_node, './div[1]//a/g-img/img/@src', 0)
 | 
	
		
			
				|  |  | +            img_src = thumbs_src.get(img_id, "")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        length = extract_text(eval_xpath(c_node, './/div[1]//a/div[3]'))
 | 
	
		
			
				|  |  | -        content = extract_text(eval_xpath(c_node, './/div[2]/span'))
 | 
	
		
			
				|  |  | -        pub_info = extract_text(eval_xpath(c_node, './/div[2]/div'))
 | 
	
		
			
				|  |  | +        length = extract_text(eval_xpath(
 | 
	
		
			
				|  |  | +            result, './/div[contains(@class, "P7xzyf")]/span/span'))
 | 
	
		
			
				|  |  | +        c_node = eval_xpath_getindex(result, './/div[@class="Uroaid"]', 0)
 | 
	
		
			
				|  |  | +        content = extract_text(c_node)
 | 
	
		
			
				|  |  | +        pub_info = extract_text(eval_xpath(result, './/div[@class="Zg1NU"]'))
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          results.append({
 | 
	
		
			
				|  |  |              'url':         url,
 | 
	
	
		
			
				|  | @@ -186,7 +194,4 @@ def response(resp):
 | 
	
		
			
				|  |  |          # append suggestion
 | 
	
		
			
				|  |  |          results.append({'suggestion': extract_text(suggestion)})
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    for correction in eval_xpath_list(dom, spelling_suggestion_xpath):
 | 
	
		
			
				|  |  | -        results.append({'correction': extract_text(correction)})
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |      return results
 |