|
@@ -99,9 +99,9 @@ time_range_dict = {
|
|
|
|
|
|
|
|
|
filter_mapping = {
|
|
|
- 0 : 'off',
|
|
|
- 1 : 'medium',
|
|
|
- 2 : 'high'
|
|
|
+ 0: 'off',
|
|
|
+ 1: 'medium',
|
|
|
+ 2: 'high'
|
|
|
}
|
|
|
|
|
|
|
|
@@ -111,7 +111,7 @@ filter_mapping = {
|
|
|
results_xpath = '//div[@class="g"]'
|
|
|
|
|
|
|
|
|
-g_section_with_header='./g-section-with-header'
|
|
|
+g_section_with_header = './g-section-with-header'
|
|
|
|
|
|
|
|
|
title_xpath = './/h3[1]'
|
|
@@ -131,6 +131,7 @@ suggestion_xpath = '//div[contains(@class, "card-section")]//a'
|
|
|
|
|
|
spelling_suggestion_xpath = '//div[@class="med"]/p/a'
|
|
|
|
|
|
+
|
|
|
def extract_text_from_dom(result, xpath):
|
|
|
"""returns extract_text on the first result selected by the xpath or None"""
|
|
|
r = eval_xpath(result, xpath)
|
|
@@ -138,6 +139,7 @@ def extract_text_from_dom(result, xpath):
|
|
|
return extract_text(r[0])
|
|
|
return None
|
|
|
|
|
|
+
|
|
|
def get_lang_country(params, lang_list, custom_aliases):
|
|
|
"""Returns a tuple with *language* on its first and *country* on its second
|
|
|
position."""
|
|
@@ -159,6 +161,7 @@ def get_lang_country(params, lang_list, custom_aliases):
|
|
|
|
|
|
return language, country, lang_country
|
|
|
|
|
|
+
|
|
|
def request(query, params):
|
|
|
"""Google search request"""
|
|
|
|
|
@@ -170,7 +173,7 @@ def request(query, params):
|
|
|
subdomain = 'www.' + google_domains.get(country.upper(), 'google.com')
|
|
|
|
|
|
|
|
|
- query_url = 'https://'+ subdomain + '/search' + "?" + urlencode({
|
|
|
+ query_url = 'https://' + subdomain + '/search' + "?" + urlencode({
|
|
|
'q': query,
|
|
|
'hl': lang_country,
|
|
|
'lr': "lang_" + language,
|
|
@@ -190,16 +193,17 @@ def request(query, params):
|
|
|
|
|
|
params['headers']['Accept-Language'] = (
|
|
|
lang_country + ',' + language + ';q=0.8,' + language + ';q=0.5'
|
|
|
- )
|
|
|
+ )
|
|
|
logger.debug("HTTP header Accept-Language --> %s",
|
|
|
params['headers']['Accept-Language'])
|
|
|
params['headers']['Accept'] = (
|
|
|
'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'
|
|
|
- )
|
|
|
-
|
|
|
+ )
|
|
|
+
|
|
|
|
|
|
return params
|
|
|
|
|
|
+
|
|
|
def response(resp):
|
|
|
"""Get response from google's search request"""
|
|
|
results = []
|
|
@@ -249,16 +253,16 @@ def response(resp):
|
|
|
url = eval_xpath(result, href_xpath)[0]
|
|
|
content = extract_text_from_dom(result, content_xpath)
|
|
|
results.append({
|
|
|
- 'url': url,
|
|
|
- 'title': title,
|
|
|
- 'content': content
|
|
|
- })
|
|
|
+ 'url': url,
|
|
|
+ 'title': title,
|
|
|
+ 'content': content
|
|
|
+ })
|
|
|
except Exception as e:
|
|
|
logger.error(e, exc_info=True)
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
continue
|
|
|
|
|
|
|
|
@@ -272,6 +276,7 @@ def response(resp):
|
|
|
|
|
|
return results
|
|
|
|
|
|
+
|
|
|
|
|
|
def _fetch_supported_languages(resp):
|
|
|
ret_val = {}
|