|
@@ -124,9 +124,6 @@ image_img_src_xpath = './img/@src'
|
|
# FIXME : no translation
|
|
# FIXME : no translation
|
|
property_address = "Address"
|
|
property_address = "Address"
|
|
property_phone = "Phone number"
|
|
property_phone = "Phone number"
|
|
-property_location = "Location"
|
|
|
|
-property_website = "Web site"
|
|
|
|
-property_gplus_website = "Google plus"
|
|
|
|
|
|
|
|
# cookies
|
|
# cookies
|
|
pref_cookie = ''
|
|
pref_cookie = ''
|
|
@@ -166,19 +163,6 @@ def parse_url(url_string, google_hostname):
|
|
return url_string
|
|
return url_string
|
|
|
|
|
|
|
|
|
|
-# URL : get label
|
|
|
|
-def url_get_label(url_string):
|
|
|
|
- # sanity check
|
|
|
|
- if url_string is None:
|
|
|
|
- return url_string
|
|
|
|
-
|
|
|
|
- # normal case
|
|
|
|
- parsed_url = urlparse(url_string)
|
|
|
|
- if parsed_url.netloc == 'plus.google.com':
|
|
|
|
- return property_gplus_website
|
|
|
|
- return property_website
|
|
|
|
-
|
|
|
|
-
|
|
|
|
# returns extract_text on the first result selected by the xpath or None
|
|
# returns extract_text on the first result selected by the xpath or None
|
|
def extract_text_from_dom(result, xpath):
|
|
def extract_text_from_dom(result, xpath):
|
|
r = result.xpath(xpath)
|
|
r = result.xpath(xpath)
|
|
@@ -281,9 +265,9 @@ def response(resp):
|
|
# append result
|
|
# append result
|
|
results.append({'url': url,
|
|
results.append({'url': url,
|
|
'title': title,
|
|
'title': title,
|
|
- 'content': content})
|
|
|
|
- except Exception, e:
|
|
|
|
- print e
|
|
|
|
|
|
+ 'content': content
|
|
|
|
+ })
|
|
|
|
+ except:
|
|
continue
|
|
continue
|
|
|
|
|
|
# parse suggestion
|
|
# parse suggestion
|