|
@@ -66,7 +66,11 @@ def response(resp):
|
|
|
url = link.attrib.get('href')
|
|
|
|
|
|
# block google-ad url's
|
|
|
- if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url):
|
|
|
+ if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url):
|
|
|
+ continue
|
|
|
+
|
|
|
+ # block startpage search URLs
|
|
|
+ if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url):
|
|
|
continue
|
|
|
|
|
|
title = escape(extract_text(link))
|