|  | @@ -0,0 +1,66 @@
 | 
	
		
			
				|  |  | +## Digg (News, Social media)
 | 
	
		
			
				|  |  | +#
 | 
	
		
			
				|  |  | +# @website     https://digg.com/
 | 
	
		
			
				|  |  | +# @provide-api no
 | 
	
		
			
				|  |  | +#
 | 
	
		
			
				|  |  | +# @using-api   no
 | 
	
		
			
				|  |  | +# @results     HTML (using search portal)
 | 
	
		
			
				|  |  | +# @stable      no (HTML can change)
 | 
	
		
			
				|  |  | +# @parse       url, title, content, publishedDate, thumbnail
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +from urllib import quote_plus
 | 
	
		
			
				|  |  | +from json import loads
 | 
	
		
			
				|  |  | +from lxml import html
 | 
	
		
			
				|  |  | +from cgi import escape
 | 
	
		
			
				|  |  | +from dateutil import parser
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# engine dependent config
 | 
	
		
			
				|  |  | +categories = ['news', 'social media']
 | 
	
		
			
				|  |  | +paging = True
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# search-url
 | 
	
		
			
				|  |  | +base_url = 'https://digg.com/'
 | 
	
		
			
				|  |  | +search_url = base_url+'api/search/{query}.json?position={position}&format=html'
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# specific xpath variables
 | 
	
		
			
				|  |  | +results_xpath = '//article'
 | 
	
		
			
				|  |  | +link_xpath = './/small[@class="time"]//a'
 | 
	
		
			
				|  |  | +title_xpath = './/h2//a//text()'
 | 
	
		
			
				|  |  | +content_xpath = './/p//text()'
 | 
	
		
			
				|  |  | +pubdate_xpath = './/time'
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# do search-request
 | 
	
		
			
				|  |  | +def request(query, params):
 | 
	
		
			
				|  |  | +    offset = (params['pageno'] - 1) * 10
 | 
	
		
			
				|  |  | +    params['url'] = search_url.format(position=offset,
 | 
	
		
			
				|  |  | +                                      query=quote_plus(query))
 | 
	
		
			
				|  |  | +    return params
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# get response from search-request
 | 
	
		
			
				|  |  | +def response(resp):
 | 
	
		
			
				|  |  | +    results = []
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    search_result = loads(resp.text)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    dom = html.fromstring(search_result['html'])
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    # parse results
 | 
	
		
			
				|  |  | +    for result in dom.xpath(results_xpath):
 | 
	
		
			
				|  |  | +        url = result.attrib.get('data-contenturl')
 | 
	
		
			
				|  |  | +        thumbnail = result.xpath('.//img')[0].attrib.get('src')
 | 
	
		
			
				|  |  | +        title = ''.join(result.xpath(title_xpath))
 | 
	
		
			
				|  |  | +        content = escape(''.join(result.xpath(content_xpath)))
 | 
	
		
			
				|  |  | +        publishedDate = parser.parse(result.xpath(pubdate_xpath)[0].attrib.get('datetime'))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        # append result
 | 
	
		
			
				|  |  | +        results.append({'url': url,
 | 
	
		
			
				|  |  | +                        'title': title,
 | 
	
		
			
				|  |  | +                        'content': content,
 | 
	
		
			
				|  |  | +                        'template': 'videos.html',
 | 
	
		
			
				|  |  | +                        'publishedDate': publishedDate,
 | 
	
		
			
				|  |  | +                        'thumbnail': thumbnail})
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    # return results
 | 
	
		
			
				|  |  | +    return results
 |