## Digg (News, Social media)
#
# @website     https://digg.com/
# @provide-api no
#
# @using-api   no
# @results     HTML (using search portal)
# @stable      no (HTML can change)
# @parse       url, title, content, publishedDate, thumbnail

from json import loads

from dateutil import parser
from lxml import html

try:  # Python 3
    from urllib.parse import quote_plus
except ImportError:  # Python 2
    from urllib import quote_plus

try:  # Python 3 (cgi.escape was removed in Python 3.8)
    from html import escape
except ImportError:  # Python 2
    from cgi import escape

# engine dependent config
categories = ['news', 'social media']
paging = True

# search-url
base_url = 'https://digg.com/'
search_url = base_url+'api/search/{query}.json?position={position}&format=html'

# specific xpath variables
results_xpath = '//article'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/h2//a//text()'
content_xpath = './/p//text()'
pubdate_xpath = './/time'


def _first_attribute(node, xpath, name):
    """Return attribute `name` of the first element matching `xpath`.

    Returns None when nothing matches or the attribute is absent, instead of
    raising IndexError on an empty match list.
    """
    matches = node.xpath(xpath)
    if not matches:
        return None
    return matches[0].attrib.get(name)


# do search-request
def request(query, params):
    """Store the search URL for `query` in params['url'] and return params.

    params['pageno'] is the 1-based page number; it is converted into the
    `position` offset of the URL in steps of 10.
    """
    offset = (params['pageno'] - 1) * 10
    params['url'] = search_url.format(position=offset,
                                      query=quote_plus(query))
    return params


# get response from search-request
def response(resp):
    """Parse the JSON search response into a list of result dicts.

    `resp.text` must hold a JSON object whose 'html' member is the HTML
    fragment with the result articles. Each returned dict carries 'url',
    'title', 'content', 'template' and 'thumbnail'; 'publishedDate' is only
    present when the article has a parseable timestamp.
    """
    results = []

    search_result = loads(resp.text)

    # a missing or empty 'html' member means there is nothing to parse
    markup = search_result.get('html')
    if not markup:
        return results

    dom = html.fromstring(markup)

    # parse results
    for result in dom.xpath(results_xpath):
        url = result.attrib.get('data-contenturl')
        if not url:
            # an entry without a target URL cannot be presented as a result
            continue

        # thumbnail stays None when the article has no image
        thumbnail = _first_attribute(result, './/img', 'src')
        title = ''.join(result.xpath(title_xpath))
        # quote=False keeps the cgi.escape default (only &, < and > escaped)
        content = escape(''.join(result.xpath(content_xpath)), quote=False)

        entry = {'url': url,
                 'title': title,
                 'content': content,
                 'template': 'videos.html',
                 'thumbnail': thumbnail}

        pubdate = _first_attribute(result, pubdate_xpath, 'datetime')
        if pubdate:
            try:
                entry['publishedDate'] = parser.parse(pubdate)
            except (ValueError, OverflowError):
                # unparseable timestamp: keep the result, leave the date out
                pass

        # append result
        results.append(entry)

    # return results
    return results