#!/usr/bin/env python
"""Flickr image search engine.

Builds a search URL on ``secure.flickr.com`` and scrapes the photo
thumbnails out of the HTML page that comes back.
"""

try:  # Python 3 location of the URL helpers
    from urllib.parse import urlencode, urljoin
except ImportError:  # Python 2 fallback (the original imports)
    from urllib import urlencode
    from urlparse import urljoin

from lxml import html

# NOTE(review): `categories` and `paging` are not read in this file;
# presumably the host application inspects them -- confirm against the loader.
categories = ['images']

url = 'https://secure.flickr.com/'
search_url = url+'search/?{query}&page={page}'
# Anchors inside <div id="thumbnails"> that wrap one photo thumbnail each.
results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'  # noqa

paging = True


def request(query, params):
    """Fill in the search URL for *query* and return *params*.

    ``params['pageno']`` supplies the page number. The URL is stored under
    ``params['url']``; the dict is modified in place and also returned.
    """
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      page=params['pageno'])
    return params


def response(resp):
    """Extract image results from the HTML in ``resp.text``.

    Returns a list of dicts with the keys ``url``, ``title``, ``img_src``
    and ``template``. Anchors that contain no ``<img>`` element, or whose
    image has no ``data-defer-src`` attribute, are skipped.
    """
    results = []

    dom = html.fromstring(resp.text)

    for result in dom.xpath(results_xpath):
        images = result.xpath('.//img')
        if not images:
            # A thumbnail link without an image must not abort the whole
            # page; drop just this entry.
            continue
        img = images[0]

        img_src = img.attrib.get('data-defer-src')
        if not img_src:
            continue

        # hrefs may be relative; resolve them against the site root.
        href = urljoin(url, result.attrib.get('href'))
        title = img.attrib.get('alt', '')

        results.append({'url': href,
                        'title': title,
                        'img_src': img_src,
                        'template': 'images.html'})

    return results