## Stackoverflow (It)
#
# @website     https://stackoverflow.com/
# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
#
# @using-api   no
# @results     HTML
# @stable      no (HTML can change)
# @parse       url, title, content

try:
    # Python 3 locations of the helpers used below.
    from urllib.parse import urlencode, urljoin
    from html import escape
except ImportError:
    # Python 2 fallback (the module's original imports).
    from urlparse import urljoin
    from cgi import escape
    from urllib import urlencode

from lxml import html

# engine dependent config
categories = ['it']
paging = True

# search-url
url = 'http://stackoverflow.com/'
search_url = url+'search?{query}&page={pageno}'

# specific xpath variables
results_xpath = '//div[contains(@class,"question-summary")]'
link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
title_xpath = './/text()'
content_xpath = './/div[@class="excerpt"]//text()'


# do search-request
def request(query, params):
    """Store the search URL for ``query`` in ``params`` and return ``params``.

    Reads ``params['pageno']`` for the page number and writes the finished
    URL to ``params['url']``. The query is url-encoded as the ``q`` parameter.
    """
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      pageno=params['pageno'])

    return params


# get response from search-request
def response(resp):
    """Parse the HTML in ``resp.text`` into a list of result dicts.

    Each dict carries the keys ``url``, ``title`` and ``content``. ``url`` is
    the link's ``href`` joined onto the site base url; ``title`` and
    ``content`` are the whitespace-joined text nodes, HTML-escaped.
    Result nodes that contain no matching link are skipped.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        links = result.xpath(link_xpath)
        if not links:
            # A summary block without a link cannot yield a usable result;
            # skip it instead of raising IndexError and losing the whole page.
            continue

        link = links[0]
        href = urljoin(url, link.attrib.get('href'))
        # quote=False keeps the output identical to the original cgi.escape
        # default (only &, < and > are escaped) on both Python 2 and 3.
        title = escape(' '.join(link.xpath(title_xpath)), quote=False)
        content = escape(' '.join(result.xpath(content_xpath)), quote=False)

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content})

    # return results
    return results