"""
 Stackoverflow (It)

 @website     https://stackoverflow.com/
 @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)

 @using-api   no
 @results     HTML
 @stable      no (HTML can change)
 @parse       url, title, content
"""

try:
    # Python 2 module layout (what this engine was written against)
    from urlparse import urljoin
    from urllib import urlencode
except ImportError:
    # Python 3 moved both helpers into urllib.parse
    from urllib.parse import urljoin, urlencode

from lxml import html
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['it']
paging = True

# search-url
url = 'https://stackoverflow.com/'
search_url = url + 'search?{query}&page={pageno}'

# specific xpath variables
results_xpath = '//div[contains(@class,"question-summary")]'
link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
content_xpath = './/div[@class="excerpt"]'


# do search-request
def request(query, params):
    """Fill in the search URL for ``query`` and return ``params``.

    The query is url-encoded as the ``q`` parameter and the page number is
    read from ``params['pageno']``. ``params`` is modified in place (its
    ``'url'`` key is set) and the same object is returned.
    """
    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      pageno=params['pageno'])

    return params


# get response from search-request
def response(resp):
    """Parse the HTML in ``resp.text`` into a list of result dicts.

    Each returned dict has the keys ``'url'``, ``'title'`` and ``'content'``.
    Result nodes in which no link matches ``link_xpath`` are skipped.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(results_xpath):
        links = result.xpath(link_xpath)
        if not links:
            # no anchor found in this node: skip it instead of raising
            # IndexError and losing every other result on the page
            continue

        link = links[0]
        # hrefs may be relative, so resolve them against the site root
        href = urljoin(url, link.attrib.get('href'))
        title = extract_text(link)
        content = extract_text(result.xpath(content_xpath))

        # append result
        results.append({'url': href,
                        'title': title,
                        'content': content})

    # return results
    return results