"""Search engine adapter driven entirely by XPath expressions.

The module-level settings below describe where to send the query and which
XPath expressions pull URLs, titles, content and suggestions out of the
returned HTML page.
"""
from lxml import html
from urllib import urlencode
from urlparse import urlparse, urljoin
from cgi import escape
from lxml.etree import _ElementStringResult

# Engine configuration.
# NOTE(review): these defaults are presumably overwritten by whatever loads
# this module before request()/response() are called -- confirm.
search_url = None        # URL template; must contain a '{query}' placeholder
url_xpath = None         # XPath selecting the result link (element or string)
content_xpath = None     # XPath selecting the result content
title_xpath = None       # XPath selecting the result title
suggestion_xpath = ''    # optional XPath selecting suggestion nodes
results_xpath = ''       # optional XPath selecting one container per result


def _raw_url(xpath_results):
    """Return the un-normalized URL string carried by an XPath result.

    Accepts a list of string results, a list of elements, a single string
    result or a single element.  For elements the ``href`` attribute is
    used; a missing ``href`` yields an empty string so that the caller's
    validation reports it instead of crashing on ``None``.

    Raises:
        Exception: if *xpath_results* is an empty list.
    """
    if isinstance(xpath_results, list):
        if not xpath_results:
            raise Exception('Empty url resultset')
        if isinstance(xpath_results[0], _ElementStringResult):
            # String results are concatenated to form the URL.
            return ''.join(xpath_results)
        xpath_results = xpath_results[0]
    if isinstance(xpath_results, _ElementStringResult):
        return xpath_results
    return xpath_results.attrib.get('href') or ''


def extract_url(xpath_results):
    """Turn an XPath result into an absolute http(s) URL.

    Protocol-relative (``//host/path``) and host-relative (``/path``) URLs
    are resolved against ``search_url``.  A URL without an http(s) scheme
    gets ``http://`` prepended, and a URL without a path gets a trailing
    ``/``.

    Args:
        xpath_results: a list of XPath results, a single string result or a
            single element (see ``_raw_url``).

    Returns:
        The normalized URL string.

    Raises:
        Exception: if the result set is empty or the resulting URL has no
            network location.
    """
    url = _raw_url(xpath_results)

    if url.startswith('//'):
        # Protocol-relative: borrow the scheme of the search URL.
        scheme = urlparse(search_url).scheme or 'http'
        url = scheme + ':' + url
    elif url.startswith('/'):
        url = urljoin(search_url, url)

    # Only add a scheme when none of the accepted ones is present.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    parsed_url = urlparse(url)
    if not parsed_url.netloc:
        raise Exception('Cannot parse url')
    if not parsed_url.path:
        url += '/'
    return url


def request(query, params):
    """Fill *params* with the request URL for *query* and return it.

    Sets ``params['url']`` to ``search_url`` formatted with the URL-encoded
    query and ``params['query']`` to the encoded query itself.
    """
    # urlencode() produces 'q=<encoded>'; drop the leading 'q=' so only the
    # encoded value is substituted into the template.
    query = urlencode({'q': query})[2:]
    params['url'] = search_url.format(query=query)
    params['query'] = query
    return params


def response(resp):
    """Parse the HTML in ``resp.text`` into a list of result dicts.

    When ``results_xpath`` is set, every node it matches is treated as one
    result and the url/title/content XPaths are evaluated relative to it.
    Otherwise the three XPaths are evaluated on the whole document and
    their matches are paired up positionally.

    If ``suggestion_xpath`` is set, one ``{'suggestion': ...}`` entry per
    matching node is appended after the regular results.
    """
    results = []
    dom = html.fromstring(resp.text)

    if results_xpath:
        for result in dom.xpath(results_xpath):
            url = extract_url(result.xpath(url_xpath))
            title = ' '.join(result.xpath(title_xpath))
            content = escape(' '.join(result.xpath(content_xpath)))
            results.append({'url': url, 'title': title, 'content': content})
    else:
        # zip() stops at the shortest of the three match lists.
        urls = map(extract_url, dom.xpath(url_xpath))
        for content, url, title in zip(dom.xpath(content_xpath),
                                       urls,
                                       dom.xpath(title_xpath)):
            results.append({'url': url, 'title': title, 'content': content})

    if not suggestion_xpath:
        return results

    for suggestion in dom.xpath(suggestion_xpath):
        text = ''.join(suggestion.xpath('.//text()'))
        results.append({'suggestion': escape(text)})
    return results