"""
 Gigablast (Web)

 @website     http://gigablast.com
 @provide-api yes (http://gigablast.com/api.html)

 @using-api   yes
 @results     XML
 @stable      yes
 @parse       url, title, content
"""

from random import randint
from time import time

from lxml import etree

try:  # Python 2
    from urllib import urlencode
except ImportError:  # Python 3
    from urllib.parse import urlencode

try:  # Python 3.2+
    from html import escape
except ImportError:  # Python 2
    from cgi import escape

# engine dependent config
categories = ['general']
paging = True
number_of_results = 5

# search-url, invalid HTTPS certificate
base_url = 'http://gigablast.com/'
search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0&uxid={uxid}&rand={rand}'

# specific xpath variables
results_xpath = '//response//result'
url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'


# do search-request
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    Args:
        query: the search terms; url-encoded as the ``q`` parameter.
        params: request parameters; ``params['pageno']`` (1-based page
            number) is read and ``params['url']`` is written.

    Returns:
        The same ``params`` dict, updated in place.
    """
    # page 1 starts at result 0, page 2 at ``number_of_results``, ...
    offset = (params['pageno'] - 1) * number_of_results

    search_path = search_string.format(
        query=urlencode({'q': query}),
        offset=offset,
        number_of_results=number_of_results,
        # ``uxid`` and ``rand`` are filled with a random number and the
        # current unix timestamp on every request
        uxid=randint(10000, 10000000),
        rand=int(time()))

    params['url'] = base_url + search_path

    return params


def _first_text(node, xpath):
    """Return the text of the first element matching ``xpath``, or None."""
    matches = node.xpath(xpath)
    if not matches:
        return None
    return matches[0].text


# get response from search-request
def response(resp):
    """Parse the XML answer into a list of result dicts.

    Args:
        resp: response object; ``resp.content`` must hold the XML document.

    Returns:
        A list of dicts with the keys ``url``, ``title`` and ``content``.
        ``content`` is HTML-escaped (``&``, ``<``, ``>``). Results without
        a URL are skipped; a missing or empty title/summary becomes ``''``.
    """
    results = []

    dom = etree.fromstring(resp.content)

    # parse results
    for result in dom.xpath(results_xpath):
        url = _first_text(result, url_xpath)
        if not url:
            # nothing to link to, and one broken entry must not abort
            # the parsing of the remaining results
            continue

        # ``.text`` is None for empty elements such as ``<sum/>``
        title = _first_text(result, title_xpath) or ''
        summary = _first_text(result, content_xpath) or ''

        # quote=False keeps the output identical to the old cgi.escape
        # default (html.escape would also escape quotes by default)
        content = escape(summary, quote=False)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content})

    # return results
    return results