## General Files (Files)
#
# @website     http://www.general-files.org
# @provide-api no (nothing found)
#
# @using-api   no (because nothing found)
# @results     HTML (using search portal)
# @stable      no (HTML can change)
# @parse       url, title, content
#
# @todo        detect torrents?

from lxml import html

# engine dependent config
categories = ['files']
paging = True

# search-url
base_url = 'http://www.general-file.com'
search_url = base_url + '/files-{letter}/{query}/{pageno}'

# specific xpath variables
result_xpath = '//table[@class="block-file"]'
title_xpath = './/h2/a//text()'
url_xpath = './/h2/a/@href'
content_xpath = './/p//text()'


# do search-request
def request(query, params):
    """Store the search URL for ``query`` in ``params['url']``.

    The URL is built from ``search_url`` using the query itself, the first
    character of the query (the ``{letter}`` path segment) and
    ``params['pageno']``.

    :param query: search terms; must be non-empty because its first
        character is used in the URL.
    :param params: request parameters; ``'pageno'`` is read and ``'url'``
        is written.
    :returns: the same ``params`` mapping, updated in place.
    :raises IndexError: if ``query`` is empty.
    """
    params['url'] = search_url.format(query=query,
                                      letter=query[0],
                                      pageno=params['pageno'])

    return params


# get response from search-request
def response(resp):
    """Extract the search results from the HTML in ``resp.text``.

    Every element matched by ``result_xpath`` is inspected.  Entries without
    a title link, and entries whose link does not start with ``'/'``, are
    left out.

    :param resp: response object exposing the page markup as ``resp.text``.
    :returns: list of dicts with the keys ``'url'``, ``'title'`` and
        ``'content'``.
    """
    results = []

    dom = html.fromstring(resp.text)

    # parse results
    for result in dom.xpath(result_xpath):
        links = result.xpath(url_xpath)

        # a block without a title link has no usable url; skip it instead
        # of failing on the [0] lookup and losing the rest of the page
        if not links:
            continue

        url = links[0]

        # skip fast download links
        if not url.startswith('/'):
            continue

        # append result
        results.append({'url': base_url + url,
                        'title': ''.join(result.xpath(title_xpath)),
                        'content': ''.join(result.xpath(content_xpath))})

    # return results
    return results