"""
 DuckDuckGo (Web)

 @website     https://duckduckgo.com/
 @provide-api yes (https://duckduckgo.com/api),
              but not all results from search-site

 @using-api   no
 @results     HTML (using search portal)
 @stable      no (HTML can change)
 @parse       url, title, content

 @todo        rewrite to api
 @todo        verify language support
              (a region code is sent as the ``kl`` query parameter; whether
              the HTML portal honours it has not been confirmed here)
"""

try:
    # Python 3 location of urlencode
    from urllib.parse import urlencode
except ImportError:
    # Python 2 location of urlencode
    from urllib import urlencode
from lxml.html import fromstring
from searx.engines.xpath import extract_text

# engine dependent config
categories = ['general']
paging = True
language_support = True

# search-url
url = 'https://duckduckgo.com/html?{query}&s={offset}'

# specific xpath variables
result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa
url_xpath = './/a[@class="large"]/@href'
title_xpath = './/a[@class="large"]'
content_xpath = './/div[@class="snippet"]'


# do search-request
def request(query, params):
    """Fill ``params['url']`` with the DuckDuckGo HTML search URL.

    :param query: search terms, url-encoded as the ``q`` parameter
    :param params: request parameters; ``params['pageno']`` (1-based page
                   number) and ``params['language']`` are read, and
                   ``params['url']`` is written
    :returns: the same ``params`` dict, with ``'url'`` set
    """
    # the result offset advances by 30 for every page after the first
    offset = (params['pageno'] - 1) * 30

    if params['language'] == 'all':
        # no specific language requested: fall back to a fixed region code
        locale = 'en-us'
    else:
        # turn e.g. 'de_DE' into 'de-de' for the ``kl`` parameter
        locale = params['language'].replace('_', '-').lower()

    params['url'] = url.format(
        query=urlencode({'q': query, 'kl': locale}),
        offset=offset)

    return params


# get response from search-request
def response(resp):
    """Parse the HTML result page into a list of result dicts.

    :param resp: response object; only ``resp.text`` (the HTML body) is read
    :returns: list of dicts with the keys ``'title'``, ``'content'`` and
              ``'url'``, one per result block that contains a link
    """
    results = []

    doc = fromstring(resp.text)

    # parse results
    for r in doc.xpath(result_xpath):
        links = r.xpath(url_xpath)

        # skip result blocks without a link; an explicit check replaces the
        # former bare ``except:``, which also hid unrelated errors
        if not links:
            continue

        # when several links match, the last one is used
        res_url = links[-1]

        if not res_url:
            continue

        title = extract_text(r.xpath(title_xpath))
        content = extract_text(r.xpath(content_xpath))

        # append result
        results.append({'title': title,
                        'content': content,
                        'url': res_url})

    # return results
    return results