# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
""".. sidebar:: info

   - `Recoll <https://www.lesbonscomptes.com/recoll/>`_
   - `recoll-webui <https://framagit.org/medoc92/recollwebui.git>`_
   - :origin:`searx/engines/recoll.py`

Recoll_ is a desktop full-text search tool based on Xapian.  By itself Recoll_
does not offer WEB or API access, this can be achieved using recoll-webui_.

Configuration
=============

You must configure the following settings:

``base_url``:
  Location where recoll-webui can be reached.

``mount_prefix``:
  Location where the file hierarchy is mounted on your *local* filesystem.

``dl_prefix``:
  Location where the file hierarchy as indexed by recoll can be reached.

``search_dir``:
  Part of the indexed file hierarchy to be searched, if empty the full domain
  is searched.

Example
=======

Scenario:

#. Recoll indexes a local filesystem mounted in ``/export/documents/reference``,
#. the Recoll search interface can be reached at https://recoll.example.org/ and
#. the contents of this filesystem can be reached through https://download.example.org/reference

.. code:: yaml

   base_url: https://recoll.example.org/
   mount_prefix: /export/documents
   dl_prefix: https://download.example.org
   search_dir: ''

Implementations
===============

"""

from datetime import date, timedelta
from json import loads
from urllib.parse import urlencode, quote

# about
about = {
    "website": None,
    "wikidata_id": 'Q15735774',
    "official_api_documentation": 'https://www.lesbonscomptes.com/recoll/',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}

# engine dependent config
paging = True
time_range_support = True

# parameters from settings.yml
base_url = None
search_dir = ''
mount_prefix = None
dl_prefix = None

# embedded
embedded_url = '<{ttype} controls height="166px" ' + 'src="{url}" type="{mtype}"></{ttype}>'


# helper functions
def get_time_range(time_range):
    """Translate a time-range keyword into an ISO date string.

    ``time_range`` is one of ``'day'``, ``'week'``, ``'month'`` or ``'year'``
    (mapped to 1, 7, 30 and 365 days).  The return value is today's date minus
    that many days in ISO format, or an empty string for any other value.
    """
    sw = {'day': 1, 'week': 7, 'month': 30, 'year': 365}  # pylint: disable=invalid-name

    offset = sw.get(time_range, 0)
    if not offset:
        return ''

    return (date.today() - timedelta(days=offset)).isoformat()


# do search-request
def request(query, params):
    """Build the search URL and store it in ``params['url']``.

    Reads ``params['time_range']`` and ``params['pageno']`` as well as the
    module level ``base_url`` and ``search_dir`` settings.  Returns the
    (mutated) ``params`` dict.
    """
    search_after = get_time_range(params['time_range'])
    args = urlencode({'query': query, 'page': params['pageno'], 'after': search_after, 'dir': search_dir})

    # Plain concatenation instead of str.format(): a base_url that happens to
    # contain '{' or '}' must not be interpreted as a format template.
    params['url'] = base_url + 'json?' + args + '&highlight=0'

    return params


# get response from search-request
def response(resp):
    """Convert the JSON document in ``resp.text`` into a list of result dicts.

    Every entry of the ``results`` array must provide ``label``, ``url`` and
    ``snippet``; ``size``, ``filename``, ``abstract``, ``author``, ``mtype``
    and ``time`` are copied only when present and non-empty.  If the document
    has an ``nres`` key, a ``{'number_of_results': ...}`` entry is appended.
    An empty document yields an empty list.
    """
    results = []

    response_json = loads(resp.text)

    if not response_json:
        return []

    for result in response_json.get('results', []):
        title = result['label']
        # map the indexed file location to the place it can be downloaded from
        url = result['url'].replace('file://' + mount_prefix, dl_prefix)
        content = str(result['snippet'])

        # append result
        item = {'url': url, 'title': title, 'content': content, 'template': 'files.html'}

        # optional fields: tolerate records in which they are missing
        size = result.get('size')
        if size:
            item['size'] = int(size)

        for parameter in ('filename', 'abstract', 'author', 'mtype', 'time'):
            value = result.get(parameter)
            if value:
                item[parameter] = value

        # facilitate preview support for known mime types
        mime = result.get('mtype') or ''
        if '/' in mime:
            # split only once so that a value with several slashes cannot
            # break the two-name unpacking
            mtype, subtype = mime.split('/', 1)
            item['mtype'] = mtype
            item['subtype'] = subtype

            if mtype in ('audio', 'video'):
                item['embedded'] = embedded_url.format(
                    ttype=mtype, url=quote(url.encode('utf8'), '/:'), mtype=mime
                )

            if mtype == 'image' and subtype in ('bmp', 'gif', 'jpeg', 'png'):
                item['img_src'] = url

        results.append(item)

    if 'nres' in response_json:
        results.append({'number_of_results': response_json['nres']})

    return results