# SPDX-License-Identifier: AGPL-3.0-or-later
"""Jisho (the Japanese-English dictionary)"""

from html import escape
from urllib.parse import urlencode, urljoin

# about
about = {
    "website": 'https://jisho.org',
    "wikidata_id": 'Q24568389',
    "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
    "language": 'ja',
}

categories = ['dictionaries']
paging = False

URL = 'https://jisho.org'
BASE_URL = 'https://jisho.org/word/'
SEARCH_URL = URL + '/api/v1/search/words?{query}'

# Maximum number of characters of the joined definitions shown as result content.
_CONTENT_LIMIT = 300

# Part-of-speech label Jisho uses for senses that come from Wikipedia.
_WIKIPEDIA_POS = 'Wikipedia definition'


def request(query, params):
    """Store the Jisho word-search URL for *query* in ``params['url']``.

    Returns the same ``params`` dict after updating it.
    """
    params['url'] = SEARCH_URL.format(query=urlencode({'keyword': query}))
    # NOTE(review): `logger` is not defined in this file; presumably it is
    # injected into the module by the host framework -- confirm.
    logger.debug("query_url --> %s", params['url'])
    return params


def response(resp):
    """Convert a Jisho API response into a list of result dicts.

    ``resp.json()`` is expected to return a dict whose optional ``'data'`` key
    holds the list of entries.  Each kept entry yields one result with ``url``,
    ``title`` (all alternative forms) and ``content`` (definitions, truncated
    to 300 characters).  The first kept entry additionally yields an infobox.
    """
    results = []
    first_result = True

    search_results = resp.json()

    for page in search_results.get('data', []):
        # Entries that are purely from Wikipedia are excluded.  An entry is
        # recognized as such by the first part of speech of its first sense.
        senses = page.get('senses')
        parts_of_speech = senses and senses[0].get('parts_of_speech')
        if parts_of_speech and parts_of_speech[0] == _WIKIPEDIA_POS:
            continue

        # Process alternative forms: "word (reading)", "word" or just "reading".
        alt_forms = []
        for title_raw in page['japanese']:
            if 'word' not in title_raw:
                alt_forms.append(title_raw['reading'])
                continue
            title = title_raw['word']
            if 'reading' in title_raw:
                title += ' (' + title_raw['reading'] + ')'
            alt_forms.append(title)

        result_url = urljoin(BASE_URL, page['slug'])
        definitions = get_definitions(page)

        # For results, we'll return the URL, all alternative forms (as title),
        # and all definitions (as description) truncated to 300 characters.
        content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
        if len(content) > _CONTENT_LIMIT:
            content = content[:_CONTENT_LIMIT] + '...'
        results.append({'url': result_url, 'title': ", ".join(alt_forms), 'content': content})

        # Like Wordnik, we'll return the first result in an infobox too.
        if first_result:
            first_result = False
            results.append(get_infobox(alt_forms, result_url, definitions))

    return results


def get_definitions(page):
    """Return one ``(parts_of_speech, english_definitions, extra)`` tuple per sense.

    All three members are strings: parts of speech joined by ``', '``, English
    definitions joined by ``'; '``, and ``extra`` built from the sense's
    ``tags`` / ``info`` / ``restrictions`` (empty when none are present).
    """
    definitions = []
    for defn_raw in page['senses']:
        extra = []
        tags = defn_raw.get('tags')
        info = defn_raw.get('info')
        restrictions = defn_raw.get('restrictions')

        # Extra data. Since they're not documented, this implementation is
        # based solely on the author's assumptions.
        if tags:
            if info:
                # "usually written as kana: <kana>"
                extra.append(tags[0] + ', ' + info[0] + '. ')
            else:
                # abbreviation, archaism, etc.
                extra.append(', '.join(tags) + '. ')
        elif info:
            # inconsistent
            extra.append(', '.join(info).capitalize() + '. ')

        if restrictions:
            extra.append('Only applies to: ' + ', '.join(restrictions) + '. ')

        definitions.append(
            (
                ', '.join(defn_raw['parts_of_speech']),
                '; '.join(defn_raw['english_definitions']),
                # Every piece ends with '. '; drop the final trailing space.
                ''.join(extra)[:-1],
            )
        )
    return definitions


def get_infobox(alt_forms, result_url, definitions):
    """Build the infobox dict for one entry.

    ``alt_forms[0]`` becomes the infobox title, the remaining forms are listed
    as "Other forms", and ``definitions`` (tuples as produced by
    ``get_definitions``) are rendered as an HTML list.  Text taken from the
    arguments is HTML-escaped before being placed into the markup.
    """
    infobox_content = []

    # title & alt_forms
    infobox_title = alt_forms[0]
    if len(alt_forms) > 1:
        other_forms = escape(", ".join(alt_forms[1:]), quote=False)
        infobox_content.append(f'<p><i>Other forms:</i> {other_forms}</p>')

    # definitions
    infobox_content.append(
        '''
        <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
        and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
        by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
        <ul>
    '''
    )
    for pos, engdef, extra in definitions:
        if pos == _WIKIPEDIA_POS:
            # Close the dictionary list and open a new one under the Wikipedia attribution.
            infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
        pos_html = f'<i>{escape(pos, quote=False)}</i>: ' if pos else ''
        extra_html = f' ({escape(extra, quote=False)})' if extra else ''
        infobox_content.append(f'<li>{pos_html}{escape(engdef, quote=False)}{extra_html}</li>')
    infobox_content.append('</ul>')

    return {
        'infobox': infobox_title,
        'content': ''.join(infobox_content),
        'urls': [
            {
                'title': 'Jisho.org',
                'url': result_url,
            }
        ],
    }