jisho.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Jisho (the Japanese-English dictionary)
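
Example ``settings.yml`` entry (a minimal sketch; the ``shortcut`` value below is
illustrative, not necessarily the upstream default):

.. code:: yaml

  - name: jisho
    engine: jisho
    shortcut: js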
"""

from urllib.parse import urlencode, urljoin

# about
about = {
    "website": 'https://jisho.org',
    "wikidata_id": 'Q24568389',
    "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
    "language": 'ja',
}

categories = ['dictionaries']
engine_type = 'online_dictionary'
paging = False

URL = 'https://jisho.org'
BASE_URL = 'https://jisho.org/word/'
SEARCH_URL = URL + '/api/v1/search/words?{query}'


def request(query, params):
    query = urlencode({'keyword': query})
    params['url'] = SEARCH_URL.format(query=query)
    # `logger` is injected into each engine module by the engine loader at runtime.
    logger.debug(f"query_url --> {params['url']}")
    return params

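
# Shape of the API response as consumed below (the API itself is only informally
# documented): the JSON body exposes a 'data' list; each entry carries 'japanese'
# (word/reading pairs), 'senses' (definitions with parts of speech, tags, info and
# restrictions) and a 'slug' that identifies the entry's page on jisho.org.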
def response(resp):
    results = []
    infoboxed = False

    search_results = resp.json()
    pages = search_results.get('data', [])

    for page in pages:
        # Entries that are purely from Wikipedia are excluded.
        if page['senses'][0]['parts_of_speech'] != [] and page['senses'][0]['parts_of_speech'][0] == 'Wikipedia definition':
            continue

        # Process alternative forms
        japanese = page['japanese']
        alt_forms = []
        for title_raw in japanese:
            if 'word' not in title_raw:
                alt_forms.append(title_raw['reading'])
            else:
                title = title_raw['word']
                if 'reading' in title_raw:
                    title += ' (' + title_raw['reading'] + ')'
                alt_forms.append(title)

        # Process definitions
        definitions = []
        def_raw = page['senses']
        for defn_raw in def_raw:
            extra = ''
            if not infoboxed:
                # Extra data. Since they're not documented, this implementation is based solely on the author's assumptions.
                if defn_raw['tags'] != []:
                    if defn_raw['info'] != []:
                        extra += defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. '  # "usually written as kana: <kana>"
                    else:
                        extra += ', '.join(defn_raw['tags']) + '. '  # abbreviation, archaism, etc.
                elif defn_raw['info'] != []:
                    extra += ', '.join(defn_raw['info']).capitalize() + '. '  # inconsistent
                if defn_raw['restrictions'] != []:
                    extra += 'Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. '
                extra = extra[:-1]  # drop the trailing space
            definitions.append((
                ', '.join(defn_raw['parts_of_speech']),
                '; '.join(defn_raw['english_definitions']),
                extra,
            ))
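
        # Build the plain-text description and, in parallel, the infobox HTML body:
        # a JMdict/JMnedict attribution line followed by a <ul> of definitions.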
        content = ''
        infobox_content = '''
            <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
            and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
            by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small><ul>
        '''
        for pos, engdef, extra in definitions:
            if pos == 'Wikipedia definition':
                infobox_content += '</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>'
            if pos == '':
                infobox_content += f"<li>{engdef}"
            else:
                infobox_content += f"<li><i>{pos}</i>: {engdef}"
            if extra != '':
                infobox_content += f" ({extra})"
            infobox_content += '</li>'
            content += f"{engdef}. "
        infobox_content += '</ul>'

        # For results, we'll return the URL, all alternative forms (as title),
        # and all definitions (as description) truncated to 300 characters.
        results.append({
            'url': urljoin(BASE_URL, page['slug']),
            'title': ", ".join(alt_forms),
            'content': content[:300] + (content[300:] and '...'),
        })

        # Like Wordnik, we'll return the first result in an infobox too.
        if not infoboxed:
            infoboxed = True

            infobox_urls = []
            infobox_urls.append({
                'title': 'Jisho.org',
                'url': urljoin(BASE_URL, page['slug']),
            })

            infobox = {
                'infobox': alt_forms[0],
                'urls': infobox_urls,
            }

            alt_forms.pop(0)
            alt_content = ''
            if len(alt_forms) > 0:
                alt_content = '<p><i>Other forms:</i> '
                alt_content += ", ".join(alt_forms)
                alt_content += '</p>'

            infobox['content'] = alt_content + infobox_content
            results.append(infobox)

    return results