
[enh] engines: add about variable

move meta information from comments to the about variable
so that the preferences page and the documentation can show this information
Alexandre Flament, 4 years ago (commit a4dcfa025c)
90 changed files with 1421 additions and 725 deletions
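
Every converted engine declares the same small set of keys. A minimal sketch of the new pattern (the values are taken from the 1337x.py diff below; the trailing comments are explanatory notes, not part of the commit):

    # about -- engine metadata, read by the preferences page and the documentation
    about = {
        "website": 'https://1337x.to/',      # public site of the engine
        "wikidata_id": 'Q28134166',          # Wikidata item of the engine, or None
        "official_api_documentation": None,  # URL, a {'url': ..., 'comment': ...} dict, or None
        "use_official_api": False,           # whether the engine calls an official API
        "require_api_key": False,            # whether an API key must be configured
        "results": 'HTML',                   # format of the results: HTML, JSON, XML-RSS, ...
    }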
  1. searx/engines/1337x.py (+14 -0)
  2. searx/engines/acgsou.py (+11 -7)
  3. searx/engines/ahmia.py (+11 -8)
  4. searx/engines/apkmirror.py (+10 -7)
  5. searx/engines/archlinux.py (+12 -8)
  6. searx/engines/arxiv.py (+10 -9)
  7. searx/engines/base.py (+10 -11)
  8. searx/engines/bing.py (+11 -11)
  9. searx/engines/bing_images.py (+11 -10)
  10. searx/engines/bing_news.py (+11 -9)
  11. searx/engines/bing_videos.py (+11 -8)
  12. searx/engines/btdigg.py (+14 -8)
  13. searx/engines/command.py (+4 -15)
  14. searx/engines/currency_convert.py (+14 -0)
  15. searx/engines/dailymotion.py (+11 -10)
  16. searx/engines/deezer.py (+11 -8)
  17. searx/engines/deviantart.py (+11 -10)
  18. searx/engines/dictzone.py (+10 -7)
  19. searx/engines/digbt.py (+10 -8)
  20. searx/engines/digg.py (+11 -8)
  21. searx/engines/doku.py (+14 -10)
  22. searx/engines/duckduckgo.py (+11 -11)
  23. searx/engines/duckduckgo_definitions.py (+12 -8)
  24. searx/engines/duckduckgo_images.py (+14 -11)
  25. searx/engines/duden.py (+11 -6)
  26. searx/engines/dummy-offline.py (+11 -3)
  27. searx/engines/dummy.py (+11 -3)
  28. searx/engines/ebay.py (+14 -9)
  29. searx/engines/elasticsearch.py (+5 -0)
  30. searx/engines/etools.py (+11 -7)
  31. searx/engines/fdroid.py (+11 -7)
  32. searx/engines/flickr.py (+11 -9)
  33. searx/engines/flickr_noapi.py (+12 -11)
  34. searx/engines/framalibre.py (+11 -8)
  35. searx/engines/frinkiac.py (+15 -8)
  36. searx/engines/genius.py (+12 -9)
  37. searx/engines/gentoo.py (+11 -9)
  38. searx/engines/gigablast.py (+10 -8)
  39. searx/engines/github.py (+12 -9)
  40. searx/engines/google.py (+14 -12)
  41. searx/engines/google_images.py (+10 -12)
  42. searx/engines/google_news.py (+11 -8)
  43. searx/engines/google_videos.py (+11 -8)
  44. searx/engines/ina.py (+13 -12)
  45. searx/engines/invidious.py (+14 -9)
  46. searx/engines/json_engine.py (+2 -0)
  47. searx/engines/kickass.py (+11 -8)
  48. searx/engines/mediawiki.py (+12 -11)
  49. searx/engines/microsoft_academic.py (+12 -8)
  50. searx/engines/mixcloud.py (+11 -8)
  51. searx/engines/not_evil.py (+11 -8)
  52. searx/engines/nyaa.py (+11 -7)
  53. searx/engines/opensemantic.py (+13 -9)
  54. searx/engines/openstreetmap.py (+11 -8)
  55. searx/engines/pdbe.py (+11 -8)
  56. searx/engines/peertube.py (+11 -10)
  57. searx/engines/photon.py (+11 -8)
  58. searx/engines/piratebay.py (+14 -9)
  59. searx/engines/pubmed.py (+13 -9)
  60. searx/engines/qwant.py (+10 -8)
  61. searx/engines/recoll.py (+11 -6)
  62. searx/engines/reddit.py (+11 -8)
  63. searx/engines/scanr_structures.py (+11 -8)
  64. searx/engines/searchcode_code.py (+11 -9)
  65. searx/engines/searx_engine.py (+10 -8)
  66. searx/engines/sepiasearch.py (+14 -8)
  67. searx/engines/soundcloud.py (+10 -8)
  68. searx/engines/spotify.py (+11 -8)
  69. searx/engines/stackoverflow.py (+12 -9)
  70. searx/engines/startpage.py (+14 -11)
  71. searx/engines/tokyotoshokan.py (+11 -8)
  72. searx/engines/torrentz.py (+11 -9)
  73. searx/engines/translated.py (+11 -7)
  74. searx/engines/unsplash.py (+11 -8)
  75. searx/engines/vimeo.py (+14 -13)
  76. searx/engines/wikidata.py (+11 -9)
  77. searx/engines/wikipedia.py (+11 -8)
  78. searx/engines/wolframalpha_api.py (+14 -9)
  79. searx/engines/wolframalpha_noapi.py (+14 -9)
  80. searx/engines/www1x.py (+11 -8)
  81. searx/engines/xpath.py (+2 -0)
  82. searx/engines/yacy.py (+14 -13)
  83. searx/engines/yahoo.py (+11 -9)
  84. searx/engines/yahoo_news.py (+14 -10)
  85. searx/engines/yandex.py (+11 -7)
  86. searx/engines/yggtorrent.py (+14 -9)
  87. searx/engines/youtube_api.py (+14 -9)
  88. searx/engines/youtube_noapi.py (+14 -9)
  89. searx/settings.yml (+213 -0)
  90. utils/fetch_engine_descriptions.py (+206 -0)
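
The new utils/fetch_engine_descriptions.py (+206 lines) is not shown in this excerpt. Purely as an illustration of what the wikidata_id field makes possible, here is a hypothetical snippet that resolves an engine description from Wikidata (the endpoint is Wikidata's public Special:EntityData API; the helper and its use here are assumptions, not the script's actual contents):

    # hypothetical helper: look up a short engine description via its wikidata_id
    import requests

    def fetch_description(wikidata_id, lang='en'):
        # Wikidata serves per-item JSON under Special:EntityData
        url = 'https://www.wikidata.org/wiki/Special:EntityData/{}.json'.format(wikidata_id)
        entity = requests.get(url).json()['entities'][wikidata_id]
        return entity['descriptions'].get(lang, {}).get('value')

    print(fetch_description('Q12805'))  # Q12805: the id used by the duckduckgo engines below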

+ 14 - 0
searx/engines/1337x.py

@@ -1,7 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ 1337x
+"""
+
 from urllib.parse import quote, urljoin
 from lxml import html
 from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex
 
+# about
+about = {
+    "website": 'https://1337x.to/',
+    "wikidata_id": 'Q28134166',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
 
 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

+ 11 - 7
searx/engines/acgsou.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
-
- @website      https://www.acgsou.com/
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, content, seed, leech, torrentfile
 """
 
 from urllib.parse import urlencode
 from lxml import html
 from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex
 
+# about
+about = {
+    "website": 'https://www.acgsou.com/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files', 'images', 'videos', 'music']
 paging = True

+ 11 - 8
searx/engines/ahmia.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Ahmia (Onions)
-
- @website      http://msydqstlz2kzerdg.onion
- @provides-api no
-
- @using-api    no
- @results      HTML
- @stable       no
- @parse        url, title, content
 """
 
 from urllib.parse import urlencode, urlparse, parse_qs
 from lxml.html import fromstring
 from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath
 
+# about
+about = {
+    "website": 'http://msydqstlz2kzerdg.onion',
+    "wikidata_id": 'Q18693938',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine config
 categories = ['onions']
 paging = True

+ 10 - 7
searx/engines/apkmirror.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  APK Mirror
-
- @website     https://www.apkmirror.com
-
- @using-api   no
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, thumbnail_src
 """
 
 from urllib.parse import urlencode
 from lxml import html
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 
+# about
+about = {
+    "website": 'https://www.apkmirror.com',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
 
 # engine dependent config
 categories = ['it']

+ 12 - 8
searx/engines/archlinux.py

@@ -1,20 +1,24 @@
-# -*- coding: utf-8 -*-
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Arch Linux Wiki
 
- @website      https://wiki.archlinux.org
- @provide-api  no (Mediawiki provides API, but Arch Wiki blocks access to it
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title
+ API: Mediawiki provides API, but Arch Wiki blocks access to it
 """
 
 from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 
+# about
+about = {
+    "website": 'https://wiki.archlinux.org/',
+    "wikidata_id": 'Q101445877',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['it']
 language_support = True

+ 10 - 9
searx/engines/arxiv.py

@@ -1,20 +1,21 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  ArXiV (Scientific preprints)
- @website     https://arxiv.org
- @provide-api yes (export.arxiv.org/api/query)
- @using-api   yes
- @results     XML-RSS
- @stable      yes
- @parse       url, title, publishedDate, content
- More info on api: https://arxiv.org/help/api/user-manual
 """
 
 from lxml import html
 from datetime import datetime
 from searx.utils import eval_xpath_list, eval_xpath_getindex
 
+# about
+about = {
+    "website": 'https://arxiv.org',
+    "wikidata_id": 'Q118398',
+    "official_api_documentation": 'https://arxiv.org/help/api',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'XML-RSS',
+}
 
 categories = ['science']
 paging = True

+ 10 - 11
searx/engines/base.py

@@ -1,16 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  BASE (Scholar publications)
-
- @website     https://base-search.net
- @provide-api yes with authorization (https://api.base-search.net/)
-
- @using-api   yes
- @results     XML
- @stable      ?
- @parse       url, title, publishedDate, content
- More info on api: http://base-search.net/about/download/base_interface.pdf
 """
 
 from urllib.parse import urlencode
@@ -19,6 +9,15 @@ from datetime import datetime
 import re
 from searx.utils import searx_useragent
 
+# about
+about = {
+    "website": 'https://base-search.net',
+    "wikidata_id": 'Q448335',
+    "official_api_documentation": 'https://api.base-search.net/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'XML',
+}
 
 categories = ['science']
 

+ 11 - 11
searx/engines/bing.py

@@ -1,16 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Bing (Web)
-
- @website     https://www.bing.com
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-              max. 5000 query/month
-
- @using-api   no (because of query limit)
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
-
- @todo        publishedDate
 """
 
 import re
@@ -21,6 +11,16 @@ from searx.utils import eval_xpath, extract_text, match_language
 
 logger = logger.getChild('bing engine')
 
+# about
+about = {
+    "website": 'https://www.bing.com',
+    "wikidata_id": 'Q182496',
+    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = True

+ 11 - 10
searx/engines/bing_images.py

@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Bing (Images)
-
- @website     https://www.bing.com/images
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-              max. 5000 query/month
-
- @using-api   no (because of query limit)
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, img_src
-
 """
 
 from urllib.parse import urlencode
@@ -20,6 +11,16 @@ from searx.utils import match_language
 from searx.engines.bing import language_aliases
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
 
+# about
+about = {
+    "website": 'https://www.bing.com/images',
+    "wikidata_id": 'Q182496',
+    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['images']
 paging = True

+ 11 - 9
searx/engines/bing_news.py

@@ -1,14 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Bing (News)
-
- @website     https://www.bing.com/news
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-              max. 5000 query/month
-
- @using-api   no (because of query limit)
- @results     RSS (using search portal)
- @stable      yes (except perhaps for the images)
- @parse       url, title, content, publishedDate, thumbnail
 """
 
 from datetime import datetime
@@ -20,6 +12,16 @@ from searx.utils import match_language, eval_xpath_getindex
 from searx.engines.bing import language_aliases
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
 
+# about
+about = {
+    "website": 'https://www.bing.com/news',
+    "wikidata_id": 'Q2878637',
+    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'RSS',
+}
+
 # engine dependent config
 categories = ['news']
 paging = True

+ 11 - 8
searx/engines/bing_videos.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Bing (Videos)
-
- @website     https://www.bing.com/videos
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search)
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, content, thumbnail
 """
 
 from json import loads
@@ -18,6 +11,16 @@ from searx.utils import match_language
 from searx.engines.bing import language_aliases
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
 
+# about
+about = {
+    "website": 'https://www.bing.com/videos',
+    "wikidata_id": 'Q4914152',
+    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['videos']
 paging = True
 safesearch = True

+ 14 - 8
searx/engines/btdigg.py

@@ -1,19 +1,25 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  BTDigg (Videos, Music, Files)
-
- @website     https://btdig.com
- @provide-api yes (on demand)
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content, seed, leech, magnetlink
 """
 
 from lxml import html
 from urllib.parse import quote, urljoin
 from searx.utils import extract_text, get_torrent_size
 
+# about
+about = {
+    "website": 'https://btdig.com',
+    "wikidata_id": 'Q4836698',
+    "official_api_documentation": {
+        'url': 'https://btdig.com/contacts',
+        'comment': 'on demand'
+    },
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos', 'music', 'files']
 paging = True

+ 4 - 15
searx/engines/command.py

@@ -1,18 +1,7 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-'''
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Command (offline)
+"""
 
 import re
 from os.path import expanduser, isabs, realpath, commonprefix

+ 14 - 0
searx/engines/currency_convert.py

@@ -1,5 +1,19 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ currency convert (DuckDuckGo)
+"""
+
 import json
 
+# about
+about = {
+    "website": 'https://duckduckgo.com/',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": 'https://duckduckgo.com/api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSONP',
+}
 
 engine_type = 'online_currency'
 categories = []

+ 11 - 10
searx/engines/dailymotion.py

@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Dailymotion (Videos)
-
- @website     https://www.dailymotion.com
- @provide-api yes (http://www.dailymotion.com/developer)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, thumbnail, publishedDate, embedded
-
- @todo        set content-parameter with correct data
 """
 
 from json import loads
@@ -17,6 +8,16 @@ from datetime import datetime
 from urllib.parse import urlencode
 from searx.utils import match_language, html_to_text
 
+# about
+about = {
+    "website": 'https://www.dailymotion.com',
+    "wikidata_id": 'Q769222',
+    "official_api_documentation": 'https://www.dailymotion.com/developer',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['videos']
 paging = True

+ 11 - 8
searx/engines/deezer.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Deezer (Music)
-
- @website     https://deezer.com
- @provide-api yes (http://developers.deezer.com/api/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, embedded
 """
 
 from json import loads
 from urllib.parse import urlencode
 
+# about
+about = {
+    "website": 'https://deezer.com',
+    "wikidata_id": 'Q602243',
+    "official_api_documentation": 'https://developers.deezer.com/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['music']
 paging = True

+ 11 - 10
searx/engines/deviantart.py

@@ -1,21 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Deviantart (Images)
-
- @website     https://www.deviantart.com/
- @provide-api yes (https://www.deviantart.com/developers/) (RSS)
-
- @using-api   no (TODO, rewrite to api)
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, img_src
-
- @todo        rewrite to api
 """
 # pylint: disable=missing-function-docstring
 
 from urllib.parse import urlencode
 from lxml import html
 
+# about
+about = {
+    "website": 'https://www.deviantart.com/',
+    "wikidata_id": 'Q46523',
+    "official_api_documentation": 'https://www.deviantart.com/developers/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['images']
 paging = True

+ 10 - 7
searx/engines/dictzone.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Dictzone
-
- @website     https://dictzone.com/
- @provide-api no
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
 """
 
 from urllib.parse import urljoin
 from lxml import html
 from searx.utils import eval_xpath
 
+# about
+about = {
+    "website": 'https://dictzone.com/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
 
 engine_type = 'online_dictionnary'
 categories = ['general']

+ 10 - 8
searx/engines/digbt.py

@@ -1,19 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  DigBT (Videos, Music, Files)
-
- @website     https://digbt.org
- @provide-api no
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content, magnetlink
 """
 
 from urllib.parse import urljoin
 from lxml import html
 from searx.utils import extract_text, get_torrent_size
 
+# about
+about = {
+    "website": 'https://digbt.org',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
 
 categories = ['videos', 'music', 'files']
 paging = True

+ 11 - 8
searx/engines/digg.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Digg (News, Social media)
-
- @website     https://digg.com
- @provide-api no
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content, publishedDate, thumbnail
 """
 # pylint: disable=missing-function-docstring
 
@@ -17,6 +10,16 @@ from datetime import datetime
 
 from lxml import html
 
+# about
+about = {
+    "website": 'https://digg.com',
+    "wikidata_id": 'Q270478',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['news', 'social media']
 paging = True

+ 14 - 10
searx/engines/doku.py

@@ -1,18 +1,22 @@
-# Doku Wiki
-#
-# @website     https://www.dokuwiki.org/
-# @provide-api yes
-#              (https://www.dokuwiki.org/devel:xmlrpc)
-#
-# @using-api   no
-# @results     HTML
-# @stable      yes
-# @parse       (general)    url, title, content
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Doku Wiki
+"""
 
 from urllib.parse import urlencode
 from lxml.html import fromstring
 from searx.utils import extract_text, eval_xpath
 
+# about
+about = {
+    "website": 'https://www.dokuwiki.org/',
+    "wikidata_id": 'Q851864',
+    "official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
 paging = False

+ 11 - 11
searx/engines/duckduckgo.py

@@ -1,22 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  DuckDuckGo (Web)
-
- @website     https://duckduckgo.com/
- @provide-api yes (https://duckduckgo.com/api),
-              but not all results from search-site
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
-
- @todo        rewrite to api
 """
 
 from lxml.html import fromstring
 from json import loads
 from searx.utils import extract_text, match_language, eval_xpath
 
+# about
+about = {
+    "website": 'https://duckduckgo.com/',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": 'https://duckduckgo.com/api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = False

+ 12 - 8
searx/engines/duckduckgo_definitions.py

@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
-DuckDuckGo (definitions)
-
-- `Instant Answer API`_
-- `DuckDuckGo query`_
-
-.. _Instant Answer API: https://duckduckgo.com/api
-.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1
-
+ DuckDuckGo (Instant Answer API)
 """
 
 import json
@@ -22,6 +16,16 @@ from searx.external_urls import get_external_url, get_earth_coordinates_url, are
 
 logger = logger.getChild('duckduckgo_definitions')
 
+# about
+about = {
+    "website": 'https://duckduckgo.com/',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": 'https://duckduckgo.com/api',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 URL = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
 

+ 14 - 11
searx/engines/duckduckgo_images.py

@@ -1,16 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  DuckDuckGo (Images)
-
- @website     https://duckduckgo.com/
- @provide-api yes (https://duckduckgo.com/api),
-              but images are not supported
-
- @using-api   no
- @results     JSON (site requires js to get images)
- @stable      no (JSON can change)
- @parse       url, title, img_src
-
- @todo        avoid extra request
 """
 
 from json import loads
@@ -20,6 +10,19 @@ from searx.engines.duckduckgo import get_region_code
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
 from searx.poolrequests import get
 
+# about
+about = {
+    "website": 'https://duckduckgo.com/',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": {
+        'url': 'https://duckduckgo.com/api',
+        'comment': 'but images are not supported',
+    },
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON (site requires js to get images)',
+}
+
 # engine dependent config
 categories = ['images']
 paging = True

+ 11 - 6
searx/engines/duden.py

@@ -1,11 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Duden
- @website     https://www.duden.de
- @provide-api no
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
 """
 
 import re
@@ -13,6 +8,16 @@ from urllib.parse import quote, urljoin
 from lxml import html
 from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
 
+# about
+about = {
+    "website": 'https://www.duden.de',
+    "wikidata_id": 'Q73624591',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['general']
 paging = True
 language_support = False

+ 11 - 3
searx/engines/dummy-offline.py

@@ -1,11 +1,19 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Dummy Offline
-
- @results     one result
- @stable      yes
 """
 
 
+# about
+about = {
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+
 def search(query, request_params):
     return [{
         'result': 'this is what you get',

+ 11 - 3
searx/engines/dummy.py

@@ -1,10 +1,18 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Dummy
-
- @results     empty array
- @stable      yes
 """
 
+# about
+about = {
+    "website": None,
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'empty array',
+}
+
 
 # do search-request
 def request(query, params):

+ 14 - 9
searx/engines/ebay.py

@@ -1,17 +1,22 @@
-#  Ebay (Videos, Music, Files)
-#
-# @website     https://www.ebay.com
-# @provide-api no (nothing found)
-#
-# @using-api   no
-# @results     HTML (using search portal)
-# @stable      yes (HTML can change)
-# @parse       url, title, content, price, shipping, source
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Ebay (Videos, Music, Files)
+"""
 
 from lxml import html
 from searx.engines.xpath import extract_text
 from urllib.parse import quote
 
+# about
+about = {
+    "website": 'https://www.ebay.com',
+    "wikidata_id": 'Q58024',
+    "official_api_documentation": 'https://developer.ebay.com/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['shopping']
 paging = True
 

+ 5 - 0
searx/engines/elasticsearch.py

@@ -1,3 +1,8 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Elasticsearch
+"""
+
 from json import loads, dumps
 from requests.auth import HTTPBasicAuth
 from searx.exceptions import SearxEngineAPIException

+ 11 - 7
searx/engines/etools.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  eTools (Web)
-
- @website      https://www.etools.ch
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, content
 """
 
 from lxml import html
 from urllib.parse import quote
 from searx.utils import extract_text, eval_xpath
 
+# about
+about = {
+    "website": 'https://www.etools.ch',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['general']
 paging = False
 language_support = False

+ 11 - 7
searx/engines/fdroid.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  F-Droid (a repository of FOSS applications for Android)
-
- @website      https://f-droid.org/
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, content
 """
 
 from urllib.parse import urlencode
 from lxml import html
 from searx.utils import extract_text
 
+# about
+about = {
+    "website": 'https://f-droid.org/',
+    "wikidata_id": 'Q1386210',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files']
 paging = True

+ 11 - 9
searx/engines/flickr.py

@@ -1,21 +1,23 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Flickr (Images)
 
- @website     https://www.flickr.com
- @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, thumbnail, img_src
  More info on api-key : https://www.flickr.com/services/apps/create/
 """
 
 from json import loads
 from urllib.parse import urlencode
 
+# about
+about = {
+    "website": 'https://www.flickr.com',
+    "wikidata_id": 'Q103204',
+    "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
+    "use_official_api": True,
+    "require_api_key": True,
+    "results": 'JSON',
+}
+
 categories = ['images']
 
 nb_per_page = 15

+ 12 - 11
searx/engines/flickr_noapi.py

@@ -1,15 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
-  Flickr (Images)
-
- @website     https://www.flickr.com
- @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, thumbnail, img_src
+ Flickr (Images)
 """
 
 from json import loads
@@ -21,6 +12,16 @@ from searx.utils import ecma_unescape, html_to_text
 
 logger = logger.getChild('flickr-noapi')
 
+# about
+about = {
+    "website": 'https://www.flickr.com',
+    "wikidata_id": 'Q103204',
+    "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['images']
 
 url = 'https://www.flickr.com/'

+ 11 - 8
searx/engines/framalibre.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  FramaLibre (It)
-
- @website     https://framalibre.org/
- @provide-api no
-
- @using-api   no
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, content, thumbnail, img_src
 """
 
 from html import escape
@@ -15,6 +8,16 @@ from urllib.parse import urljoin, urlencode
 from lxml import html
 from searx.utils import extract_text
 
+# about
+about = {
+    "website": 'https://framalibre.org/',
+    "wikidata_id": 'Q30213882',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['it']
 paging = True

+ 15 - 8
searx/engines/frinkiac.py

@@ -1,17 +1,24 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
-Frinkiac (Images)
-
-@website     https://www.frinkiac.com
-@provide-api no
-@using-api   no
-@results     JSON
-@stable      no
-@parse       url, title, img_src
+ Frinkiac (Images)
 """
 
 from json import loads
 from urllib.parse import urlencode
 
+# about
+about = {
+    "website": 'https://frinkiac.com',
+    "wikidata_id": 'Q24882614',
+    "official_api_documentation": {
+        'url': None,
+        'comment': 'see https://github.com/MitchellAW/CompuGlobal'
+    },
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 categories = ['images']
 
 BASE = 'https://frinkiac.com/'

+ 12 - 9
searx/engines/genius.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
-Genius
-
- @website     https://www.genius.com/
- @provide-api yes (https://docs.genius.com/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, thumbnail, publishedDate
+ Genius
 """
 
 from json import loads
 from urllib.parse import urlencode
 from datetime import datetime
 
+# about
+about = {
+    "website": 'https://genius.com/',
+    "wikidata_id": 'Q3419343',
+    "official_api_documentation": 'https://docs.genius.com/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['music']
 paging = True

+ 11 - 9
searx/engines/gentoo.py

@@ -1,20 +1,22 @@
-# -*- coding: utf-8 -*-
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Gentoo Wiki
-
- @website      https://wiki.gentoo.org
- @provide-api  yes
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title
 """
 
 from urllib.parse import urlencode, urljoin
 from lxml import html
 from searx.utils import extract_text
 
+# about
+about = {
+    "website": 'https://wiki.gentoo.org/',
+    "wikidata_id": 'Q1050637',
+    "official_api_documentation": 'https://wiki.gentoo.org/api.php',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['it']
 language_support = True

+ 10 - 8
searx/engines/gigablast.py

@@ -1,14 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Gigablast (Web)
-
- @website     https://gigablast.com
- @provide-api yes (https://gigablast.com/api.html)
-
- @using-api   yes
- @results     XML
- @stable      yes
- @parse       url, title, content
 """
 # pylint: disable=missing-function-docstring, invalid-name
 
@@ -18,6 +10,16 @@ from urllib.parse import urlencode
 # from searx import logger
 from searx.poolrequests import get
 
+# about
+about = {
+    "website": 'https://www.gigablast.com',
+    "wikidata_id": 'Q3105449',
+    "official_api_documentation": 'https://gigablast.com/api.html',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['general']
 # gigablast's pagination is totally damaged, don't use it

+ 12 - 9
searx/engines/github.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
- Github (It)
-
- @website     https://github.com/
- @provide-api yes (https://developer.github.com/v3/)
-
- @using-api   yes
- @results     JSON
- @stable      yes (using api)
- @parse       url, title, content
+ Github (IT)
 """
 
 from json import loads
 from urllib.parse import urlencode
 
+# about
+about = {
+    "website": 'https://github.com/',
+    "wikidata_id": 'Q364',
+    "official_api_documentation": 'https://developer.github.com/v3/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['it']
 

+ 14 - 12
searx/engines/google.py

@@ -1,19 +1,11 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Google (Web)
 
-:website:     https://www.google.com
-:provide-api: yes (https://developers.google.com/custom-search/)
-:using-api:   not the offical, since it needs registration to another service
-:results:     HTML
-:stable:      no
-:parse:       url, title, content, number_of_results, answer, suggestion, correction
-
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.
-
-.. _Query Parameter Definitions:
-   https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
+ For detailed description of the *REST-full* API see: `Query Parameter
+ Definitions`_.
 
+ .. _Query Parameter Definitions:
+ https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
 """
 
 # pylint: disable=invalid-name, missing-function-docstring
@@ -27,6 +19,16 @@ from searx.exceptions import SearxEngineCaptchaException
 
 logger = logger.getChild('google engine')
 
+# about
+about = {
+    "website": 'https://www.google.com',
+    "wikidata_id": 'Q9366',
+    "official_api_documentation": 'https://developers.google.com/custom-search/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = True

+ 10 - 12
searx/engines/google_images.py

@@ -1,14 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Google (Images)
 
-:website:     https://images.google.com (redirected to subdomain www.)
-:provide-api: yes (https://developers.google.com/custom-search/)
-:using-api:   not the offical, since it needs registration to another service
-:results:     HTML
-:stable:      no
-:template:    images.html
-:parse:       url, title, content, source, thumbnail_src, img_src
-
 For detailed description of the *REST-full* API see: `Query Parameter
 Definitions`_.
 
@@ -18,10 +10,6 @@ Definitions`_.
    ``data:` scheme).::
 
      Header set Content-Security-Policy "img-src 'self' data: ;"
-
-.. _Query Parameter Definitions:
-   https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
-
 """
 
 from urllib.parse import urlencode, urlparse, unquote
@@ -39,6 +27,16 @@ from searx.engines.google import (
 
 logger = logger.getChild('google images')
 
+# about
+about = {
+    "website": 'https://images.google.com/',
+    "wikidata_id": 'Q521550',
+    "official_api_documentation": 'https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions',  # NOQA
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 
 categories = ['images']

+ 11 - 8
searx/engines/google_news.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Google (News)
-
- @website     https://news.google.com
- @provide-api no
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, content, publishedDate
 """
 
 from urllib.parse import urlencode
@@ -15,6 +8,16 @@ from lxml import html
 from searx.utils import match_language
 from searx.engines.google import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
 
+# about
+about = {
+    "website": 'https://news.google.com',
+    "wikidata_id": 'Q12020',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # search-url
 categories = ['news']
 paging = True

+ 11 - 8
searx/engines/google_videos.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Google (Videos)
-
- @website     https://www.google.com
- @provide-api yes (https://developers.google.com/custom-search/)
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, content, thumbnail
 """
 
 from datetime import date, timedelta
@@ -16,6 +9,16 @@ from lxml import html
 from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
 import re
 
+# about
+about = {
+    "website": 'https://www.google.com',
+    "wikidata_id": 'Q219885',
+    "official_api_documentation": 'https://developers.google.com/custom-search/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos']
 paging = True

+ 13 - 12
searx/engines/ina.py

@@ -1,15 +1,7 @@
-#  INA (Videos)
-#
-# @website     https://www.ina.fr/
-# @provide-api no
-#
-# @using-api   no
-# @results     HTML (using search portal)
-# @stable      no (HTML can change)
-# @parse       url, title, content, publishedDate, thumbnail
-#
-# @todo        set content-parameter with correct data
-# @todo        embedded (needs some md5 from video page)
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ INA (Videos)
+"""
 
 from json import loads
 from html import unescape
@@ -18,6 +10,15 @@ from lxml import html
 from dateutil import parser
 from searx.utils import extract_text
 
+# about
+about = {
+    "website": 'https://www.ina.fr/',
+    "wikidata_id": 'Q1665109',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
 
 # engine dependent config
 categories = ['videos']

+ 14 - 9
searx/engines/invidious.py

@@ -1,17 +1,22 @@
-# Invidious (Videos)
-#
-# @website     https://invidio.us/
-# @provide-api yes (https://github.com/omarroth/invidious/wiki/API)
-#
-# @using-api   yes
-# @results     JSON
-# @stable      yes
-# @parse       url, title, content, publishedDate, thumbnail, embedded, author, length
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Invidious (Videos)
+"""
 
 from urllib.parse import quote_plus
 from dateutil import parser
 import time
 
+# about
+about = {
+    "website": 'https://instances.invidio.us/',
+    "wikidata_id": 'Q79343316',
+    "official_api_documentation": 'https://github.com/omarroth/invidious/wiki/API',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ["videos", "music"]
 paging = True

+ 2 - 0
searx/engines/json_engine.py

@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
 from collections.abc import Iterable
 from json import loads
 from urllib.parse import urlencode

+ 11 - 8
searx/engines/kickass.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Kickass Torrent (Videos, Music, Files)
-
- @website     https://kickass.so
- @provide-api no (nothing found)
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      yes (HTML can change)
- @parse       url, title, content, seed, leech, magnetlink
 """
 
 from lxml import html
@@ -15,6 +8,16 @@ from operator import itemgetter
 from urllib.parse import quote, urljoin
 from searx.utils import extract_text, get_torrent_size, convert_str_to_int
 
+# about
+about = {
+    "website": 'https://kickass.so',
+    "wikidata_id": 'Q17062285',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos', 'music', 'files']
 paging = True

+ 12 - 11
searx/engines/mediawiki.py

@@ -1,21 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
- general mediawiki-engine (Web)
-
- @website     websites built on mediawiki (https://www.mediawiki.org)
- @provide-api yes (http://www.mediawiki.org/wiki/API:Search)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title
-
- @todo        content
+ General mediawiki-engine (Web)
 """
 
 from json import loads
 from string import Formatter
 from urllib.parse import urlencode, quote
 
+# about
+about = {
+    "website": None,
+    "wikidata_id": None,
+    "official_api_documentation": 'http://www.mediawiki.org/wiki/API:Search',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['general']
 language_support = True

+ 12 - 8
searx/engines/microsoft_academic.py

@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
-Microsoft Academic (Science)
-
-@website     https://academic.microsoft.com
-@provide-api yes
-@using-api   no
-@results     JSON
-@stable      no
-@parse       url, title, content
+ Microsoft Academic (Science)
 """
 
 from datetime import datetime
@@ -15,6 +9,16 @@ from uuid import uuid4
 from urllib.parse import urlencode
 from searx.utils import html_to_text
 
+# about
+about = {
+    "website": 'https://academic.microsoft.com',
+    "wikidata_id": 'Q28136779',
+    "official_api_documentation": 'http://ma-graph.org/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 categories = ['images']
 paging = True
 result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'

+ 11 - 8
searx/engines/mixcloud.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Mixcloud (Music)
-
- @website     https://http://www.mixcloud.com/
- @provide-api yes (http://www.mixcloud.com/developers/
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, embedded, publishedDate
 """
 
 from json import loads
 from dateutil import parser
 from urllib.parse import urlencode
 
+# about
+about = {
+    "website": 'https://www.mixcloud.com/',
+    "wikidata_id": 'Q6883832',
+    "official_api_documentation": 'http://www.mixcloud.com/developers/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['music']
 paging = True

+ 11 - 8
searx/engines/not_evil.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  not Evil (Onions)
-
- @website     http://hss3uro2hsxfogfq.onion
- @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, content
 """
 
 from urllib.parse import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
 
+# about
+about = {
+    "website": 'http://hss3uro2hsxfogfq.onion',
+    "wikidata_id": None,
+    "official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['onions']
 paging = True

+ 11 - 7
searx/engines/nyaa.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Nyaa.si (Anime Bittorrent tracker)
-
- @website      https://nyaa.si/
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, content, seed, leech, torrentfile
 """
 
 from lxml import html
 from urllib.parse import urlencode
 from searx.utils import extract_text, get_torrent_size, int_or_zero
 
+# about
+about = {
+    "website": 'https://nyaa.si/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files', 'images', 'videos', 'music']
 paging = True

+ 13 - 9
searx/engines/opensemantic.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
-Open Semantic Search
-
- @website    https://www.opensemanticsearch.org/
- @provide-api yes (https://www.opensemanticsearch.org/dev)
-
- @using-api  yes
- @results    JSON
- @stable     yes
- @parse      url, title, content, publishedDate
+ Open Semantic Search
 """
+
 from dateutil import parser
 from json import loads
 from urllib.parse import quote
 
+# about
+about = {
+    "website": 'https://www.opensemanticsearch.org/',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://www.opensemanticsearch.org/dev',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 base_url = 'http://localhost:8983/solr/opensemanticsearch/'
 search_string = 'query?q={query}'
 

+ 11 - 8
searx/engines/openstreetmap.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  OpenStreetMap (Map)
-
- @website     https://openstreetmap.org/
- @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title
 """
 
 import re
 from json import loads
 from flask_babel import gettext
 
+# about
+about = {
+    "website": 'https://www.openstreetmap.org/',
+    "wikidata_id": 'Q936',
+    "official_api_documentation": 'http://wiki.openstreetmap.org/wiki/Nominatim',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['map']
 paging = False

+ 11 - 8
searx/engines/pdbe.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  PDBe (Protein Data Bank in Europe)
-
- @website       https://www.ebi.ac.uk/pdbe
- @provide-api   yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
-                unlimited
- @using-api     yes
- @results       python dictionary (from json)
- @stable        yes
- @parse         url, title, content, img_src
 """
 
 from json import loads
 from flask_babel import gettext
 
+# about
+about = {
+    "website": 'https://www.ebi.ac.uk/pdbe',
+    "wikidata_id": 'Q55823905',
+    "official_api_documentation": 'https://www.ebi.ac.uk/pdbe/api/doc/search.html',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 categories = ['science']
 
 hide_obsolete = False

+ 11 - 10
searx/engines/peertube.py

@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  peertube (Videos)
-
- @website     https://www.peertube.live
- @provide-api yes (https://docs.joinpeertube.org/api-rest-reference.html)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, thumbnail, publishedDate, embedded
-
- @todo        implement time range support
 """
 
 from json import loads
@@ -17,6 +8,16 @@ from datetime import datetime
 from urllib.parse import urlencode
 from searx.utils import html_to_text
 
+# about
+about = {
+    "website": 'https://joinpeertube.org',
+    "wikidata_id": 'Q50938515',
+    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ["videos"]
 paging = True

+ 11 - 8
searx/engines/photon.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Photon (Map)
-
- @website     https://photon.komoot.de
- @provide-api yes (https://photon.komoot.de/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title
 """
 
 from json import loads
 from urllib.parse import urlencode
 from searx.utils import searx_useragent
 
+# about
+about = {
+    "website": 'https://photon.komoot.de',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://photon.komoot.de/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['map']
 paging = False

+ 14 - 9
searx/engines/piratebay.py

@@ -1,12 +1,7 @@
-#  Piratebay (Videos, Music, Files)
-#
-# @website     https://thepiratebay.org
-# @provide-api yes (https://apibay.org/)
-#
-# @using-api   yes
-# @results     JSON
-# @stable      no (the API is not documented nor versioned)
-# @parse       url, title, seed, leech, magnetlink, filesize, publishedDate
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Piratebay (Videos, Music, Files)
+"""
 
 from json import loads
 from datetime import datetime
@@ -15,6 +10,16 @@ from operator import itemgetter
 from urllib.parse import quote
 from searx.utils import get_torrent_size
 
+# about
+about = {
+    "website": 'https://thepiratebay.org',
+    "wikidata_id": 'Q22663',
+    "official_api_documentation": 'https://apibay.org/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ["videos", "music", "files"]
 

+ 13 - 9
searx/engines/pubmed.py

@@ -1,14 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  PubMed (Scholar publications)
- @website     https://www.ncbi.nlm.nih.gov/pubmed/
- @provide-api yes (https://www.ncbi.nlm.nih.gov/home/develop/api/)
- @using-api   yes
- @results     XML
- @stable      yes
- @parse       url, title, publishedDate, content
- More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/
 """
 
 from flask_babel import gettext
@@ -17,6 +9,18 @@ from datetime import datetime
 from urllib.parse import urlencode
 from searx.poolrequests import get
 
+# about
+about = {
+    "website": 'https://www.ncbi.nlm.nih.gov/pubmed/',
+    "wikidata_id": 'Q1540899',
+    "official_api_documentation": {
+        'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/',
+        'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/'
+    },
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'XML',
+}
 
 categories = ['science']
 

+ 10 - 8
searx/engines/qwant.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Qwant (Web, Images, News, Social)
-
- @website     https://qwant.com/
- @provide-api not officially (https://api.qwant.com/api/search/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content
 """
 
 from datetime import datetime
@@ -17,6 +10,15 @@ from searx.utils import html_to_text, match_language
 from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException
 from searx.raise_for_httperror import raise_for_httperror
 
+# about
+about = {
+    "website": 'https://www.qwant.com/',
+    "wikidata_id": 'Q14657870',
+    "official_api_documentation": None,
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
 
 # engine dependent config
 categories = []

+ 11 - 6
searx/engines/recoll.py

@@ -1,17 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Recoll (local search engine)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, content, size, abstract, author, mtype, subtype, time, \
-              filename, label, type, embedded
 """
 
 from datetime import date, timedelta
 from json import loads
 from urllib.parse import urlencode, quote
 
+# about
+about = {
+    "website": None,
+    "wikidata_id": 'Q15735774',
+    "official_api_documentation": 'https://www.lesbonscomptes.com/recoll/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 time_range_support = True
 

+ 11 - 8
searx/engines/reddit.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Reddit
-
- @website      https://www.reddit.com/
- @provide-api  yes (https://www.reddit.com/dev/api)
-
- @using-api    yes
- @results      JSON
- @stable       yes
- @parse        url, title, content, thumbnail, publishedDate
 """
 
 import json
 from datetime import datetime
 from urllib.parse import urlencode, urljoin, urlparse
 
+# about
+about = {
+    "website": 'https://www.reddit.com/',
+    "wikidata_id": 'Q1136',
+    "official_api_documentation": 'https://www.reddit.com/dev/api',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']
 page_size = 25

+ 11 - 8
searx/engines/scanr_structures.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  ScanR Structures (Science)
-
- @website     https://scanr.enseignementsup-recherche.gouv.fr
- @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, img_src
 """
 
 from json import loads, dumps
 from searx.utils import html_to_text
 
+# about
+about = {
+    "website": 'https://scanr.enseignementsup-recherche.gouv.fr',
+    "wikidata_id": 'Q44105684',
+    "official_api_documentation": 'https://scanr.enseignementsup-recherche.gouv.fr/opendata',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['science']
 paging = True

+ 11 - 9
searx/engines/searchcode_code.py

@@ -1,18 +1,20 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
- Searchcode (It)
-
- @website     https://searchcode.com/
- @provide-api yes (https://searchcode.com/api/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content
+ Searchcode (IT)
 """
 
 from json import loads
 from urllib.parse import urlencode
 
+# about
+about = {
+    "website": 'https://searchcode.com/',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://searchcode.com/api/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
 
 # engine dependent config
 categories = ['it']

+ 10 - 8
searx/engines/searx_engine.py

@@ -1,18 +1,20 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Searx (all)
-
- @website     https://github.com/searx/searx
- @provide-api yes (https://searx.github.io/searx/dev/search_api.html)
-
- @using-api   yes
- @results     JSON
- @stable      yes (using api)
- @parse       url, title, content
 """
 
 from json import loads
 from searx.engines import categories as searx_categories
 
+# about
+about = {
+    "website": 'https://github.com/searx/searx',
+    "wikidata_id": 'Q17639196',
+    "official_api_documentation": 'https://searx.github.io/searx/dev/search_api.html',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
 
 categories = searx_categories.keys()
 

+ 14 - 8
searx/engines/sepiasearch.py

@@ -1,17 +1,23 @@
-# SepiaSearch (Videos)
-#
-# @website     https://sepiasearch.org
-# @provide-api https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api
-# @using-api   yes
-# @results     JSON
-# @stable      yes
-# @parse       url, title, content, publishedDate, thumbnail
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ SepiaSearch (Videos)
+"""
 
 from json import loads
 from dateutil import parser, relativedelta
 from urllib.parse import urlencode
 from datetime import datetime
 
+# about
+about = {
+    "website": 'https://sepiasearch.org',
+    "wikidata_id": None,
+    "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api",  # NOQA
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 categories = ['videos']
 paging = True
 language_support = True

+ 10 - 8
searx/engines/soundcloud.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Soundcloud (Music)
-
- @website     https://soundcloud.com
- @provide-api yes (https://developers.soundcloud.com/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, publishedDate, embedded
 """
 
 import re
@@ -18,6 +11,15 @@ from urllib.parse import quote_plus, urlencode
 from searx import logger
 from searx.poolrequests import get as http_get
 
+# about
+about = {
+    "website": 'https://soundcloud.com',
+    "wikidata_id": 'Q568769',
+    "official_api_documentation": 'https://developers.soundcloud.com/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
 
 # engine dependent config
 categories = ['music']

+ 11 - 8
searx/engines/spotify.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Spotify (Music)
-
- @website     https://spotify.com
- @provide-api yes (https://developer.spotify.com/web-api/search-item/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, embedded
 """
 
 from json import loads
@@ -15,6 +8,16 @@ from urllib.parse import urlencode
 import requests
 import base64
 
+# about
+about = {
+    "website": 'https://www.spotify.com',
+    "wikidata_id": 'Q689141',
+    "official_api_documentation": 'https://developer.spotify.com/web-api/search-item/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['music']
 paging = True

+ 12 - 9
searx/engines/stackoverflow.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
- Stackoverflow (It)
-
- @website     https://stackoverflow.com/
- @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
-
- @using-api   no
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, content
+ Stackoverflow (IT)
 """
 
 from urllib.parse import urlencode, urljoin, urlparse
@@ -15,6 +8,16 @@ from lxml import html
 from searx.utils import extract_text
 from searx.exceptions import SearxEngineCaptchaException
 
+# about
+about = {
+    "website": 'https://stackoverflow.com/',
+    "wikidata_id": 'Q549037',
+    "official_api_documentation": 'https://api.stackexchange.com/docs',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['it']
 paging = True

+ 14 - 11
searx/engines/startpage.py

@@ -1,14 +1,7 @@
-#  Startpage (Web)
-#
-# @website     https://startpage.com
-# @provide-api no (nothing found)
-#
-# @using-api   no
-# @results     HTML
-# @stable      no (HTML can change)
-# @parse       url, title, content
-#
-# @todo        paging
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Startpage (Web)
+"""
 
 from lxml import html
 from dateutil import parser
@@ -19,6 +12,16 @@ from babel import Locale
 from babel.localedata import locale_identifiers
 from searx.utils import extract_text, eval_xpath, match_language
 
+# about
+about = {
+    "website": 'https://startpage.com',
+    "wikidata_id": 'Q2333295',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 # there is a mechanism to block "bot" search

+ 11 - 8
searx/engines/tokyotoshokan.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Tokyo Toshokan (A BitTorrent Library for Japanese Media)
-
- @website      https://www.tokyotosho.info/
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, publishedDate, seed, leech,
-               filesize, magnetlink, content
 """
 
 import re
@@ -16,6 +9,16 @@ from lxml import html
 from datetime import datetime
 from searx.utils import extract_text, get_torrent_size, int_or_zero
 
+# about
+about = {
+    "website": 'https://www.tokyotosho.info/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files', 'videos', 'music']
 paging = True

+ 11 - 9
searx/engines/torrentz.py

@@ -1,14 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Torrentz2.is (BitTorrent meta-search engine)
-
- @website      https://torrentz2.is/
- @provide-api  no
-
- @using-api    no
- @results      HTML
- @stable       no (HTML can change, although unlikely,
-                   see https://torrentz.is/torrentz.btsearch)
- @parse        url, title, publishedDate, seed, leech, filesize, magnetlink
 """
 
 import re
@@ -17,6 +9,16 @@ from lxml import html
 from datetime import datetime
 from searx.utils import extract_text, get_torrent_size
 
+# about
+about = {
+    "website": 'https://torrentz2.is/',
+    "wikidata_id": 'Q1156687',
+    "official_api_documentation": 'https://torrentz.is/torrentz.btsearch',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files', 'videos', 'music']
 paging = True

+ 11 - 7
searx/engines/translated.py

@@ -1,14 +1,18 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  MyMemory Translated
-
- @website     https://mymemory.translated.net/
- @provide-api yes (https://mymemory.translated.net/doc/spec.php)
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content
 """
 
+# about
+about = {
+    "website": 'https://mymemory.translated.net/',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://mymemory.translated.net/doc/spec.php',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 engine_type = 'online_dictionnary'
 categories = ['general']
 url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'

+ 11 - 8
searx/engines/unsplash.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Unsplash
-
- @website     https://unsplash.com
- @provide-api yes (https://unsplash.com/developers)
-
- @using-api   no
- @results     JSON (using search portal's infiniscroll API)
- @stable      no (JSON format could change any time)
- @parse       url, title, img_src, thumbnail_src
 """
 
 from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 from json import loads
 
+# about
+about = {
+    "website": 'https://unsplash.com',
+    "wikidata_id": 'Q28233552',
+    "official_api_documentation": 'https://unsplash.com/developers',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 url = 'https://unsplash.com/'
 search_url = url + 'napi/search/photos?'
 categories = ['images']

+ 14 - 13
searx/engines/vimeo.py

@@ -1,21 +1,22 @@
-#  Vimeo (Videos)
-#
-# @website     https://vimeo.com/
-# @provide-api yes (http://developer.vimeo.com/api),
-#              they have a maximum count of queries/hour
-#
-# @using-api   no (TODO, rewrite to api)
-# @results     HTML (using search portal)
-# @stable      no (HTML can change)
-# @parse       url, title, publishedDate,  thumbnail, embedded
-#
-# @todo        rewrite to api
-# @todo        set content-parameter with correct data
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Vimeo (Videos)
+"""
 
 from urllib.parse import urlencode
 from json import loads
 from dateutil import parser
 
+# about
+about = {
+    "website": 'https://vimeo.com/',
+    "wikidata_id": 'Q156376',
+    "official_api_documentation": 'http://developer.vimeo.com/api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos']
 paging = True

+ 11 - 9
searx/engines/wikidata.py

@@ -1,14 +1,6 @@
-# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Wikidata
-
- @website     https://wikidata.org
- @provide-api yes (https://query.wikidata.org/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, infobox
 """
 
 
@@ -27,6 +19,16 @@ from searx.engines.wikipedia import _fetch_supported_languages, supported_langua
 
 logger = logger.getChild('wikidata')
 
+# about
+about = {
+    "website": 'https://wikidata.org/',
+    "wikidata_id": 'Q2013',
+    "official_api_documentation": 'https://query.wikidata.org/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # SPARQL
 SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
 SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'

+ 11 - 8
searx/engines/wikipedia.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Wikipedia (Web)
-
- @website     https://en.wikipedia.org/api/rest_v1/
- @provide-api yes
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, infobox
 """
 
 from urllib.parse import quote
@@ -16,6 +9,16 @@ from lxml.html import fromstring
 from searx.utils import match_language, searx_useragent
 from searx.raise_for_httperror import raise_for_httperror
 
+# about
+about = {
+    "website": 'https://www.wikipedia.org/',
+    "wikidata_id": 'Q52',
+    "official_api_documentation": 'https://en.wikipedia.org/api/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # search-url
 search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
 supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'

+ 14 - 9
searx/engines/wolframalpha_api.py

@@ -1,16 +1,21 @@
-# Wolfram Alpha (Science)
-#
-# @website     https://www.wolframalpha.com
-# @provide-api yes (https://api.wolframalpha.com/v2/)
-#
-# @using-api   yes
-# @results     XML
-# @stable      yes
-# @parse       url, infobox
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Wolfram|Alpha (Science)
+"""
 
 from lxml import etree
 from urllib.parse import urlencode
 
+# about
+about = {
+    "website": 'https://www.wolframalpha.com',
+    "wikidata_id": 'Q207006',
+    "official_api_documentation": 'https://products.wolframalpha.com/api/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'XML',
+}
+
 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
 site_url = 'https://www.wolframalpha.com/input/?{query}'

+ 14 - 9
searx/engines/wolframalpha_noapi.py

@@ -1,12 +1,7 @@
-# Wolfram|Alpha (Science)
-#
-# @website     https://www.wolframalpha.com/
-# @provide-api yes (https://api.wolframalpha.com/v2/)
-#
-# @using-api   no
-# @results     JSON
-# @stable      no
-# @parse       url, infobox
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Wolfram|Alpha (Science)
+"""
 
 from json import loads
 from time import time
@@ -14,6 +9,16 @@ from urllib.parse import urlencode
 
 from searx.poolrequests import get as http_get
 
+# about
+about = {
+    "website": 'https://www.wolframalpha.com/',
+    "wikidata_id": 'Q207006',
+    "official_api_documentation": 'https://products.wolframalpha.com/api/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # search-url
 url = 'https://www.wolframalpha.com/'
 

+ 11 - 8
searx/engines/www1x.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  1x (Images)
-
- @website     http://1x.com/
- @provide-api no
-
- @using-api   no
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, thumbnail
 """
 
 from lxml import html, etree
 from urllib.parse import urlencode, urljoin
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 
+# about
+about = {
+    "website": 'https://1x.com/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['images']
 paging = False

+ 2 - 0
searx/engines/xpath.py

@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
 from lxml import html
 from urllib.parse import urlencode
 from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list

+ 14 - 13
searx/engines/yacy.py

@@ -1,16 +1,7 @@
-# Yacy (Web, Images, Videos, Music, Files)
-#
-# @website     http://yacy.net
-# @provide-api yes
-#              (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
-#
-# @using-api   yes
-# @results     JSON
-# @stable      yes
-# @parse       (general)    url, title, content, publishedDate
-# @parse       (images)     url, title, img_src
-#
-# @todo        parse video, audio and file results
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yacy (Web, Images, Videos, Music, Files)
+"""
 
 from json import loads
 from dateutil import parser
@@ -20,6 +11,16 @@ from requests.auth import HTTPDigestAuth
 
 from searx.utils import html_to_text
 
+# about
+about = {
+    "website": 'https://yacy.net/',
+    "wikidata_id": 'Q1759675',
+    "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['general', 'images']  # TODO , 'music', 'videos', 'files'
 paging = True

+ 11 - 9
searx/engines/yahoo.py

@@ -1,20 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Yahoo (Web)
-
- @website     https://search.yahoo.com/web
- @provide-api yes (https://developer.yahoo.com/boss/search/),
-              $0.80/1000 queries
-
- @using-api   no (because pricing)
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content, suggestion
 """
 
 from urllib.parse import unquote, urlencode
 from lxml import html
 from searx.utils import extract_text, extract_url, match_language, eval_xpath
 
+# about
+about = {
+    "website": 'https://search.yahoo.com/',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://developer.yahoo.com/api/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = True

+ 14 - 10
searx/engines/yahoo_news.py

@@ -1,13 +1,7 @@
-# Yahoo (News)
-#
-# @website     https://news.yahoo.com
-# @provide-api yes (https://developer.yahoo.com/boss/search/)
-#              $0.80/1000 queries
-#
-# @using-api   no (because pricing)
-# @results     HTML (using search portal)
-# @stable      no (HTML can change)
-# @parse       url, title, content, publishedDate
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yahoo (News)
+"""
 
 import re
 from datetime import datetime, timedelta
@@ -18,6 +12,16 @@ from searx.engines.yahoo import _fetch_supported_languages, supported_languages_
 from dateutil import parser
 from searx.utils import extract_text, extract_url, match_language
 
+# about
+about = {
+    "website": 'https://news.yahoo.com',
+    "wikidata_id": 'Q3044717',
+    "official_api_documentation": 'https://developer.yahoo.com/api/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['news']
 paging = True

+ 11 - 7
searx/engines/yandex.py

@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 Yandex (Web)
-
- @website     https://yandex.ru/
- @provide-api ?
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
 """
 
 from urllib.parse import urlencode, urlparse
@@ -16,6 +10,16 @@ from searx.exceptions import SearxEngineCaptchaException
 
 logger = logger.getChild('yandex engine')
 
+# about
+about = {
+    "website": 'https://yandex.ru/',
+    "wikidata_id": 'Q5281',
+    "official_api_documentation": "?",
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = True

+ 14 - 9
searx/engines/yggtorrent.py

@@ -1,12 +1,7 @@
-#  Yggtorrent (Videos, Music, Files)
-#
-# @website     https://www2.yggtorrent.si
-# @provide-api no (nothing found)
-#
-# @using-api   no
-# @results     HTML (using search portal)
-# @stable      no (HTML can change)
-# @parse       url, title, seed, leech, publishedDate, filesize
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yggtorrent (Videos, Music, Files)
+"""
 
 from lxml import html
 from operator import itemgetter
@@ -15,6 +10,16 @@ from urllib.parse import quote
 from searx.utils import extract_text, get_torrent_size
 from searx.poolrequests import get as http_get
 
+# about
+about = {
+    "website": 'https://www2.yggtorrent.si',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos', 'music', 'files']
 paging = True

+ 14 - 9
searx/engines/youtube_api.py

@@ -1,18 +1,23 @@
-# Youtube (Videos)
-#
-# @website     https://www.youtube.com/
-# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
-#
-# @using-api   yes
-# @results     JSON
-# @stable      yes
-# @parse       url, title, content, publishedDate, thumbnail, embedded
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Youtube (Videos)
+"""
 
 from json import loads
 from dateutil import parser
 from urllib.parse import urlencode
 from searx.exceptions import SearxEngineAPIException
 
+# about
+about = {
+    "website": 'https://www.youtube.com/',
+    "wikidata_id": 'Q866',
+    "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['videos', 'music']
 paging = False

+ 14 - 9
searx/engines/youtube_noapi.py

@@ -1,17 +1,22 @@
-# Youtube (Videos)
-#
-# @website     https://www.youtube.com/
-# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
-#
-# @using-api   no
-# @results     HTML
-# @stable      no
-# @parse       url, title, content, publishedDate, thumbnail, embedded
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Youtube (Videos)
+"""
 
 from functools import reduce
 from json import loads
 from urllib.parse import quote_plus
 
+# about
+about = {
+    "website": 'https://www.youtube.com/',
+    "wikidata_id": 'Q866',
+    "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos', 'music']
 paging = True

+ 213 - 0
searx/settings.yml

@@ -157,6 +157,13 @@ engines:
     timeout : 7.0
     disabled : True
     shortcut : ai
+    about:
+      website: https://archive.is/
+      wikidata_id: Q13515725
+      official_api_documentation: http://mementoweb.org/depot/native/archiveis/
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : arxiv
     engine : arxiv
@@ -201,6 +208,13 @@ engines:
     timeout : 4.0
     disabled : True
     shortcut : bb
+    about:
+      website: https://bitbucket.org/
+      wikidata_id: Q2493781
+      official_api_documentation: https://developer.atlassian.com/bitbucket
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : btdigg
     engine : btdigg
@@ -216,6 +230,13 @@ engines:
     categories : videos
     disabled : True
     shortcut : c3tv
+    about:
+      website: https://media.ccc.de/
+      wikidata_id: Q80729951
+      official_api_documentation: https://github.com/voc/voctoweb
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : crossref
     engine : json_engine
@@ -226,6 +247,13 @@ engines:
     content_query : fullCitation
     categories : science
     shortcut : cr
+    about:
+      website: https://www.crossref.org/
+      wikidata_id: Q5188229
+      official_api_documentation: https://github.com/CrossRef/rest-api-doc
+      use_official_api: false
+      require_api_key: false
+      results: JSON
 
   - name : currency
     engine : currency_convert
@@ -271,6 +299,13 @@ engines:
     categories : general
     shortcut : ew
     disabled : True
+    about:
+      website: https://www.erowid.org/
+      wikidata_id: Q1430691
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
 #  - name : elasticsearch
 #    shortcut : es
@@ -321,6 +356,13 @@ engines:
     first_page_num : 1
     shortcut : et
     disabled : True
+    about:
+      website: https://www.etymonline.com/
+      wikidata_id: Q1188617
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
 #  - name : ebay
 #    engine : ebay
@@ -360,6 +402,9 @@ engines:
     search_type : title
     timeout : 5.0
     disabled : True
+    about:
+      website: https://directory.fsf.org/
+      wikidata_id: Q2470288
 
   - name : frinkiac
     engine : frinkiac
@@ -394,6 +439,13 @@ engines:
     shortcut : gl
     timeout : 10.0
     disabled : True
+    about:
+      website: https://about.gitlab.com/
+      wikidata_id: Q16639197
+      official_api_documentation: https://docs.gitlab.com/ee/api/
+      use_official_api: false
+      require_api_key: false
+      results: JSON
 
   - name : github
     engine : github
@@ -411,6 +463,13 @@ engines:
     categories : it
     shortcut : cb
     disabled : True
+    about:
+      website: https://codeberg.org/
+      wikidata_id:
+      official_api_documentation: https://try.gitea.io/api/swagger
+      use_official_api: false
+      require_api_key: false
+      results: JSON
 
   - name : google
     engine : google
@@ -441,6 +500,13 @@ engines:
     first_page_num : 0
     categories : science
     shortcut : gos
+    about:
+      website: https://scholar.google.com/
+      wikidata_id: Q494817
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : google play apps
     engine : xpath
@@ -453,6 +519,13 @@ engines:
     categories : files
     shortcut : gpa
     disabled : True
+    about:
+      website: https://play.google.com/
+      wikidata_id: Q79576
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : google play movies
     engine : xpath
@@ -465,6 +538,13 @@ engines:
     categories : videos
     shortcut : gpm
     disabled : True
+    about:
+      website: https://play.google.com/
+      wikidata_id: Q79576
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : google play music
     engine : xpath
@@ -477,6 +557,13 @@ engines:
     categories : music
     shortcut : gps
     disabled : True
+    about:
+      website: https://play.google.com/
+      wikidata_id: Q79576
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : geektimes
     engine : xpath
@@ -489,6 +576,13 @@ engines:
     timeout : 4.0
     disabled : True
     shortcut : gt
+    about:
+      website: https://geektimes.ru/
+      wikidata_id: Q50572423
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : habrahabr
     engine : xpath
@@ -501,6 +595,13 @@ engines:
     timeout : 4.0
     disabled : True
     shortcut : habr
+    about:
+      website: https://habr.com/
+      wikidata_id: Q4494434
+      official_api_documentation: https://habr.com/en/docs/help/api/
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : hoogle
     engine : json_engine
@@ -513,6 +614,13 @@ engines:
     page_size : 20
     categories : it
     shortcut : ho
+    about:
+      website: https://www.haskell.org/
+      wikidata_id: Q34010
+      official_api_documentation: https://hackage.haskell.org/api
+      use_official_api: false
+      require_api_key: false
+      results: JSON
 
   - name : ina
     engine : ina
@@ -543,6 +651,13 @@ engines:
     timeout : 7.0
     disabled : True
     shortcut : lg
+    about:
+      website: http://libgen.rs/
+      wikidata_id: Q22017206
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : lobste.rs
     engine : xpath
@@ -555,6 +670,13 @@ engines:
     shortcut : lo
     timeout : 3.0
     disabled: True
+    about:
+      website: https://lobste.rs/
+      wikidata_id: Q60762874
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : metager
     engine : xpath
@@ -566,6 +688,13 @@ engines:
     categories : general
     shortcut : mg
     disabled : True
+    about:
+      website: https://metager.org/
+      wikidata_id: Q1924645
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : microsoft academic
     engine : microsoft_academic
@@ -589,6 +718,13 @@ engines:
     disabled: True
     timeout: 5.0
     shortcut : npm
+    about:
+      website: https://npms.io/
+      wikidata_id: Q7067518
+      official_api_documentation: https://api-docs.npms.io/
+      use_official_api: false
+      require_api_key: false
+      results: JSON
 
 # Requires Tor
   - name : not evil
@@ -617,6 +753,13 @@ engines:
     categories : science
     shortcut : oad
     timeout: 5.0
+    about:
+      website: https://www.openaire.eu/
+      wikidata_id: Q25106053
+      official_api_documentation: https://api.openaire.eu/
+      use_official_api: false
+      require_api_key: false
+      results: JSON
 
   - name : openairepublications
     engine : json_engine
@@ -629,6 +772,13 @@ engines:
     categories : science
     shortcut : oap
     timeout: 5.0
+    about:
+      website: https://www.openaire.eu/
+      wikidata_id: Q25106053
+      official_api_documentation: https://api.openaire.eu/
+      use_official_api: false
+      require_api_key: false
+      results: JSON
 
 #  - name : opensemanticsearch
 #    engine : opensemantic
@@ -650,6 +800,13 @@ engines:
     timeout : 4.0
     disabled : True
     shortcut : or
+    about:
+      website: https://openrepos.net/
+      wikidata_id:
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : pdbe
     engine : pdbe
@@ -768,6 +925,13 @@ engines:
     content_xpath : .//div[@class="search-result-abstract"]
     shortcut : se
     categories : science
+    about:
+      website: https://www.semanticscholar.org/
+      wikidata_id: Q22908627
+      official_api_documentation: https://api.semanticscholar.org/
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
 # Spotify needs API credentials
 #  - name : spotify
@@ -876,6 +1040,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikibooks.org/
+      wikidata_id: Q367
 
   - name : wikinews
     engine : mediawiki
@@ -885,6 +1052,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikinews.org/
+      wikidata_id: Q964
 
   - name : wikiquote
     engine : mediawiki
@@ -896,6 +1066,9 @@ engines:
     disabled : True
     additional_tests:
       rosebud: *test_rosebud
+    about:
+      website: https://www.wikiquote.org/
+      wikidata_id: Q369
 
   - name : wikisource
     engine : mediawiki
@@ -905,6 +1078,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikisource.org/
+      wikidata_id: Q263
 
   - name : wiktionary
     engine : mediawiki
@@ -914,6 +1090,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wiktionary.org/
+      wikidata_id: Q151
 
   - name : wikiversity
     engine : mediawiki
@@ -923,6 +1102,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikiversity.org/
+      wikidata_id: Q370
 
   - name : wikivoyage
     engine : mediawiki
@@ -932,6 +1114,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikivoyage.org/
+      wikidata_id: Q373
 
   - name : wolframalpha
     shortcut : wa
@@ -979,6 +1164,13 @@ engines:
     first_page_num : 0
     page_size : 10
     disabled : True
+    about:
+      website: https://www.seznam.cz/
+      wikidata_id: Q3490485
+      official_api_documentation: https://api.sklik.cz/
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : mojeek
     shortcut: mjk
@@ -993,6 +1185,13 @@ engines:
     first_page_num : 0
     page_size : 10
     disabled : True
+    about:
+      website: https://www.mojeek.com/
+      wikidata_id: Q60747299
+      official_api_documentation: https://www.mojeek.com/services/api.html
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : naver
     shortcut: nvr
@@ -1007,6 +1206,13 @@ engines:
     first_page_num : 1
     page_size : 10
     disabled : True
+    about:
+      website: https://www.naver.com/
+      wikidata_id: Q485639
+      official_api_documentation: https://developers.naver.com/docs/nmt/examples/
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : rubygems
     shortcut: rbg
@@ -1021,6 +1227,13 @@ engines:
     first_page_num : 1
     categories: it
     disabled : True
+    about:
+      website: https://rubygems.org/
+      wikidata_id: Q1853420
+      official_api_documentation: https://guides.rubygems.org/rubygems-org-api/
+      use_official_api: false
+      require_api_key: false
+      results: HTML
 
   - name : peertube
     engine: peertube
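
The same metadata can also be written per engine in settings.yml, as the
blocks above do; presumably the settings loader overlays these keys on the
module-level defaults, which is why the xpath- and mediawiki-based entries
can get away with setting only website and wikidata_id. A minimal sketch of
that merge, assuming plain dict-update semantics (merge_engine_about is a
hypothetical name):

    def merge_engine_about(engine_module, engine_settings):
        # module-level defaults first; keys from settings.yml win
        about = dict(getattr(engine_module, 'about', {}))
        about.update(engine_settings.get('about', {}))
        return about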

+ 206 - 0
utils/fetch_engine_descriptions.py

@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+
+import sys
+import json
+from urllib.parse import quote, urlparse
+from os.path import realpath, dirname
+import cld3
+from lxml.html import fromstring
+
+# set path
+sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
+
+from searx.engines.wikidata import send_wikidata_query
+from searx.utils import extract_text
+import searx
+import searx.search
+import searx.poolrequests
+
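+# Two SPARQL queries, both sent to the Wikidata endpoint through
+# send_wikidata_query(): the first resolves each engine's wikidata_id to its
+# Wikipedia article titles, the second fetches the short Wikidata
+# descriptions; both are restricted to the locales searx ships.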
+SPARQL_WIKIPEDIA_ARTICLE = """
+SELECT DISTINCT ?item ?name
+WHERE {
+  VALUES ?item { %IDS% }
+  ?article schema:about ?item ;
+              schema:inLanguage ?lang ;
+              schema:name ?name ;
+              schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] .
+  FILTER(?lang in (%LANGUAGES_SPARQL%)) .
+  FILTER (!CONTAINS(?name, ':')) .
+}
+"""
+
+SPARQL_DESCRIPTION = """
+SELECT DISTINCT ?item ?itemDescription
+WHERE {
+  VALUES ?item { %IDS% }
+  ?item schema:description ?itemDescription .
+  FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%))
+}
+ORDER BY ?item
+"""
+
+LANGUAGES = searx.settings['locales'].keys()
+LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
+IDS = None
+
+descriptions = {}
+wd_to_engine_name = {}
+
+
+def normalize_description(description):
+    # replace ASCII control characters (0x00-0x1F) with spaces
+    for c in [chr(c) for c in range(0, 32)]:
+        description = description.replace(c, ' ')
+    description = ' '.join(description.strip().split())
+    return description
+
+
+def update_description(engine_name, lang, description, source, replace=True):
+    if replace or lang not in descriptions[engine_name]:
+        descriptions[engine_name][lang] = [normalize_description(description), source]
+
+
+def get_wikipedia_summary(language, pageid):
+    search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+    url = search_url.format(title=quote(pageid), language=language)
+    try:
+        response = searx.poolrequests.get(url)
+        response.raise_for_status()
+        api_result = json.loads(response.text)
+        return api_result.get('extract')
+    except Exception:  # network error or unexpected payload: no summary available
+        return None
+
+
+def detect_language(text):
+    r = cld3.get_language(str(text))  # pylint: disable=E1101
+    if r is not None and r.probability >= 0.98 and r.is_reliable:
+        return r.language
+    return None
+
+
+def get_website_description(url, lang1, lang2=None):
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'DNT': '1',
+        'Upgrade-Insecure-Requests': '1',
+        'Sec-GPC': '1',
+        'Cache-Control': 'max-age=0',
+    }
+    if lang1 is not None:
+        lang_list = [lang1]
+        if lang2 is not None:
+            lang_list.append(lang2)
+        headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
+    try:
+        response = searx.poolrequests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+    except Exception:
+        return (None, None)
+
+    try:
+        html = fromstring(response.text)
+    except ValueError:
+        html = fromstring(response.content)
+
+    description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content'))
+    if not description:
+        description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content'))
+    if not description:
+        description = extract_text(html.xpath('/html/head/title'))
+    lang = extract_text(html.xpath('/html/@lang'))
+    if lang is None and lang1:  # lang1 is None on the first, language-less probe
+        lang = lang1
+    lang = detect_language(description) or lang or 'en'
+    lang = lang.split('_')[0]
+    lang = lang.split('-')[0]
+    return (lang, description)
+
+
+def initialize():
+    global descriptions, wd_to_engine_name, IDS
+    searx.search.initialize()
+    for engine_name, engine in searx.engines.engines.items():
+        descriptions[engine_name] = {}
+        wikidata_id = getattr(engine, "about", {}).get('wikidata_id')
+        if wikidata_id is not None:
+            wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name)
+
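+    # build the "wd:Q..." value list that both SPARQL templates splice in
+    # through their %IDS% placeholder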
+    IDS = ' '.join(list(map(lambda wd_id: 'wd:' + wd_id, wd_to_engine_name.keys())))
+
+
+def fetch_wikidata_descriptions():
+    global IDS
+    result = send_wikidata_query(SPARQL_DESCRIPTION
+                                 .replace('%IDS%', IDS)
+                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    if result is not None:
+        for binding in result['results']['bindings']:
+            wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+            lang = binding['itemDescription']['xml:lang']
+            description = binding['itemDescription']['value']
+            if ' ' in description:  # skip unique word description (like "website")
+                for engine_name in wd_to_engine_name[wikidata_id]:
+                    update_description(engine_name, lang, description, 'wikidata')
+
+
+def fetch_wikipedia_descriptions():
+    global IDS
+    result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE
+                                 .replace('%IDS%', IDS)
+                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    if result is not None:
+        for binding in result['results']['bindings']:
+            wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+            lang = binding['name']['xml:lang']
+            pageid = binding['name']['value']
+            description = get_wikipedia_summary(lang, pageid)
+            if description is not None and ' ' in description:
+                for engine_name in wd_to_engine_name[wikidata_id]:
+                    update_description(engine_name, lang, description, 'wikipedia')
+
+
+def normalize_url(url):
+    url = url.replace('{language}', 'en')
+    url = urlparse(url)._replace(path='/', params='', query='', fragment='').geturl()
+    url = url.replace('https://api.', 'https://')
+    return url
+
+
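+# Probe the engine's homepage in a handful of major languages; stop as soon
+# as a probe fails or returns the same text as the default response, since
+# the site apparently ignores Accept-Language at that point.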
+def fetch_website_description(engine_name, website):
+    default_lang, default_description = get_website_description(website, None, None)
+    if default_lang is None or default_description is None:
+        return
+    if default_lang not in descriptions[engine_name]:
+        descriptions[engine_name][default_lang] = [normalize_description(default_description), website]
+    for request_lang in ('en-US', 'es-US', 'fr-FR', 'zh', 'ja', 'ru', 'ar', 'ko'):
+        if request_lang.split('-')[0] not in descriptions[engine_name]:
+            lang, desc = get_website_description(website, request_lang, request_lang.split('-')[0])
+            if desc is not None and desc != default_description:
+                update_description(engine_name, lang, desc, website, replace=False)
+            else:
+                break
+
+
+def fetch_website_descriptions():
+    for engine_name, engine in searx.engines.engines.items():
+        website = getattr(engine, "about", {}).get('website')
+        # not every engine module defines search_url / base_url, so guard the
+        # fallbacks instead of letting a bare getattr() raise AttributeError
+        if website is None and getattr(engine, "search_url", None):
+            website = normalize_url(engine.search_url)
+        if website is None and getattr(engine, "base_url", None):
+            website = normalize_url(engine.base_url)
+        if website is not None:
+            fetch_website_description(engine_name, website)
+
+
+def main():
+    initialize()
+    fetch_wikidata_descriptions()
+    fetch_wikipedia_descriptions()
+    fetch_website_descriptions()
+
+    sys.stdout.write(json.dumps(descriptions, indent=1, separators=(',', ':'), ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
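
The script writes the collected descriptions as JSON to stdout; presumably
the output is meant to be redirected into a data file (the target filename
below is illustrative, not part of this diff):

    python utils/fetch_engine_descriptions.py > engine_descriptions.json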