
Merge pull request #2460 from dalf/engine-about

[enh] engines: add about variable
Alexandre Flament, 4 years ago
commit 1d13ad8452
90 changed files with 1421 additions and 725 deletions
  1. searx/engines/1337x.py (+14 -0)
  2. searx/engines/acgsou.py (+11 -7)
  3. searx/engines/ahmia.py (+11 -8)
  4. searx/engines/apkmirror.py (+10 -7)
  5. searx/engines/archlinux.py (+12 -8)
  6. searx/engines/arxiv.py (+10 -9)
  7. searx/engines/base.py (+10 -11)
  8. searx/engines/bing.py (+11 -11)
  9. searx/engines/bing_images.py (+11 -10)
  10. searx/engines/bing_news.py (+11 -9)
  11. searx/engines/bing_videos.py (+11 -8)
  12. searx/engines/btdigg.py (+14 -8)
  13. searx/engines/command.py (+4 -15)
  14. searx/engines/currency_convert.py (+14 -0)
  15. searx/engines/dailymotion.py (+11 -10)
  16. searx/engines/deezer.py (+11 -8)
  17. searx/engines/deviantart.py (+11 -10)
  18. searx/engines/dictzone.py (+10 -7)
  19. searx/engines/digbt.py (+10 -8)
  20. searx/engines/digg.py (+11 -8)
  21. searx/engines/doku.py (+14 -10)
  22. searx/engines/duckduckgo.py (+11 -11)
  23. searx/engines/duckduckgo_definitions.py (+12 -8)
  24. searx/engines/duckduckgo_images.py (+14 -11)
  25. searx/engines/duden.py (+11 -6)
  26. searx/engines/dummy-offline.py (+11 -3)
  27. searx/engines/dummy.py (+11 -3)
  28. searx/engines/ebay.py (+14 -9)
  29. searx/engines/elasticsearch.py (+5 -0)
  30. searx/engines/etools.py (+11 -7)
  31. searx/engines/fdroid.py (+11 -7)
  32. searx/engines/flickr.py (+11 -9)
  33. searx/engines/flickr_noapi.py (+12 -11)
  34. searx/engines/framalibre.py (+11 -8)
  35. searx/engines/frinkiac.py (+15 -8)
  36. searx/engines/genius.py (+12 -9)
  37. searx/engines/gentoo.py (+11 -9)
  38. searx/engines/gigablast.py (+10 -8)
  39. searx/engines/github.py (+12 -9)
  40. searx/engines/google.py (+14 -12)
  41. searx/engines/google_images.py (+10 -12)
  42. searx/engines/google_news.py (+11 -8)
  43. searx/engines/google_videos.py (+11 -8)
  44. searx/engines/ina.py (+13 -12)
  45. searx/engines/invidious.py (+14 -9)
  46. searx/engines/json_engine.py (+2 -0)
  47. searx/engines/kickass.py (+11 -8)
  48. searx/engines/mediawiki.py (+12 -11)
  49. searx/engines/microsoft_academic.py (+12 -8)
  50. searx/engines/mixcloud.py (+11 -8)
  51. searx/engines/not_evil.py (+11 -8)
  52. searx/engines/nyaa.py (+11 -7)
  53. searx/engines/opensemantic.py (+13 -9)
  54. searx/engines/openstreetmap.py (+11 -8)
  55. searx/engines/pdbe.py (+11 -8)
  56. searx/engines/peertube.py (+11 -10)
  57. searx/engines/photon.py (+11 -8)
  58. searx/engines/piratebay.py (+14 -9)
  59. searx/engines/pubmed.py (+13 -9)
  60. searx/engines/qwant.py (+10 -8)
  61. searx/engines/recoll.py (+11 -6)
  62. searx/engines/reddit.py (+11 -8)
  63. searx/engines/scanr_structures.py (+11 -8)
  64. searx/engines/searchcode_code.py (+11 -9)
  65. searx/engines/searx_engine.py (+10 -8)
  66. searx/engines/sepiasearch.py (+14 -8)
  67. searx/engines/soundcloud.py (+10 -8)
  68. searx/engines/spotify.py (+11 -8)
  69. searx/engines/stackoverflow.py (+12 -9)
  70. searx/engines/startpage.py (+14 -11)
  71. searx/engines/tokyotoshokan.py (+11 -8)
  72. searx/engines/torrentz.py (+11 -9)
  73. searx/engines/translated.py (+11 -7)
  74. searx/engines/unsplash.py (+11 -8)
  75. searx/engines/vimeo.py (+14 -13)
  76. searx/engines/wikidata.py (+11 -9)
  77. searx/engines/wikipedia.py (+11 -8)
  78. searx/engines/wolframalpha_api.py (+14 -9)
  79. searx/engines/wolframalpha_noapi.py (+14 -9)
  80. searx/engines/www1x.py (+11 -8)
  81. searx/engines/xpath.py (+2 -0)
  82. searx/engines/yacy.py (+14 -13)
  83. searx/engines/yahoo.py (+11 -9)
  84. searx/engines/yahoo_news.py (+14 -10)
  85. searx/engines/yandex.py (+11 -7)
  86. searx/engines/yggtorrent.py (+14 -9)
  87. searx/engines/youtube_api.py (+14 -9)
  88. searx/engines/youtube_noapi.py (+14 -9)
  89. searx/settings.yml (+213 -0)
  90. utils/fetch_engine_descriptions.py (+206 -0)
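The diffs below all follow the same pattern: each engine module drops its old `@website` / `@provide-api` / `@using-api` / `@results` / `@stable` / `@parse` docstring tags and instead declares a module-level `about` dictionary, so the metadata becomes machine-readable. A minimal sketch of the new convention for a hypothetical engine module (the field values are illustrative placeholders, not taken from the diff):

    # SPDX-License-Identifier: AGPL-3.0-or-later
    """
     Example engine
    """

    # about: machine-readable metadata replacing the old docstring tags
    about = {
        "website": 'https://example.org/',     # public site of the engine
        "wikidata_id": None,                   # Wikidata item ID, or None if unknown
        "official_api_documentation": None,    # URL of the API docs, or None
        "use_official_api": False,             # True if results come from an official API
        "require_api_key": False,              # True if an API key must be configured
        "results": 'HTML',                     # format the result parser consumes
    }

Where a plain URL is not enough, `official_api_documentation` may instead be a small dict with `url` and `comment` keys, as the btdigg, duckduckgo_images, and frinkiac diffs below show.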

+ 14 - 0
searx/engines/1337x.py

@@ -1,7 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ 1337x
+"""
+
 from urllib.parse import quote, urljoin
 from lxml import html
 from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+    "website": 'https://1337x.to/',
+    "wikidata_id": 'Q28134166',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}

 url = 'https://1337x.to/'
 search_url = url + 'search/{search_term}/{pageno}/'

+ 11 - 7
searx/engines/acgsou.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
  Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
-
- @website      https://www.acgsou.com/
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, content, seed, leech, torrentfile
 """
 """
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from lxml import html
 from lxml import html
 from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex
 from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex
 
 
+# about
+about = {
+    "website": 'https://www.acgsou.com/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files', 'images', 'videos', 'music']
 paging = True

+ 11 - 8
searx/engines/ahmia.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Ahmia (Onions)
  Ahmia (Onions)
-
- @website      http://msydqstlz2kzerdg.onion
- @provides-api no
-
- @using-api    no
- @results      HTML
- @stable       no
- @parse        url, title, content
 """
 """
 
 
 from urllib.parse import urlencode, urlparse, parse_qs
 from urllib.parse import urlencode, urlparse, parse_qs
 from lxml.html import fromstring
 from lxml.html import fromstring
 from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath
 from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath
 
 
+# about
+about = {
+    "website": 'http://msydqstlz2kzerdg.onion',
+    "wikidata_id": 'Q18693938',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine config
 categories = ['onions']
 paging = True

+ 10 - 7
searx/engines/apkmirror.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  APK Mirror
  APK Mirror
-
- @website     https://www.apkmirror.com
-
- @using-api   no
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, thumbnail_src
 """
 """
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from lxml import html
 from lxml import html
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 
 
+# about
+about = {
+    "website": 'https://www.apkmirror.com',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}

 # engine dependent config
 categories = ['it']

+ 12 - 8
searx/engines/archlinux.py

@@ -1,20 +1,24 @@
-# -*- coding: utf-8 -*-
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Arch Linux Wiki
  Arch Linux Wiki
 
 
- @website      https://wiki.archlinux.org
- @provide-api  no (Mediawiki provides API, but Arch Wiki blocks access to it
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title
+ API: Mediawiki provides API, but Arch Wiki blocks access to it
 """
 """
 
 
 from urllib.parse import urlencode, urljoin
 from urllib.parse import urlencode, urljoin
 from lxml import html
 from lxml import html
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 
 
+# about
+about = {
+    "website": 'https://wiki.archlinux.org/',
+    "wikidata_id": 'Q101445877',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['it']
 language_support = True

+ 10 - 9
searx/engines/arxiv.py

@@ -1,20 +1,21 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  ArXiV (Scientific preprints)
  ArXiV (Scientific preprints)
- @website     https://arxiv.org
- @provide-api yes (export.arxiv.org/api/query)
- @using-api   yes
- @results     XML-RSS
- @stable      yes
- @parse       url, title, publishedDate, content
- More info on api: https://arxiv.org/help/api/user-manual
 """
 """
 
 
 from lxml import html
 from lxml import html
 from datetime import datetime
 from datetime import datetime
 from searx.utils import eval_xpath_list, eval_xpath_getindex
 from searx.utils import eval_xpath_list, eval_xpath_getindex
 
 
+# about
+about = {
+    "website": 'https://arxiv.org',
+    "wikidata_id": 'Q118398',
+    "official_api_documentation": 'https://arxiv.org/help/api',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'XML-RSS',
+}

 categories = ['science']
 paging = True

+ 10 - 11
searx/engines/base.py

@@ -1,16 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  BASE (Scholar publications)
  BASE (Scholar publications)
-
- @website     https://base-search.net
- @provide-api yes with authorization (https://api.base-search.net/)
-
- @using-api   yes
- @results     XML
- @stable      ?
- @parse       url, title, publishedDate, content
- More info on api: http://base-search.net/about/download/base_interface.pdf
 """
 """
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
@@ -19,6 +9,15 @@ from datetime import datetime
 import re
 from searx.utils import searx_useragent
+# about
+about = {
+    "website": 'https://base-search.net',
+    "wikidata_id": 'Q448335',
+    "official_api_documentation": 'https://api.base-search.net/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'XML',
+}

 categories = ['science']

+ 11 - 11
searx/engines/bing.py

@@ -1,16 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Bing (Web)
  Bing (Web)
-
- @website     https://www.bing.com
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-              max. 5000 query/month
-
- @using-api   no (because of query limit)
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
-
- @todo        publishedDate
 """
 """
 
 
 import re
 import re
@@ -21,6 +11,16 @@ from searx.utils import eval_xpath, extract_text, match_language
 
 
 logger = logger.getChild('bing engine')
 logger = logger.getChild('bing engine')
 
 
+# about
+about = {
+    "website": 'https://www.bing.com',
+    "wikidata_id": 'Q182496',
+    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = True

+ 11 - 10
searx/engines/bing_images.py

@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Bing (Images)
  Bing (Images)
-
- @website     https://www.bing.com/images
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-              max. 5000 query/month
-
- @using-api   no (because of query limit)
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, img_src
-
 """
 """
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
@@ -20,6 +11,16 @@ from searx.utils import match_language
 from searx.engines.bing import language_aliases
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
+# about
+about = {
+    "website": 'https://www.bing.com/images',
+    "wikidata_id": 'Q182496',
+    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['images']
 paging = True

+ 11 - 9
searx/engines/bing_news.py

@@ -1,14 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Bing (News)
  Bing (News)
-
- @website     https://www.bing.com/news
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
-              max. 5000 query/month
-
- @using-api   no (because of query limit)
- @results     RSS (using search portal)
- @stable      yes (except perhaps for the images)
- @parse       url, title, content, publishedDate, thumbnail
 """
 """
 
 
 from datetime import datetime
 from datetime import datetime
@@ -20,6 +12,16 @@ from searx.utils import match_language, eval_xpath_getindex
 from searx.engines.bing import language_aliases
 from searx.engines.bing import language_aliases
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
+about = {
+    "website": 'https://www.bing.com/news',
+    "wikidata_id": 'Q2878637',
+    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'RSS',
+}
+
 # engine dependent config
 categories = ['news']
 paging = True

+ 11 - 8
searx/engines/bing_videos.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Bing (Videos)
  Bing (Videos)
-
- @website     https://www.bing.com/videos
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search)
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, content, thumbnail
 """
 """
 
 
 from json import loads
 from json import loads
@@ -18,6 +11,16 @@ from searx.utils import match_language
 from searx.engines.bing import language_aliases
 from searx.engines.bing import language_aliases
 from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
+about = {
+    "website": 'https://www.bing.com/videos',
+    "wikidata_id": 'Q4914152',
+    "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['videos']
 paging = True
 safesearch = True

+ 14 - 8
searx/engines/btdigg.py

@@ -1,19 +1,25 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  BTDigg (Videos, Music, Files)
  BTDigg (Videos, Music, Files)
-
- @website     https://btdig.com
- @provide-api yes (on demand)
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content, seed, leech, magnetlink
 """
 """
 
 
 from lxml import html
 from lxml import html
 from urllib.parse import quote, urljoin
 from urllib.parse import quote, urljoin
 from searx.utils import extract_text, get_torrent_size
 from searx.utils import extract_text, get_torrent_size
 
 
+# about
+about = {
+    "website": 'https://btdig.com',
+    "wikidata_id": 'Q4836698',
+    "official_api_documentation": {
+        'url': 'https://btdig.com/contacts',
+        'comment': 'on demand'
+    },
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos', 'music', 'files']
 paging = True

+ 4 - 15
searx/engines/command.py

@@ -1,18 +1,7 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-'''
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Command (offline)
+"""
 
 
 import re
 import re
 from os.path import expanduser, isabs, realpath, commonprefix
 from os.path import expanduser, isabs, realpath, commonprefix

+ 14 - 0
searx/engines/currency_convert.py

@@ -1,5 +1,19 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ currency convert (DuckDuckGo)
+"""
+
 import json
+# about
+about = {
+    "website": 'https://duckduckgo.com/',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": 'https://duckduckgo.com/api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSONP',
+}

 engine_type = 'online_currency'
 categories = []

+ 11 - 10
searx/engines/dailymotion.py

@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Dailymotion (Videos)
  Dailymotion (Videos)
-
- @website     https://www.dailymotion.com
- @provide-api yes (http://www.dailymotion.com/developer)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, thumbnail, publishedDate, embedded
-
- @todo        set content-parameter with correct data
 """
 """
 
 
 from json import loads
 from json import loads
@@ -17,6 +8,16 @@ from datetime import datetime
 from urllib.parse import urlencode
 from searx.utils import match_language, html_to_text
+# about
+about = {
+    "website": 'https://www.dailymotion.com',
+    "wikidata_id": 'Q769222',
+    "official_api_documentation": 'https://www.dailymotion.com/developer',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['videos']
 paging = True

+ 11 - 8
searx/engines/deezer.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Deezer (Music)
  Deezer (Music)
-
- @website     https://deezer.com
- @provide-api yes (http://developers.deezer.com/api/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, embedded
 """
 """
 
 
 from json import loads
 from json import loads
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
+# about
+about = {
+    "website": 'https://deezer.com',
+    "wikidata_id": 'Q602243',
+    "official_api_documentation": 'https://developers.deezer.com/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['music']
 paging = True

+ 11 - 10
searx/engines/deviantart.py

@@ -1,21 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Deviantart (Images)
  Deviantart (Images)
-
- @website     https://www.deviantart.com/
- @provide-api yes (https://www.deviantart.com/developers/) (RSS)
-
- @using-api   no (TODO, rewrite to api)
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, img_src
-
- @todo        rewrite to api
 """
 """
 # pylint: disable=missing-function-docstring
 # pylint: disable=missing-function-docstring
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from lxml import html
 from lxml import html
 
 
+# about
+about = {
+    "website": 'https://www.deviantart.com/',
+    "wikidata_id": 'Q46523',
+    "official_api_documentation": 'https://www.deviantart.com/developers/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['images']
 paging = True

+ 10 - 7
searx/engines/dictzone.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Dictzone
  Dictzone
-
- @website     https://dictzone.com/
- @provide-api no
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
 """
 """
 
 
 from urllib.parse import urljoin
 from urllib.parse import urljoin
 from lxml import html
 from lxml import html
 from searx.utils import eval_xpath
 from searx.utils import eval_xpath
 
 
+# about
+about = {
+    "website": 'https://dictzone.com/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}

 engine_type = 'online_dictionnary'
 categories = ['general']

+ 10 - 8
searx/engines/digbt.py

@@ -1,19 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  DigBT (Videos, Music, Files)
  DigBT (Videos, Music, Files)
-
- @website     https://digbt.org
- @provide-api no
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content, magnetlink
 """
 """
 
 
 from urllib.parse import urljoin
 from urllib.parse import urljoin
 from lxml import html
 from lxml import html
 from searx.utils import extract_text, get_torrent_size
 from searx.utils import extract_text, get_torrent_size
 
 
+# about
+about = {
+    "website": 'https://digbt.org',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}

 categories = ['videos', 'music', 'files']
 paging = True

+ 11 - 8
searx/engines/digg.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Digg (News, Social media)
  Digg (News, Social media)
-
- @website     https://digg.com
- @provide-api no
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content, publishedDate, thumbnail
 """
 """
 # pylint: disable=missing-function-docstring
 # pylint: disable=missing-function-docstring
 
 
@@ -17,6 +10,16 @@ from datetime import datetime

 from lxml import html
+# about
+about = {
+    "website": 'https://digg.com',
+    "wikidata_id": 'Q270478',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['news', 'social media']
 paging = True

+ 14 - 10
searx/engines/doku.py

@@ -1,18 +1,22 @@
-# Doku Wiki
-#
-# @website     https://www.dokuwiki.org/
-# @provide-api yes
-#              (https://www.dokuwiki.org/devel:xmlrpc)
-#
-# @using-api   no
-# @results     HTML
-# @stable      yes
-# @parse       (general)    url, title, content
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Doku Wiki
+"""
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from lxml.html import fromstring
 from lxml.html import fromstring
 from searx.utils import extract_text, eval_xpath
 from searx.utils import extract_text, eval_xpath
 
 
+# about
+about = {
+    "website": 'https://www.dokuwiki.org/',
+    "wikidata_id": 'Q851864',
+    "official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']  # TODO , 'images', 'music', 'videos', 'files'
 paging = False

+ 11 - 11
searx/engines/duckduckgo.py

@@ -1,22 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  DuckDuckGo (Web)
  DuckDuckGo (Web)
-
- @website     https://duckduckgo.com/
- @provide-api yes (https://duckduckgo.com/api),
-              but not all results from search-site
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
-
- @todo        rewrite to api
 """
 """
 
 
 from lxml.html import fromstring
 from lxml.html import fromstring
 from json import loads
 from json import loads
 from searx.utils import extract_text, match_language, eval_xpath
 from searx.utils import extract_text, match_language, eval_xpath
 
 
+# about
+about = {
+    "website": 'https://duckduckgo.com/',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": 'https://duckduckgo.com/api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = False

+ 12 - 8
searx/engines/duckduckgo_definitions.py

@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
-DuckDuckGo (definitions)
-
-- `Instant Answer API`_
-- `DuckDuckGo query`_
-
-.. _Instant Answer API: https://duckduckgo.com/api
-.. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1
-
+ DuckDuckGo (Instant Answer API)
 """
 """
 
 
 import json
 import json
@@ -22,6 +16,16 @@ from searx.external_urls import get_external_url, get_earth_coordinates_url, are

 logger = logger.getChild('duckduckgo_definitions')
+# about
+about = {
+    "website": 'https://duckduckgo.com/',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": 'https://duckduckgo.com/api',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 URL = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'

+ 14 - 11
searx/engines/duckduckgo_images.py

@@ -1,16 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  DuckDuckGo (Images)
  DuckDuckGo (Images)
-
- @website     https://duckduckgo.com/
- @provide-api yes (https://duckduckgo.com/api),
-              but images are not supported
-
- @using-api   no
- @results     JSON (site requires js to get images)
- @stable      no (JSON can change)
- @parse       url, title, img_src
-
- @todo        avoid extra request
 """
 """
 
 
 from json import loads
 from json import loads
@@ -20,6 +10,19 @@ from searx.engines.duckduckgo import get_region_code
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
 from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
 from searx.poolrequests import get
+about = {
+    "website": 'https://duckduckgo.com/',
+    "wikidata_id": 'Q12805',
+    "official_api_documentation": {
+        'url': 'https://duckduckgo.com/api',
+        'comment': 'but images are not supported',
+    },
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON (site requires js to get images)',
+}
+
 # engine dependent config
 categories = ['images']
 paging = True

+ 11 - 6
searx/engines/duden.py

@@ -1,11 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Duden
  Duden
- @website     https://www.duden.de
- @provide-api no
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
 """
 """
 
 
 import re
 import re
@@ -13,6 +8,16 @@ from urllib.parse import quote, urljoin
 from lxml import html
 from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
+# about
+about = {
+    "website": 'https://www.duden.de',
+    "wikidata_id": 'Q73624591',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['general']
 paging = True
 language_support = False

+ 11 - 3
searx/engines/dummy-offline.py

@@ -1,11 +1,19 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Dummy Offline
  Dummy Offline
-
- @results     one result
- @stable      yes
 """
 """
 
 
 
 
+# about
+about = {
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+
 def search(query, request_params):
     return [{
         'result': 'this is what you get',

+ 11 - 3
searx/engines/dummy.py

@@ -1,10 +1,18 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Dummy
  Dummy
-
- @results     empty array
- @stable      yes
 """
 """
 
 
+# about
+about = {
+    "website": None,
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'empty array',
+}
+

 # do search-request
 def request(query, params):

+ 14 - 9
searx/engines/ebay.py

@@ -1,17 +1,22 @@
-#  Ebay (Videos, Music, Files)
-#
-# @website     https://www.ebay.com
-# @provide-api no (nothing found)
-#
-# @using-api   no
-# @results     HTML (using search portal)
-# @stable      yes (HTML can change)
-# @parse       url, title, content, price, shipping, source
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Ebay (Videos, Music, Files)
+"""
 
 
 from lxml import html
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.engines.xpath import extract_text
 from urllib.parse import quote
 from urllib.parse import quote
 
 
+# about
+about = {
+    "website": 'https://www.ebay.com',
+    "wikidata_id": 'Q58024',
+    "official_api_documentation": 'https://developer.ebay.com/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['shopping']
 paging = True

+ 5 - 0
searx/engines/elasticsearch.py

@@ -1,3 +1,8 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Elasticsearch
+"""
+
 from json import loads, dumps
 from requests.auth import HTTPBasicAuth
 from searx.exceptions import SearxEngineAPIException

+ 11 - 7
searx/engines/etools.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  eTools (Web)
  eTools (Web)
-
- @website      https://www.etools.ch
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, content
 """
 """
 
 
 from lxml import html
 from lxml import html
 from urllib.parse import quote
 from urllib.parse import quote
 from searx.utils import extract_text, eval_xpath
 from searx.utils import extract_text, eval_xpath
 
 
+# about
+about = {
+    "website": 'https://www.etools.ch',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['general']
 paging = False
 language_support = False

+ 11 - 7
searx/engines/fdroid.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  F-Droid (a repository of FOSS applications for Android)
  F-Droid (a repository of FOSS applications for Android)
-
- @website      https://f-droid.org/
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, content
 """
 """
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from lxml import html
 from lxml import html
 from searx.utils import extract_text
 from searx.utils import extract_text
 
 
+# about
+about = {
+    "website": 'https://f-droid.org/',
+    "wikidata_id": 'Q1386210',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files']
 paging = True

+ 11 - 9
searx/engines/flickr.py

@@ -1,21 +1,23 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Flickr (Images)
  Flickr (Images)
 
 
- @website     https://www.flickr.com
- @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, thumbnail, img_src
  More info on api-key : https://www.flickr.com/services/apps/create/
 """

 from json import loads
 from urllib.parse import urlencode
+# about
+about = {
+    "website": 'https://www.flickr.com',
+    "wikidata_id": 'Q103204',
+    "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
+    "use_official_api": True,
+    "require_api_key": True,
+    "results": 'JSON',
+}
+
 categories = ['images']

 nb_per_page = 15

+ 12 - 11
searx/engines/flickr_noapi.py

@@ -1,15 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
-  Flickr (Images)
-
- @website     https://www.flickr.com
- @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, thumbnail, img_src
+ Flickr (Images)
 """
 """
 
 
 from json import loads
 from json import loads
@@ -21,6 +12,16 @@ from searx.utils import ecma_unescape, html_to_text

 logger = logger.getChild('flickr-noapi')
+# about
+about = {
+    "website": 'https://www.flickr.com',
+    "wikidata_id": 'Q103204',
+    "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 categories = ['images']

 url = 'https://www.flickr.com/'

+ 11 - 8
searx/engines/framalibre.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  FramaLibre (It)
  FramaLibre (It)
-
- @website     https://framalibre.org/
- @provide-api no
-
- @using-api   no
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, content, thumbnail, img_src
 """
 """
 
 
 from html import escape
 from html import escape
@@ -15,6 +8,16 @@ from urllib.parse import urljoin, urlencode
 from lxml import html
 from searx.utils import extract_text
+# about
+about = {
+    "website": 'https://framalibre.org/',
+    "wikidata_id": 'Q30213882',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['it']
 paging = True

+ 15 - 8
searx/engines/frinkiac.py

@@ -1,17 +1,24 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
-Frinkiac (Images)
-
-@website     https://www.frinkiac.com
-@provide-api no
-@using-api   no
-@results     JSON
-@stable      no
-@parse       url, title, img_src
+ Frinkiac (Images)
 """
 """
 
 
 from json import loads
 from json import loads
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
+# about
+about = {
+    "website": 'https://frinkiac.com',
+    "wikidata_id": 'Q24882614',
+    "official_api_documentation": {
+        'url': None,
+        'comment': 'see https://github.com/MitchellAW/CompuGlobal'
+    },
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 categories = ['images']

 BASE = 'https://frinkiac.com/'

+ 12 - 9
searx/engines/genius.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
-Genius
-
- @website     https://www.genius.com/
- @provide-api yes (https://docs.genius.com/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, thumbnail, publishedDate
+ Genius
 """
 """
 
 
 from json import loads
 from json import loads
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from datetime import datetime
 from datetime import datetime
 
 
+# about
+about = {
+    "website": 'https://genius.com/',
+    "wikidata_id": 'Q3419343',
+    "official_api_documentation": 'https://docs.genius.com/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['music']
 paging = True

+ 11 - 9
searx/engines/gentoo.py

@@ -1,20 +1,22 @@
-# -*- coding: utf-8 -*-
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Gentoo Wiki
  Gentoo Wiki
-
- @website      https://wiki.gentoo.org
- @provide-api  yes
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title
 """
 """
 
 
 from urllib.parse import urlencode, urljoin
 from urllib.parse import urlencode, urljoin
 from lxml import html
 from lxml import html
 from searx.utils import extract_text
 from searx.utils import extract_text
 
 
+# about
+about = {
+    "website": 'https://wiki.gentoo.org/',
+    "wikidata_id": 'Q1050637',
+    "official_api_documentation": 'https://wiki.gentoo.org/api.php',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['it']
 language_support = True

+ 10 - 8
searx/engines/gigablast.py

@@ -1,14 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """
  Gigablast (Web)
-
- @website     https://gigablast.com
- @provide-api yes (https://gigablast.com/api.html)
-
- @using-api   yes
- @results     XML
- @stable      yes
- @parse       url, title, content
 """
 """
 # pylint: disable=missing-function-docstring, invalid-name
 # pylint: disable=missing-function-docstring, invalid-name
 
 
@@ -18,6 +10,16 @@ from urllib.parse import urlencode
 # from searx import logger
 from searx.poolrequests import get
+# about
+about = {
+    "website": 'https://www.gigablast.com',
+    "wikidata_id": 'Q3105449',
+    "official_api_documentation": 'https://gigablast.com/api.html',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['general']
 # gigablast's pagination is totally damaged, don't use it

+ 12 - 9
searx/engines/github.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
- Github (It)
-
- @website     https://github.com/
- @provide-api yes (https://developer.github.com/v3/)
-
- @using-api   yes
- @results     JSON
- @stable      yes (using api)
- @parse       url, title, content
+ Github (IT)
 """
 """
 
 
 from json import loads
 from json import loads
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
+# about
+about = {
+    "website": 'https://github.com/',
+    "wikidata_id": 'Q364',
+    "official_api_documentation": 'https://developer.github.com/v3/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['it']

+ 14 - 12
searx/engines/google.py

@@ -1,19 +1,11 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Google (Web)
-:website:     https://www.google.com
-:provide-api: yes (https://developers.google.com/custom-search/)
-:using-api:   not the offical, since it needs registration to another service
-:results:     HTML
-:stable:      no
-:parse:       url, title, content, number_of_results, answer, suggestion, correction
-
-For detailed description of the *REST-full* API see: `Query Parameter
-Definitions`_.
-
-.. _Query Parameter Definitions:
-   https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
+ For detailed description of the *REST-full* API see: `Query Parameter
+ Definitions`_.
+ .. _Query Parameter Definitions:
+ https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
 """
 """
 
 
 # pylint: disable=invalid-name, missing-function-docstring
 # pylint: disable=invalid-name, missing-function-docstring
@@ -27,6 +19,16 @@ from searx.exceptions import SearxEngineCaptchaException

 logger = logger.getChild('google engine')
+# about
+about = {
+    "website": 'https://www.google.com',
+    "wikidata_id": 'Q9366',
+    "official_api_documentation": 'https://developers.google.com/custom-search/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = True

+ 10 - 12
searx/engines/google_images.py

@@ -1,14 +1,6 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """Google (Images)
-:website:     https://images.google.com (redirected to subdomain www.)
-:provide-api: yes (https://developers.google.com/custom-search/)
-:using-api:   not the offical, since it needs registration to another service
-:results:     HTML
-:stable:      no
-:template:    images.html
-:parse:       url, title, content, source, thumbnail_src, img_src
-
 For detailed description of the *REST-full* API see: `Query Parameter
 Definitions`_.
@@ -18,10 +10,6 @@ Definitions`_.
    ``data:` scheme).::

      Header set Content-Security-Policy "img-src 'self' data: ;"
-
-.. _Query Parameter Definitions:
-   https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions
-
 """
 """
 
 
 from urllib.parse import urlencode, urlparse, unquote
 from urllib.parse import urlencode, urlparse, unquote
@@ -39,6 +27,16 @@ from searx.engines.google import (

 logger = logger.getChild('google images')
+# about
+about = {
+    "website": 'https://images.google.com/',
+    "wikidata_id": 'Q521550',
+    "official_api_documentation": 'https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions',  # NOQA
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config

 categories = ['images']

+ 11 - 8
searx/engines/google_news.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Google (News)
  Google (News)
-
- @website     https://news.google.com
- @provide-api no
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, content, publishedDate
 """
 """
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
@@ -15,6 +8,16 @@ from lxml import html
 from searx.utils import match_language
 from searx.engines.google import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import
+# about
+about = {
+    "website": 'https://news.google.com',
+    "wikidata_id": 'Q12020',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # search-url
 categories = ['news']
 paging = True

+ 11 - 8
searx/engines/google_videos.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Google (Videos)
  Google (Videos)
-
- @website     https://www.google.com
- @provide-api yes (https://developers.google.com/custom-search/)
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, content, thumbnail
 """
 """
 
 
 from datetime import date, timedelta
 from datetime import date, timedelta
@@ -16,6 +9,16 @@ from lxml import html
 from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex
 import re
+# about
+about = {
+    "website": 'https://www.google.com',
+    "wikidata_id": 'Q219885',
+    "official_api_documentation": 'https://developers.google.com/custom-search/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos']
 paging = True

+ 13 - 12
searx/engines/ina.py

@@ -1,15 +1,7 @@
-#  INA (Videos)
-#
-# @website     https://www.ina.fr/
-# @provide-api no
-#
-# @using-api   no
-# @results     HTML (using search portal)
-# @stable      no (HTML can change)
-# @parse       url, title, content, publishedDate, thumbnail
-#
-# @todo        set content-parameter with correct data
-# @todo        embedded (needs some md5 from video page)
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ INA (Videos)
+"""
 
 
 from json import loads
 from json import loads
 from html import unescape
 from html import unescape
@@ -18,6 +10,15 @@ from lxml import html
 from dateutil import parser
 from searx.utils import extract_text
+# about
+about = {
+    "website": 'https://www.ina.fr/',
+    "wikidata_id": 'Q1665109',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}

 # engine dependent config
 categories = ['videos']

+ 14 - 9
searx/engines/invidious.py

@@ -1,17 +1,22 @@
-# Invidious (Videos)
-#
-# @website     https://invidio.us/
-# @provide-api yes (https://github.com/omarroth/invidious/wiki/API)
-#
-# @using-api   yes
-# @results     JSON
-# @stable      yes
-# @parse       url, title, content, publishedDate, thumbnail, embedded, author, length
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Invidious (Videos)
+"""
 
 
 from urllib.parse import quote_plus
 from urllib.parse import quote_plus
 from dateutil import parser
 from dateutil import parser
 import time
 import time
 
 
+# about
+about = {
+    "website": 'https://instances.invidio.us/',
+    "wikidata_id": 'Q79343316',
+    "official_api_documentation": 'https://github.com/omarroth/invidious/wiki/API',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ["videos", "music"]
 paging = True

+ 2 - 0
searx/engines/json_engine.py

@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
 from collections.abc import Iterable
 from json import loads
 from urllib.parse import urlencode

+ 11 - 8
searx/engines/kickass.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Kickass Torrent (Videos, Music, Files)
  Kickass Torrent (Videos, Music, Files)
-
- @website     https://kickass.so
- @provide-api no (nothing found)
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      yes (HTML can change)
- @parse       url, title, content, seed, leech, magnetlink
 """
 """
 
 
 from lxml import html
 from lxml import html
@@ -15,6 +8,16 @@ from operator import itemgetter
 from urllib.parse import quote, urljoin
 from searx.utils import extract_text, get_torrent_size, convert_str_to_int
+# about
+about = {
+    "website": 'https://kickass.so',
+    "wikidata_id": 'Q17062285',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos', 'music', 'files']
 paging = True

+ 12 - 11
searx/engines/mediawiki.py

@@ -1,21 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
- general mediawiki-engine (Web)
-
- @website     websites built on mediawiki (https://www.mediawiki.org)
- @provide-api yes (http://www.mediawiki.org/wiki/API:Search)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title
-
- @todo        content
+ General mediawiki-engine (Web)
 """
 """
 
 
 from json import loads
 from json import loads
 from string import Formatter
 from string import Formatter
 from urllib.parse import urlencode, quote
 from urllib.parse import urlencode, quote
 
 
+# about
+about = {
+    "website": None,
+    "wikidata_id": None,
+    "official_api_documentation": 'http://www.mediawiki.org/wiki/API:Search',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['general']
 language_support = True

+ 12 - 8
searx/engines/microsoft_academic.py

@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
-Microsoft Academic (Science)
-
-@website     https://academic.microsoft.com
-@provide-api yes
-@using-api   no
-@results     JSON
-@stable      no
-@parse       url, title, content
+ Microsoft Academic (Science)
 """
 """
 
 
 from datetime import datetime
 from datetime import datetime
@@ -15,6 +9,16 @@ from uuid import uuid4
 from urllib.parse import urlencode
 from searx.utils import html_to_text
+# about
+about = {
+    "website": 'https://academic.microsoft.com',
+    "wikidata_id": 'Q28136779',
+    "official_api_documentation": 'http://ma-graph.org/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 categories = ['images']
 paging = True
 result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'

+ 11 - 8
searx/engines/mixcloud.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Mixcloud (Music)
  Mixcloud (Music)
-
- @website     https://http://www.mixcloud.com/
- @provide-api yes (http://www.mixcloud.com/developers/
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, embedded, publishedDate
 """
 """
 
 
 from json import loads
 from json import loads
 from dateutil import parser
 from dateutil import parser
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
+# about
+about = {
+    "website": 'https://www.mixcloud.com/',
+    "wikidata_id": 'Q6883832',
+    "official_api_documentation": 'http://www.mixcloud.com/developers/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['music']
 paging = True

+ 11 - 8
searx/engines/not_evil.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  not Evil (Onions)
  not Evil (Onions)
-
- @website     http://hss3uro2hsxfogfq.onion
- @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm)
-
- @using-api   no
- @results     HTML
- @stable      no
- @parse       url, title, content
 """
 """
 
 
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from lxml import html
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.engines.xpath import extract_text
 
 
+# about
+about = {
+    "website": 'http://hss3uro2hsxfogfq.onion',
+    "wikidata_id": None,
+    "official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['onions']
 paging = True

+ 11 - 7
searx/engines/nyaa.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Nyaa.si (Anime Bittorrent tracker)
  Nyaa.si (Anime Bittorrent tracker)
-
- @website      https://nyaa.si/
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, content, seed, leech, torrentfile
 """
 """
 
 
 from lxml import html
 from lxml import html
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from searx.utils import extract_text, get_torrent_size, int_or_zero
 from searx.utils import extract_text, get_torrent_size, int_or_zero
 
 
+# about
+about = {
+    "website": 'https://nyaa.si/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files', 'images', 'videos', 'music']
 paging = True

+ 13 - 9
searx/engines/opensemantic.py

@@ -1,18 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
-Open Semantic Search
-
- @website    https://www.opensemanticsearch.org/
- @provide-api yes (https://www.opensemanticsearch.org/dev)
-
- @using-api  yes
- @results    JSON
- @stable     yes
- @parse      url, title, content, publishedDate
+ Open Semantic Search
 """
 """
+
 from dateutil import parser
 from json import loads
 from urllib.parse import quote

+# about
+about = {
+    "website": 'https://www.opensemanticsearch.org/',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://www.opensemanticsearch.org/dev',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 base_url = 'http://localhost:8983/solr/opensemanticsearch/'
 search_string = 'query?q={query}'


+ 11 - 8
searx/engines/openstreetmap.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  OpenStreetMap (Map)
  OpenStreetMap (Map)
-
- @website     https://openstreetmap.org/
- @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title
 """
 """
 
 
 import re
 import re
 from json import loads
 from json import loads
 from flask_babel import gettext
 from flask_babel import gettext
 
 
+# about
+about = {
+    "website": 'https://www.openstreetmap.org/',
+    "wikidata_id": 'Q936',
+    "official_api_documentation": 'http://wiki.openstreetmap.org/wiki/Nominatim',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['map']
 paging = False

+ 11 - 8
searx/engines/pdbe.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  PDBe (Protein Data Bank in Europe)
  PDBe (Protein Data Bank in Europe)
-
- @website       https://www.ebi.ac.uk/pdbe
- @provide-api   yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
-                unlimited
- @using-api     yes
- @results       python dictionary (from json)
- @stable        yes
- @parse         url, title, content, img_src
 """
 """
 
 
 from json import loads
 from json import loads
 from flask_babel import gettext
 from flask_babel import gettext
 
 
+# about
+about = {
+    "website": 'https://www.ebi.ac.uk/pdbe',
+    "wikidata_id": 'Q55823905',
+    "official_api_documentation": 'https://www.ebi.ac.uk/pdbe/api/doc/search.html',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 categories = ['science']

 hide_obsolete = False

+ 11 - 10
searx/engines/peertube.py

@@ -1,15 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  peertube (Videos)
  peertube (Videos)
-
- @website     https://www.peertube.live
- @provide-api yes (https://docs.joinpeertube.org/api-rest-reference.html)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, thumbnail, publishedDate, embedded
-
- @todo        implement time range support
 """
 """
 
 
 from json import loads
 from json import loads
@@ -17,6 +8,16 @@ from datetime import datetime
 from urllib.parse import urlencode
 from searx.utils import html_to_text

+# about
+about = {
+    "website": 'https://joinpeertube.org',
+    "wikidata_id": 'Q50938515',
+    "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ["videos"]
 paging = True

+ 11 - 8
searx/engines/photon.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Photon (Map)
  Photon (Map)
-
- @website     https://photon.komoot.de
- @provide-api yes (https://photon.komoot.de/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title
 """
 """
 
 
 from json import loads
 from json import loads
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from searx.utils import searx_useragent
 from searx.utils import searx_useragent
 
 
+# about
+about = {
+    "website": 'https://photon.komoot.de',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://photon.komoot.de/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['map']
 paging = False

+ 14 - 9
searx/engines/piratebay.py

@@ -1,12 +1,7 @@
-#  Piratebay (Videos, Music, Files)
-#
-# @website     https://thepiratebay.org
-# @provide-api yes (https://apibay.org/)
-#
-# @using-api   yes
-# @results     JSON
-# @stable      no (the API is not documented nor versioned)
-# @parse       url, title, seed, leech, magnetlink, filesize, publishedDate
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Piratebay (Videos, Music, Files)
+"""

 from json import loads
 from datetime import datetime
@@ -15,6 +10,16 @@ from operator import itemgetter
 from urllib.parse import quote
 from searx.utils import get_torrent_size

+# about
+about = {
+    "website": 'https://thepiratebay.org',
+    "wikidata_id": 'Q22663',
+    "official_api_documentation": 'https://apibay.org/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ["videos", "music", "files"]


+ 13 - 9
searx/engines/pubmed.py

@@ -1,14 +1,6 @@
-#!/usr/bin/env python
-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  PubMed (Scholar publications)
  PubMed (Scholar publications)
- @website     https://www.ncbi.nlm.nih.gov/pubmed/
- @provide-api yes (https://www.ncbi.nlm.nih.gov/home/develop/api/)
- @using-api   yes
- @results     XML
- @stable      yes
- @parse       url, title, publishedDate, content
- More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/
 """
 """
 
 
 from flask_babel import gettext
 from flask_babel import gettext
@@ -17,6 +9,18 @@ from datetime import datetime
 from urllib.parse import urlencode
 from searx.poolrequests import get

+# about
+about = {
+    "website": 'https://www.ncbi.nlm.nih.gov/pubmed/',
+    "wikidata_id": 'Q1540899',
+    "official_api_documentation": {
+        'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/',
+        'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/'
+    },
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'XML',
+}

 categories = ['science']


+ 10 - 8
searx/engines/qwant.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Qwant (Web, Images, News, Social)
  Qwant (Web, Images, News, Social)
-
- @website     https://qwant.com/
- @provide-api not officially (https://api.qwant.com/api/search/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content
 """
 """
 
 
 from datetime import datetime
 from datetime import datetime
@@ -17,6 +10,15 @@ from searx.utils import html_to_text, match_language
 from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException
 from searx.raise_for_httperror import raise_for_httperror

+# about
+about = {
+    "website": 'https://www.qwant.com/',
+    "wikidata_id": 'Q14657870',
+    "official_api_documentation": None,
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}

 # engine dependent config
 categories = []

+ 11 - 6
searx/engines/recoll.py

@@ -1,17 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Recoll (local search engine)
  Recoll (local search engine)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, content, size, abstract, author, mtype, subtype, time, \
-              filename, label, type, embedded
 """
 """
 
 
 from datetime import date, timedelta
 from datetime import date, timedelta
 from json import loads
 from json import loads
 from urllib.parse import urlencode, quote
 from urllib.parse import urlencode, quote
 
 
+# about
+about = {
+    "website": None,
+    "wikidata_id": 'Q15735774',
+    "official_api_documentation": 'https://www.lesbonscomptes.com/recoll/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 time_range_support = True


+ 11 - 8
searx/engines/reddit.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Reddit
  Reddit
-
- @website      https://www.reddit.com/
- @provide-api  yes (https://www.reddit.com/dev/api)
-
- @using-api    yes
- @results      JSON
- @stable       yes
- @parse        url, title, content, thumbnail, publishedDate
 """
 """
 
 
 import json
 import json
 from datetime import datetime
 from datetime import datetime
 from urllib.parse import urlencode, urljoin, urlparse
 from urllib.parse import urlencode, urljoin, urlparse
 
 
+# about
+about = {
+    "website": 'https://www.reddit.com/',
+    "wikidata_id": 'Q1136',
+    "official_api_documentation": 'https://www.reddit.com/dev/api',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['general', 'images', 'news', 'social media']
 page_size = 25

+ 11 - 8
searx/engines/scanr_structures.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  ScanR Structures (Science)
  ScanR Structures (Science)
-
- @website     https://scanr.enseignementsup-recherche.gouv.fr
- @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, img_src
 """
 """
 
 
 from json import loads, dumps
 from json import loads, dumps
 from searx.utils import html_to_text
 from searx.utils import html_to_text
 
 
+# about
+about = {
+    "website": 'https://scanr.enseignementsup-recherche.gouv.fr',
+    "wikidata_id": 'Q44105684',
+    "official_api_documentation": 'https://scanr.enseignementsup-recherche.gouv.fr/opendata',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['science']
 paging = True

+ 11 - 9
searx/engines/searchcode_code.py

@@ -1,18 +1,20 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
- Searchcode (It)
-
- @website     https://searchcode.com/
- @provide-api yes (https://searchcode.com/api/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content
+ Searchcode (IT)
 """
 """
 
 
 from json import loads
 from json import loads
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 
 
+# about
+about = {
+    "website": 'https://searchcode.com/',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://searchcode.com/api/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}

 # engine dependent config
 categories = ['it']

+ 10 - 8
searx/engines/searx_engine.py

@@ -1,18 +1,20 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Searx (all)
  Searx (all)
-
- @website     https://github.com/searx/searx
- @provide-api yes (https://searx.github.io/searx/dev/search_api.html)
-
- @using-api   yes
- @results     JSON
- @stable      yes (using api)
- @parse       url, title, content
 """
 """
 
 
 from json import loads
 from json import loads
 from searx.engines import categories as searx_categories
 from searx.engines import categories as searx_categories
 
 
+# about
+about = {
+    "website": 'https://github.com/searx/searx',
+    "wikidata_id": 'Q17639196',
+    "official_api_documentation": 'https://searx.github.io/searx/dev/search_api.html',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}

 categories = searx_categories.keys()


+ 14 - 8
searx/engines/sepiasearch.py

@@ -1,17 +1,23 @@
-# SepiaSearch (Videos)
-#
-# @website     https://sepiasearch.org
-# @provide-api https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api
-# @using-api   yes
-# @results     JSON
-# @stable      yes
-# @parse       url, title, content, publishedDate, thumbnail
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ SepiaSearch (Videos)
+"""

 from json import loads
 from dateutil import parser, relativedelta
 from urllib.parse import urlencode
 from datetime import datetime

+# about
+about = {
+    "website": 'https://sepiasearch.org',
+    "wikidata_id": None,
+    "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api",  # NOQA
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 categories = ['videos']
 paging = True
 language_support = True

+ 10 - 8
searx/engines/soundcloud.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Soundcloud (Music)
  Soundcloud (Music)
-
- @website     https://soundcloud.com
- @provide-api yes (https://developers.soundcloud.com/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, publishedDate, embedded
 """
 """
 
 
 import re
 import re
@@ -18,6 +11,15 @@ from urllib.parse import quote_plus, urlencode
 from searx import logger
 from searx.poolrequests import get as http_get

+# about
+about = {
+    "website": 'https://soundcloud.com',
+    "wikidata_id": 'Q568769',
+    "official_api_documentation": 'https://developers.soundcloud.com/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}

 # engine dependent config
 categories = ['music']

+ 11 - 8
searx/engines/spotify.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Spotify (Music)
  Spotify (Music)
-
- @website     https://spotify.com
- @provide-api yes (https://developer.spotify.com/web-api/search-item/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content, embedded
 """
 """
 
 
 from json import loads
 from json import loads
@@ -15,6 +8,16 @@ from urllib.parse import urlencode
 import requests
 import base64

+# about
+about = {
+    "website": 'https://www.spotify.com',
+    "wikidata_id": 'Q689141',
+    "official_api_documentation": 'https://developer.spotify.com/web-api/search-item/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['music']
 paging = True

+ 12 - 9
searx/engines/stackoverflow.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
- Stackoverflow (It)
-
- @website     https://stackoverflow.com/
- @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
-
- @using-api   no
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, content
+ Stackoverflow (IT)
 """
 """
 
 
 from urllib.parse import urlencode, urljoin, urlparse
 from urllib.parse import urlencode, urljoin, urlparse
@@ -15,6 +8,16 @@ from lxml import html
 from searx.utils import extract_text
 from searx.exceptions import SearxEngineCaptchaException

+# about
+about = {
+    "website": 'https://stackoverflow.com/',
+    "wikidata_id": 'Q549037',
+    "official_api_documentation": 'https://api.stackexchange.com/docs',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['it']
 paging = True

+ 14 - 11
searx/engines/startpage.py

@@ -1,14 +1,7 @@
-#  Startpage (Web)
-#
-# @website     https://startpage.com
-# @provide-api no (nothing found)
-#
-# @using-api   no
-# @results     HTML
-# @stable      no (HTML can change)
-# @parse       url, title, content
-#
-# @todo        paging
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Startpage (Web)
+"""

 from lxml import html
 from dateutil import parser
@@ -19,6 +12,16 @@ from babel import Locale
 from babel.localedata import locale_identifiers
 from searx.utils import extract_text, eval_xpath, match_language

+# about
+about = {
+    "website": 'https://startpage.com',
+    "wikidata_id": 'Q2333295',
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 # there is a mechanism to block "bot" search

+ 11 - 8
searx/engines/tokyotoshokan.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Tokyo Toshokan (A BitTorrent Library for Japanese Media)
  Tokyo Toshokan (A BitTorrent Library for Japanese Media)
-
- @website      https://www.tokyotosho.info/
- @provide-api  no
- @using-api    no
- @results      HTML
- @stable       no (HTML can change)
- @parse        url, title, publishedDate, seed, leech,
-               filesize, magnetlink, content
 """
 """
 
 
 import re
 import re
@@ -16,6 +9,16 @@ from lxml import html
 from datetime import datetime
 from searx.utils import extract_text, get_torrent_size, int_or_zero

+# about
+about = {
+    "website": 'https://www.tokyotosho.info/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files', 'videos', 'music']
 paging = True

+ 11 - 9
searx/engines/torrentz.py

@@ -1,14 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Torrentz2.is (BitTorrent meta-search engine)
  Torrentz2.is (BitTorrent meta-search engine)
-
- @website      https://torrentz2.is/
- @provide-api  no
-
- @using-api    no
- @results      HTML
- @stable       no (HTML can change, although unlikely,
-                   see https://torrentz.is/torrentz.btsearch)
- @parse        url, title, publishedDate, seed, leech, filesize, magnetlink
 """
 """
 
 
 import re
 import re
@@ -17,6 +9,16 @@ from lxml import html
 from datetime import datetime
 from searx.utils import extract_text, get_torrent_size

+# about
+about = {
+    "website": 'https://torrentz2.is/',
+    "wikidata_id": 'Q1156687',
+    "official_api_documentation": 'https://torrentz.is/torrentz.btsearch',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['files', 'videos', 'music']
 paging = True

+ 11 - 7
searx/engines/translated.py

@@ -1,14 +1,18 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  MyMemory Translated
  MyMemory Translated
-
- @website     https://mymemory.translated.net/
- @provide-api yes (https://mymemory.translated.net/doc/spec.php)
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content
 """
 """
 
 
+# about
+about = {
+    "website": 'https://mymemory.translated.net/',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://mymemory.translated.net/doc/spec.php',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 engine_type = 'online_dictionnary'
 categories = ['general']
 url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}'

+ 11 - 8
searx/engines/unsplash.py

@@ -1,18 +1,21 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Unsplash
  Unsplash
-
- @website     https://unsplash.com
- @provide-api yes (https://unsplash.com/developers)
-
- @using-api   no
- @results     JSON (using search portal's infiniscroll API)
- @stable      no (JSON format could change any time)
- @parse       url, title, img_src, thumbnail_src
 """
 """
 
 
 from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 from json import loads
 from json import loads
 
 
+# about
+about = {
+    "website": 'https://unsplash.com',
+    "wikidata_id": 'Q28233552',
+    "official_api_documentation": 'https://unsplash.com/developers',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 url = 'https://unsplash.com/'
 search_url = url + 'napi/search/photos?'
 categories = ['images']

+ 14 - 13
searx/engines/vimeo.py

@@ -1,21 +1,22 @@
-#  Vimeo (Videos)
-#
-# @website     https://vimeo.com/
-# @provide-api yes (http://developer.vimeo.com/api),
-#              they have a maximum count of queries/hour
-#
-# @using-api   no (TODO, rewrite to api)
-# @results     HTML (using search portal)
-# @stable      no (HTML can change)
-# @parse       url, title, publishedDate,  thumbnail, embedded
-#
-# @todo        rewrite to api
-# @todo        set content-parameter with correct data
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Vimeo (Videos)
+"""

 from urllib.parse import urlencode
 from json import loads
 from dateutil import parser

+# about
+about = {
+    "website": 'https://vimeo.com/',
+    "wikidata_id": 'Q156376',
+    "official_api_documentation": 'http://developer.vimeo.com/api',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos']
 paging = True

+ 11 - 9
searx/engines/wikidata.py

@@ -1,14 +1,6 @@
-# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Wikidata
  Wikidata
-
- @website     https://wikidata.org
- @provide-api yes (https://query.wikidata.org/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, infobox
 """
 """
 
 
 
 
@@ -27,6 +19,16 @@ from searx.engines.wikipedia import _fetch_supported_languages, supported_langua

 logger = logger.getChild('wikidata')

+# about
+about = {
+    "website": 'https://wikidata.org/',
+    "wikidata_id": 'Q2013',
+    "official_api_documentation": 'https://query.wikidata.org/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # SPARQL
 SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql'
 SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain'

+ 11 - 8
searx/engines/wikipedia.py

@@ -1,13 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Wikipedia (Web)
  Wikipedia (Web)
-
- @website     https://en.wikipedia.org/api/rest_v1/
- @provide-api yes
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, infobox
 """
 """
 
 
 from urllib.parse import quote
 from urllib.parse import quote
@@ -16,6 +9,16 @@ from lxml.html import fromstring
 from searx.utils import match_language, searx_useragent
 from searx.raise_for_httperror import raise_for_httperror

+# about
+about = {
+    "website": 'https://www.wikipedia.org/',
+    "wikidata_id": 'Q52',
+    "official_api_documentation": 'https://en.wikipedia.org/api/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # search-url
 search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
 supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'

+ 14 - 9
searx/engines/wolframalpha_api.py

@@ -1,16 +1,21 @@
-# Wolfram Alpha (Science)
-#
-# @website     https://www.wolframalpha.com
-# @provide-api yes (https://api.wolframalpha.com/v2/)
-#
-# @using-api   yes
-# @results     XML
-# @stable      yes
-# @parse       url, infobox
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Wolfram|Alpha (Science)
+"""

 from lxml import etree
 from urllib.parse import urlencode

+# about
+about = {
+    "website": 'https://www.wolframalpha.com',
+    "wikidata_id": 'Q207006',
+    "official_api_documentation": 'https://products.wolframalpha.com/api/',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'XML',
+}
+
 # search-url
 search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}'
 site_url = 'https://www.wolframalpha.com/input/?{query}'

+ 14 - 9
searx/engines/wolframalpha_noapi.py

@@ -1,12 +1,7 @@
-# Wolfram|Alpha (Science)
-#
-# @website     https://www.wolframalpha.com/
-# @provide-api yes (https://api.wolframalpha.com/v2/)
-#
-# @using-api   no
-# @results     JSON
-# @stable      no
-# @parse       url, infobox
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Wolfram|Alpha (Science)
+"""

 from json import loads
 from time import time
@@ -14,6 +9,16 @@ from urllib.parse import urlencode

 from searx.poolrequests import get as http_get

+# about
+about = {
+    "website": 'https://www.wolframalpha.com/',
+    "wikidata_id": 'Q207006',
+    "official_api_documentation": 'https://products.wolframalpha.com/api/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # search-url
 url = 'https://www.wolframalpha.com/'


+ 11 - 8
searx/engines/www1x.py

@@ -1,19 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  1x (Images)
  1x (Images)
-
- @website     http://1x.com/
- @provide-api no
-
- @using-api   no
- @results     HTML
- @stable      no (HTML can change)
- @parse       url, title, thumbnail
 """
 """
 
 
 from lxml import html, etree
 from lxml import html, etree
 from urllib.parse import urlencode, urljoin
 from urllib.parse import urlencode, urljoin
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
 
 
+# about
+about = {
+    "website": 'https://1x.com/',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['images']
 paging = False

+ 2 - 0
searx/engines/xpath.py

@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
 from lxml import html
 from urllib.parse import urlencode
 from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list

+ 14 - 13
searx/engines/yacy.py

@@ -1,16 +1,7 @@
-# Yacy (Web, Images, Videos, Music, Files)
-#
-# @website     http://yacy.net
-# @provide-api yes
-#              (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
-#
-# @using-api   yes
-# @results     JSON
-# @stable      yes
-# @parse       (general)    url, title, content, publishedDate
-# @parse       (images)     url, title, img_src
-#
-# @todo        parse video, audio and file results
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yacy (Web, Images, Videos, Music, Files)
+"""
 

 from json import loads
 from dateutil import parser
 

 from searx.utils import html_to_text

+about = {
+    "website": 'https://yacy.net/',
+    "wikidata_id": 'Q1759675',
+    "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['general', 'images']  # TODO , 'music', 'videos', 'files'
 paging = True

+ 11 - 9
searx/engines/yahoo.py

@@ -1,20 +1,22 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Yahoo (Web)
  Yahoo (Web)
-
- @website     https://search.yahoo.com/web
- @provide-api yes (https://developer.yahoo.com/boss/search/),
-              $0.80/1000 queries
-
- @using-api   no (because pricing)
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content, suggestion
 """
 """
 
 
 from urllib.parse import unquote, urlencode
 from urllib.parse import unquote, urlencode
 from lxml import html
 from lxml import html
 from searx.utils import extract_text, extract_url, match_language, eval_xpath
 from searx.utils import extract_text, extract_url, match_language, eval_xpath
 
 
+# about
+about = {
+    "website": 'https://search.yahoo.com/',
+    "wikidata_id": None,
+    "official_api_documentation": 'https://developer.yahoo.com/api/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = True

+ 14 - 10
searx/engines/yahoo_news.py

@@ -1,13 +1,7 @@
-# Yahoo (News)
-#
-# @website     https://news.yahoo.com
-# @provide-api yes (https://developer.yahoo.com/boss/search/)
-#              $0.80/1000 queries
-#
-# @using-api   no (because pricing)
-# @results     HTML (using search portal)
-# @stable      no (HTML can change)
-# @parse       url, title, content, publishedDate
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yahoo (News)
+"""

 import re
 from datetime import datetime, timedelta
@@ -18,6 +12,16 @@ from searx.engines.yahoo import _fetch_supported_languages, supported_languages_
 from dateutil import parser
 from searx.utils import extract_text, extract_url, match_language

+# about
+about = {
+    "website": 'https://news.yahoo.com',
+    "wikidata_id": 'Q3044717',
+    "official_api_documentation": 'https://developer.yahoo.com/api/',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['news']
 paging = True

+ 11 - 7
searx/engines/yandex.py

@@ -1,12 +1,6 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
 """
 """
  Yahoo (Web)
  Yahoo (Web)
-
- @website     https://yandex.ru/
- @provide-api ?
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
 """
 """
 
 
 from urllib.parse import urlencode, urlparse
 from urllib.parse import urlencode, urlparse
@@ -16,6 +10,16 @@ from searx.exceptions import SearxEngineCaptchaException

 logger = logger.getChild('yandex engine')

+# about
+about = {
+    "website": 'https://yandex.ru/',
+    "wikidata_id": 'Q5281',
+    "official_api_documentation": "?",
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['general']
 paging = True

+ 14 - 9
searx/engines/yggtorrent.py

@@ -1,12 +1,7 @@
-#  Yggtorrent (Videos, Music, Files)
-#
-# @website     https://www2.yggtorrent.si
-# @provide-api no (nothing found)
-#
-# @using-api   no
-# @results     HTML (using search portal)
-# @stable      no (HTML can change)
-# @parse       url, title, seed, leech, publishedDate, filesize
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Yggtorrent (Videos, Music, Files)
+"""

 from lxml import html
 from operator import itemgetter
@@ -15,6 +10,16 @@ from urllib.parse import quote
 from searx.utils import extract_text, get_torrent_size
 from searx.poolrequests import get as http_get

+# about
+about = {
+    "website": 'https://www2.yggtorrent.si',
+    "wikidata_id": None,
+    "official_api_documentation": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos', 'music', 'files']
 paging = True

+ 14 - 9
searx/engines/youtube_api.py

@@ -1,18 +1,23 @@
-# Youtube (Videos)
-#
-# @website     https://www.youtube.com/
-# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
-#
-# @using-api   yes
-# @results     JSON
-# @stable      yes
-# @parse       url, title, content, publishedDate, thumbnail, embedded
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Youtube (Videos)
+"""

 from json import loads
 from dateutil import parser
 from urllib.parse import urlencode
 from searx.exceptions import SearxEngineAPIException

+# about
+about = {
+    "website": 'https://www.youtube.com/',
+    "wikidata_id": 'Q866',
+    "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true',
+    "use_official_api": True,
+    "require_api_key": False,
+    "results": 'JSON',
+}
+
 # engine dependent config
 categories = ['videos', 'music']
 paging = False

+ 14 - 9
searx/engines/youtube_noapi.py

@@ -1,17 +1,22 @@
-# Youtube (Videos)
-#
-# @website     https://www.youtube.com/
-# @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list)
-#
-# @using-api   no
-# @results     HTML
-# @stable      no
-# @parse       url, title, content, publishedDate, thumbnail, embedded
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+ Youtube (Videos)
+"""

 from functools import reduce
 from json import loads
 from urllib.parse import quote_plus

+# about
+about = {
+    "website": 'https://www.youtube.com/',
+    "wikidata_id": 'Q866',
+    "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true',
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
 # engine dependent config
 categories = ['videos', 'music']
 paging = True

+ 213 - 0
searx/settings.yml

@@ -157,6 +157,13 @@ engines:
     timeout : 7.0
     disabled : True
     shortcut : ai
+    about:
+      website: https://archive.is/
+      wikidata_id: Q13515725
+      official_api_documentation: http://mementoweb.org/depot/native/archiveis/
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : arxiv
     engine : arxiv
@@ -201,6 +208,13 @@ engines:
     timeout : 4.0
     disabled : True
     shortcut : bb
+    about:
+      website: https://bitbucket.org/
+      wikidata_id: Q2493781
+      official_api_documentation: https://developer.atlassian.com/bitbucket
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : btdigg
     engine : btdigg
@@ -216,6 +230,13 @@ engines:
     categories : videos
     disabled : True
     shortcut : c3tv
+    about:
+      website: https://media.ccc.de/
+      wikidata_id: Q80729951
+      official_api_documentation: https://github.com/voc/voctoweb
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : crossref
     engine : json_engine
@@ -226,6 +247,13 @@ engines:
     content_query : fullCitation
     categories : science
     shortcut : cr
+    about:
+      website: https://www.crossref.org/
+      wikidata_id: Q5188229
+      official_api_documentation: https://github.com/CrossRef/rest-api-doc
+      use_official_api: false
+      require_api_key: false
+      results: JSON

   - name : currency
     engine : currency_convert
@@ -271,6 +299,13 @@ engines:
     categories : general
     shortcut : ew
     disabled : True
+    about:
+      website: https://www.erowid.org/
+      wikidata_id: Q1430691
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

 #  - name : elasticsearch
 #    shortcut : es
@@ -321,6 +356,13 @@ engines:
     first_page_num : 1
     shortcut : et
     disabled : True
+    about:
+      website: https://www.etymonline.com/
+      wikidata_id: Q1188617
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

 #  - name : ebay
 #    engine : ebay
@@ -360,6 +402,9 @@ engines:
     search_type : title
     timeout : 5.0
     disabled : True
+    about:
+      website: https://directory.fsf.org/
+      wikidata_id: Q2470288

   - name : frinkiac
     engine : frinkiac
@@ -394,6 +439,13 @@ engines:
     shortcut : gl
     timeout : 10.0
     disabled : True
+    about:
+      website: https://about.gitlab.com/
+      wikidata_id: Q16639197
+      official_api_documentation: https://docs.gitlab.com/ee/api/
+      use_official_api: false
+      require_api_key: false
+      results: JSON

   - name : github
     engine : github
@@ -411,6 +463,13 @@ engines:
     categories : it
     shortcut : cb
     disabled : True
+    about:
+      website: https://codeberg.org/
+      wikidata_id:
+      official_api_documentation: https://try.gitea.io/api/swagger
+      use_official_api: false
+      require_api_key: false
+      results: JSON

   - name : google
     engine : google
@@ -441,6 +500,13 @@ engines:
     first_page_num : 0
     categories : science
     shortcut : gos
+    about:
+      website: https://scholar.google.com/
+      wikidata_id: Q494817
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : google play apps
     engine : xpath
@@ -453,6 +519,13 @@ engines:
     categories : files
     shortcut : gpa
     disabled : True
+    about:
+      website: https://play.google.com/
+      wikidata_id: Q79576
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : google play movies
     engine : xpath
@@ -465,6 +538,13 @@ engines:
     categories : videos
     shortcut : gpm
     disabled : True
+    about:
+      website: https://play.google.com/
+      wikidata_id: Q79576
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : google play music
     engine : xpath
@@ -477,6 +557,13 @@ engines:
     categories : music
     shortcut : gps
     disabled : True
+    about:
+      website: https://play.google.com/
+      wikidata_id: Q79576
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : geektimes
     engine : xpath
@@ -489,6 +576,13 @@ engines:
     timeout : 4.0
     disabled : True
     shortcut : gt
+    about:
+      website: https://geektimes.ru/
+      wikidata_id: Q50572423
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : habrahabr
     engine : xpath
@@ -501,6 +595,13 @@ engines:
     timeout : 4.0
     disabled : True
     shortcut : habr
+    about:
+      website: https://habr.com/
+      wikidata_id: Q4494434
+      official_api_documentation: https://habr.com/en/docs/help/api/
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : hoogle
     engine : json_engine
@@ -513,6 +614,13 @@ engines:
     page_size : 20
     categories : it
     shortcut : ho
+    about:
+      website: https://www.haskell.org/
+      wikidata_id: Q34010
+      official_api_documentation: https://hackage.haskell.org/api
+      use_official_api: false
+      require_api_key: false
+      results: JSON

   - name : ina
     engine : ina
@@ -543,6 +651,13 @@ engines:
     timeout : 7.0
     disabled : True
     shortcut : lg
+    about:
+      website: http://libgen.rs/
+      wikidata_id: Q22017206
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : lobste.rs
     engine : xpath
@@ -555,6 +670,13 @@ engines:
     shortcut : lo
     timeout : 3.0
     disabled: True
+    about:
+      website: https://lobste.rs/
+      wikidata_id: Q60762874
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : metager
     engine : xpath
@@ -566,6 +688,13 @@ engines:
     categories : general
     shortcut : mg
     disabled : True
+    about:
+      website: https://metager.org/
+      wikidata_id: Q1924645
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : microsoft academic
     engine : microsoft_academic
@@ -589,6 +718,13 @@ engines:
     disabled: True
     timeout: 5.0
     shortcut : npm
+    about:
+      website: https://npms.io/
+      wikidata_id: Q7067518
+      official_api_documentation: https://api-docs.npms.io/
+      use_official_api: false
+      require_api_key: false
+      results: JSON

 # Requires Tor
   - name : not evil
@@ -617,6 +753,13 @@ engines:
     categories : science
     shortcut : oad
     timeout: 5.0
+    about:
+      website: https://www.openaire.eu/
+      wikidata_id: Q25106053
+      official_api_documentation: https://api.openaire.eu/
+      use_official_api: false
+      require_api_key: false
+      results: JSON

   - name : openairepublications
     engine : json_engine
@@ -629,6 +772,13 @@ engines:
     categories : science
     shortcut : oap
     timeout: 5.0
+    about:
+      website: https://www.openaire.eu/
+      wikidata_id: Q25106053
+      official_api_documentation: https://api.openaire.eu/
+      use_official_api: false
+      require_api_key: false
+      results: JSON

 #  - name : opensemanticsearch
 #    engine : opensemantic
@@ -650,6 +800,13 @@ engines:
     timeout : 4.0
     disabled : True
     shortcut : or
+    about:
+      website: https://openrepos.net/
+      wikidata_id:
+      official_api_documentation:
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : pdbe
     engine : pdbe
@@ -768,6 +925,13 @@ engines:
     content_xpath : .//div[@class="search-result-abstract"]
     shortcut : se
     categories : science
+    about:
+      website: https://www.semanticscholar.org/
+      wikidata_id: Q22908627
+      official_api_documentation: https://api.semanticscholar.org/
+      use_official_api: false
+      require_api_key: false
+      results: HTML

 # Spotify needs API credentials
 #  - name : spotify
@@ -876,6 +1040,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikibooks.org/
+      wikidata_id: Q367

   - name : wikinews
     engine : mediawiki
@@ -885,6 +1052,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikinews.org/
+      wikidata_id: Q964

   - name : wikiquote
     engine : mediawiki
@@ -896,6 +1066,9 @@ engines:
     disabled : True
     additional_tests:
       rosebud: *test_rosebud
+    about:
+      website: https://www.wikiquote.org/
+      wikidata_id: Q369

   - name : wikisource
     engine : mediawiki
@@ -905,6 +1078,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikisource.org/
+      wikidata_id: Q263

   - name : wiktionary
     engine : mediawiki
@@ -914,6 +1090,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wiktionary.org/
+      wikidata_id: Q151

   - name : wikiversity
     engine : mediawiki
@@ -923,6 +1102,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikiversity.org/
+      wikidata_id: Q370

   - name : wikivoyage
     engine : mediawiki
@@ -932,6 +1114,9 @@ engines:
     number_of_results : 5
     search_type : text
     disabled : True
+    about:
+      website: https://www.wikivoyage.org/
+      wikidata_id: Q373

   - name : wolframalpha
     shortcut : wa
@@ -979,6 +1164,13 @@ engines:
     first_page_num : 0
     page_size : 10
     disabled : True
+    about:
+      website: https://www.seznam.cz/
+      wikidata_id: Q3490485
+      official_api_documentation: https://api.sklik.cz/
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : mojeek
     shortcut: mjk
@@ -993,6 +1185,13 @@ engines:
     first_page_num : 0
     page_size : 10
     disabled : True
+    about:
+      website: https://www.mojeek.com/
+      wikidata_id: Q60747299
+      official_api_documentation: https://www.mojeek.com/services/api.html/
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : naver
     shortcut: nvr
@@ -1007,6 +1206,13 @@ engines:
     first_page_num : 1
     page_size : 10
     disabled : True
+    about:
+      website: https://www.naver.com/
+      wikidata_id: Q485639
+      official_api_documentation: https://developers.naver.com/docs/nmt/examples/
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : rubygems
     shortcut: rbg
@@ -1021,6 +1227,13 @@ engines:
     first_page_num : 1
     categories: it
     disabled : True
+    about:
+      website: https://rubygems.org/
+      wikidata_id: Q1853420
+      official_api_documentation: https://guides.rubygems.org/rubygems-org-api/
+      use_official_api: false
+      require_api_key: false
+      results: HTML

   - name : peertube
     engine: peertube

+ 206 - 0
utils/fetch_engine_descriptions.py

@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+
+import sys
+import json
+from urllib.parse import quote, urlparse
+from os.path import realpath, dirname
+import cld3
+from lxml.html import fromstring
+
+# set path
+sys.path.append(realpath(dirname(realpath(__file__)) + '/../'))
+
+from searx.engines.wikidata import send_wikidata_query
+from searx.utils import extract_text
+import searx
+import searx.search
+import searx.poolrequests
+
+SPARQL_WIKIPEDIA_ARTICLE = """
+SELECT DISTINCT ?item ?name
+WHERE {
+  VALUES ?item { %IDS% }
+  ?article schema:about ?item ;
+              schema:inLanguage ?lang ;
+              schema:name ?name ;
+              schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] .
+  FILTER(?lang in (%LANGUAGES_SPARQL%)) .
+  FILTER (!CONTAINS(?name, ':')) .
+}
+"""
+
+SPARQL_DESCRIPTION = """
+SELECT DISTINCT ?item ?itemDescription
+WHERE {
+  VALUES ?item { %IDS% }
+  ?item schema:description ?itemDescription .
+  FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%))
+}
+ORDER BY ?itemLang
+"""
+
+LANGUAGES = searx.settings['locales'].keys()
+LANGUAGES_SPARQL = ', '.join(set(map(lambda l: repr(l.split('_')[0]), LANGUAGES)))
+IDS = None
+
+descriptions = {}
+wd_to_engine_name = {}
+
+
+def normalize_description(description):
+    for c in [chr(c) for c in range(0, 31)]:
+        description = description.replace(c, ' ')
+    description = ' '.join(description.strip().split())
+    return description
+
+
+def update_description(engine_name, lang, description, source, replace=True):
+    if replace or lang not in descriptions[engine_name]:
+        descriptions[engine_name][lang] = [normalize_description(description), source]
+
+
+def get_wikipedia_summary(language, pageid):
+    search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
+    url = search_url.format(title=quote(pageid), language=language)
+    try:
+        response = searx.poolrequests.get(url)
+        response.raise_for_status()
+        api_result = json.loads(response.text)
+        return api_result.get('extract')
+    except:
+        return None
+
+
+def detect_language(text):
+    r = cld3.get_language(str(text))  # pylint: disable=E1101
+    if r is not None and r.probability >= 0.98 and r.is_reliable:
+        return r.language
+    return None
+
+
+def get_website_description(url, lang1, lang2=None):
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        'DNT': '1',
+        'Upgrade-Insecure-Requests': '1',
+        'Sec-GPC': '1',
+        'Cache-Control': 'max-age=0',
+    }
+    if lang1 is not None:
+        lang_list = [lang1]
+        if lang2 is not None:
+            lang_list.append(lang2)
+        headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
+    try:
+        response = searx.poolrequests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+    except Exception:
+        return (None, None)
+
+    try:
+        html = fromstring(response.text)
+    except ValueError:
+        html = fromstring(response.content)
+
+    description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content'))
+    if not description:
+        description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content'))
+    if not description:
+        description = extract_text(html.xpath('/html/head/title'))
+    lang = extract_text(html.xpath('/html/@lang'))
+    if lang is None and lang1:
+        lang = lang1
+    lang = detect_language(description) or lang or 'en'
+    lang = lang.split('_')[0]
+    lang = lang.split('-')[0]
+    return (lang, description)
+
+
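+# Initialize the searx engines, build the mapping from Wikidata ID to
+# engine name(s), and the "wd:Qxxx ..." ID list for the SPARQL queries.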
+def initialize():
+    global descriptions, wd_to_engine_name, IDS
+    searx.search.initialize()
+    for engine_name, engine in searx.engines.engines.items():
+        descriptions[engine_name] = {}
+        wikidata_id = getattr(engine, "about", {}).get('wikidata_id')
+        if wikidata_id is not None:
+            wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name)
+
+    IDS = ' '.join(list(map(lambda wd_id: 'wd:' + wd_id, wd_to_engine_name.keys())))
+
+
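+# Ask the Wikidata SPARQL endpoint for the item descriptions and store
+# them; one-word descriptions (like "website") are skipped.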
+def fetch_wikidata_descriptions():
+    global IDS
+    result = send_wikidata_query(SPARQL_DESCRIPTION
+                                 .replace('%IDS%', IDS)
+                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    if result is not None:
+        for binding in result['results']['bindings']:
+            wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+            lang = binding['itemDescription']['xml:lang']
+            description = binding['itemDescription']['value']
+            if ' ' in description:  # skip single-word descriptions (like "website")
+                for engine_name in wd_to_engine_name[wikidata_id]:
+                    update_description(engine_name, lang, description, 'wikidata')
+
+
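+# Find the Wikipedia article of each item, fetch its summary and store
+# it as the description.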
+def fetch_wikipedia_descriptions():
+    global IDS
+    result = send_wikidata_query(SPARQL_WIKIPEDIA_ARTICLE
+                                 .replace('%IDS%', IDS)
+                                 .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
+    if result is not None:
+        for binding in result['results']['bindings']:
+            wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
+            lang = binding['name']['xml:lang']
+            pageid = binding['name']['value']
+            description = get_wikipedia_summary(lang, pageid)
+            if description is not None and ' ' in description:
+                for engine_name in wd_to_engine_name[wikidata_id]:
+                    update_description(engine_name, lang, description, 'wikipedia')
+
+
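+# Reduce an engine's search URL to the root of its website: strip path,
+# query and fragment, and drop a leading 'api.' subdomain.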
+def normalize_url(url):
+    url = url.replace('{language}', 'en')
+    url = urlparse(url)._replace(path='/', params='', query='', fragment='').geturl()
+    url = url.replace('https://api.', 'https://')
+    return url
+
+
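+# Fetch the description of one engine website: first with the default
+# Accept-Language, then for a fixed list of languages not yet covered,
+# stopping as soon as the site ignores the requested language.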
+def fetch_website_description(engine_name, website):
+    default_lang, default_description = get_website_description(website, None, None)
+    if default_lang is None or default_description is None:
+        return
+    if default_lang not in descriptions[engine_name]:
+        descriptions[engine_name][default_lang] = [normalize_description(default_description), website]
+    for request_lang in ('en-US', 'es-US', 'fr-FR', 'zh', 'ja', 'ru', 'ar', 'ko'):
+        if request_lang.split('-')[0] not in descriptions[engine_name]:
+            lang, desc = get_website_description(website, request_lang, request_lang.split('-')[0])
+            if desc is not None and desc != default_description:
+                update_description(engine_name, lang, desc, website, replace=False)
+            else:
+                break
+
+
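+# Fall back to the engine website for engines where neither Wikidata
+# nor Wikipedia provided a description.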
+def fetch_website_descriptions():
+    for engine_name, engine in searx.engines.engines.items():
+        website = getattr(engine, "about", {}).get('website')
+        if website is None and hasattr(engine, "search_url"):
+            website = normalize_url(getattr(engine, "search_url"))
+        if website is None and hasattr(engine, "base_url"):
+            website = normalize_url(getattr(engine, "base_url"))
+        if website is not None:
+            fetch_website_description(engine_name, website)
+
+
+def main():
+    initialize()
+    fetch_wikidata_descriptions()
+    fetch_wikipedia_descriptions()
+    fetch_website_descriptions()
+
+    sys.stdout.write(json.dumps(descriptions, indent=1, separators=(',', ':'), ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()