|
@@ -1,7 +1,7 @@
|
|
|
"""
|
|
|
Wikipedia (Web)
|
|
|
|
|
|
- @website https://{language}.wikipedia.org
|
|
|
+ @website https://en.wikipedia.org/api/rest_v1/
|
|
|
@provide-api yes
|
|
|
|
|
|
@using-api yes
|
|
@@ -12,21 +12,11 @@
|
|
|
|
|
|
from json import loads
|
|
|
from lxml.html import fromstring
|
|
|
-from searx.url_utils import quote, urlencode
|
|
|
-from searx.utils import match_language
|
|
|
+from searx.url_utils import quote
|
|
|
+from searx.utils import match_language, searx_useragent
|
|
|
|
|
|
|
|
|
-base_url = u'https://{language}.wikipedia.org/'
|
|
|
-search_url = base_url + u'w/api.php?'\
|
|
|
- 'action=query'\
|
|
|
- '&format=json'\
|
|
|
- '&{query}'\
|
|
|
- '&prop=extracts|pageimages|pageprops'\
|
|
|
- '&ppprop=disambiguation'\
|
|
|
- '&exintro'\
|
|
|
- '&explaintext'\
|
|
|
- '&pithumbsize=300'\
|
|
|
- '&redirects'
|
|
|
+search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
|
|
|
supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'
|
|
|
|
|
|
|
|
@@ -41,51 +31,37 @@ def url_lang(lang):
|
|
|
|
|
|
def request(query, params):
    """Fill in *params* for a Wikipedia REST ``page/summary`` lookup.

    :param query: the search terms, used as the article title to look up.
    :param params: request parameters; ``params['language']`` is read and
        ``params['url']`` / ``params['headers']['User-Agent']`` are written.
    :returns: the same ``params`` dict, updated in place.
    """
    # An all-lowercase query is title-cased before the lookup
    # (presumably to match the usual capitalisation of article titles).
    if query.islower():
        query = query.title()

    # safe='' also percent-encodes '/', so a title such as "AC/DC" stays a
    # single path segment instead of being split into two.
    params['url'] = search_url.format(title=quote(query, safe=''),
                                      language=url_lang(params['language']))

    params['headers']['User-Agent'] = searx_useragent()

    return params
|
|
|
|
|
|
|
|
|
|
|
|
def response(resp):
    """Convert a Wikipedia REST ``page/summary`` reply into searx results.

    :param resp: HTTP response object; ``resp.ok`` and ``resp.text`` are read.
    :returns: an empty list when the request failed, the page is not of type
        ``'standard'``, or the reply lacks a title or desktop page URL;
        otherwise a list with one plain link result and one infobox result.
    """
    if not resp.ok:
        return []

    api_result = loads(resp.text)
    if not isinstance(api_result, dict):
        return []

    # Only 'standard' pages are reported; every other page type is skipped.
    # .get() keeps a reply without a 'type' field from raising KeyError.
    if api_result.get('type') != 'standard':
        return []

    title = api_result.get('title')
    content_urls = api_result.get('content_urls') or {}
    wikipedia_link = (content_urls.get('desktop') or {}).get('page')
    if not title or not wikipedia_link:
        return []

    # 'thumbnail' may be absent or null; either way there is no image.
    thumbnail = api_result.get('thumbnail') or {}

    return [
        {'url': wikipedia_link, 'title': title},
        {'infobox': title,
         'id': wikipedia_link,
         'content': api_result.get('extract', ''),
         'img_src': thumbnail.get('source'),
         'urls': [{'title': 'Wikipedia', 'url': wikipedia_link}]},
    ]
|