  1. # Doku Wiki
  2. #
  3. # @website https://www.dokuwiki.org/
  4. # @provide-api yes
  5. # (https://www.dokuwiki.org/devel:xmlrpc)
  6. #
  7. # @using-api no
  8. # @results HTML
  9. # @stable yes
  10. # @parse (general) url, title, content
  11. from urllib.parse import urlencode
  12. from lxml.html import fromstring
  13. from searx.engines.xpath import extract_text
  14. from searx.utils import eval_xpath
  15. # engine dependent config
  16. categories = ['general'] # TODO , 'images', 'music', 'videos', 'files'
  17. paging = False
  18. language_support = False
  19. number_of_results = 5
  20. # search-url
  21. # Doku is OpenSearch compatible
  22. base_url = 'http://localhost:8090'
  23. search_url = '/?do=search'\
  24. '&{query}'
  25. # TODO '&startRecord={offset}'\
  26. # TODO '&maximumRecords={limit}'\
  27. # do search-request
  28. def request(query, params):
  29. params['url'] = base_url +\
  30. search_url.format(query=urlencode({'id': query}))
  31. return params
  32. # get response from search-request
  33. def response(resp):
  34. results = []
  35. doc = fromstring(resp.text)
  36. # parse results
  37. # Quickhits
  38. for r in eval_xpath(doc, '//div[@class="search_quickresult"]/ul/li'):
  39. try:
  40. res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
  41. except:
  42. continue
  43. if not res_url:
  44. continue
  45. title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
  46. # append result
  47. results.append({'title': title,
  48. 'content': "",
  49. 'url': base_url + res_url})
  50. # Search results
  51. for r in eval_xpath(doc, '//dl[@class="search_results"]/*'):
  52. try:
  53. if r.tag == "dt":
  54. res_url = eval_xpath(r, './/a[@class="wikilink1"]/@href')[-1]
  55. title = extract_text(eval_xpath(r, './/a[@class="wikilink1"]/@title'))
  56. elif r.tag == "dd":
  57. content = extract_text(eval_xpath(r, '.'))
  58. # append result
  59. results.append({'title': title,
  60. 'content': content,
  61. 'url': base_url + res_url})
  62. except:
  63. continue
  64. if not res_url:
  65. continue
  66. # return results
  67. return results