# not_evil.py — searx engine for the "not Evil" onion search service
"""
 not Evil (Onions)

 @website      http://hss3uro2hsxfogfq.onion
 @provide-api  yes (http://hss3uro2hsxfogfq.onion/api.htm)

 @using-api    no
 @results      HTML
 @stable       no
 @parse        url, title, content
"""

from urllib.parse import urlencode
from lxml import html
from searx.engines.xpath import extract_text
  13. # engine dependent config
  14. categories = ['onions']
  15. paging = True
  16. page_size = 20
  17. # search-url
  18. base_url = 'http://hss3uro2hsxfogfq.onion/'
  19. search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}'
  20. # specific xpath variables
  21. results_xpath = '//*[@id="content"]/div/p'
  22. url_xpath = './span[1]'
  23. title_xpath = './a[1]'
  24. content_xpath = './text()'
  25. # do search-request
  26. def request(query, params):
  27. offset = (params['pageno'] - 1) * page_size
  28. params['url'] = base_url + search_url.format(pageno=offset,
  29. query=urlencode({'q': query}),
  30. page_size=page_size)
  31. return params
  32. # get response from search-request
  33. def response(resp):
  34. results = []
  35. # needed because otherwise requests guesses wrong encoding
  36. resp.encoding = 'utf8'
  37. dom = html.fromstring(resp.text)
  38. # parse results
  39. for result in dom.xpath(results_xpath):
  40. url = extract_text(result.xpath(url_xpath)[0])
  41. title = extract_text(result.xpath(title_xpath)[0])
  42. content = extract_text(result.xpath(content_xpath))
  43. # append result
  44. results.append({'url': url,
  45. 'title': title,
  46. 'content': content,
  47. 'is_onion': True})
  48. return results