dictzone.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
  1. import re
  2. from urlparse import urljoin
  3. from lxml import html
  4. from cgi import escape
  5. from searx.engines.xpath import extract_text
  6. from searx.languages import language_codes
  7. categories = ['general']
  8. url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
  9. weight = 100
  10. parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.+)', re.I)
  11. results_xpath = './/table[@id="r"]/tr'
  12. def is_valid_lang(lang):
  13. is_abbr = (len(lang) == 2)
  14. if is_abbr:
  15. for l in language_codes:
  16. if l[0][:2] == lang.lower():
  17. return (True, l[1].lower())
  18. return False
  19. else:
  20. for l in language_codes:
  21. if l[1].lower() == lang.lower():
  22. return (True, l[1].lower())
  23. return False
  24. def request(query, params):
  25. m = parser_re.match(unicode(query, 'utf8'))
  26. if not m:
  27. return params
  28. from_lang, to_lang, query = m.groups()
  29. from_lang = is_valid_lang(from_lang)
  30. to_lang = is_valid_lang(to_lang)
  31. if not from_lang or not to_lang:
  32. return params
  33. params['url'] = url.format(from_lang=from_lang[1],
  34. to_lang=to_lang[1],
  35. query=query)
  36. return params
  37. def response(resp):
  38. results = []
  39. dom = html.fromstring(resp.text)
  40. for k, result in enumerate(dom.xpath(results_xpath)[1:]):
  41. try:
  42. from_result, to_results_raw = result.xpath('./td')
  43. except:
  44. continue
  45. to_results = []
  46. for to_result in to_results_raw.xpath('./p/a'):
  47. t = to_result.text_content()
  48. if t.strip():
  49. to_results.append(to_result.text_content())
  50. results.append({
  51. 'url': urljoin(resp.url, '?%d' % k),
  52. 'title': escape(from_result.text_content()),
  53. 'content': escape('; '.join(to_results))
  54. })
  55. return results