dictionary.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. import re
  2. from lxml import html
  3. from searx.engines.xpath import extract_text
  4. from searx.languages import language_codes
  # Engine configuration: module-level names read by request() and response().
  categories = ['general']

  # Lookup URL template; request() fills in from_lang, to_lang and query.
  # HTTPS is used so the looked-up text is not sent over the wire in clear.
  url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
  weight = 100

  # Matches "<from>-<to> <text>", case-insensitively.  The leading lazy
  # ".*?" lets arbitrary text precede the language pair.
  parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.+)', re.I)

  # Rows of the table with id "r" on the result page.
  results_xpath = './/table[@id="r"]/tr'
  def _resolve_lang(lang):
      """Normalize one side of a ``<from>-<to>`` language pair.

      The value is lower-cased first because ``parser_re`` matches
      case-insensitively while the lookup below compares exactly.  A
      two-letter value is treated as a language code and looked up in
      ``language_codes``; a value of any other length is passed through
      as a language name.

      :param lang: language code or language name taken from the query.
      :returns: the lower-cased language name, or ``None`` when a
          two-letter code has no entry in ``language_codes``.
      """
      lang = lang.lower()
      if len(lang) != 2:
          return lang
      for entry in language_codes:
          # entry[0] is read as a locale-style code and entry[1] as the
          # language name -- layout of language_codes assumed, TODO confirm.
          if entry[0][:2] == lang:
              return entry[1].lower()
      return None


  def request(query, params):
      """Fill ``params`` with the dictionary lookup URL for ``query``.

      ``query`` has to match ``parser_re`` (``<from>-<to> <text>``).  When
      it does not, or when a two-letter language code cannot be resolved,
      ``params`` is returned untouched (no ``'url'`` key is set).

      :param query: raw search query, as UTF-8 bytes or as text.
      :param params: request parameter dict; updated in place with the
          keys ``'url'``, ``'from_lang'``, ``'to_lang'`` and ``'query'``.
      :returns: the same ``params`` object.
      :raises UnicodeDecodeError: if ``query`` is bytes that are not
          valid UTF-8.
      """
      # Decode only byte strings: on Python 2 ``str`` is ``bytes``, on
      # Python 3 there is no ``unicode`` builtin and text needs no decoding.
      if isinstance(query, bytes):
          query = query.decode('utf8')

      match = parser_re.match(query)
      if not match:
          return params

      from_lang, to_lang, query = match.groups()
      from_lang = _resolve_lang(from_lang)
      to_lang = _resolve_lang(to_lang)
      if from_lang is None or to_lang is None:
          return params

      params['url'] = url.format(from_lang=from_lang,
                                 to_lang=to_lang,
                                 query=query)
      params['from_lang'] = from_lang
      params['to_lang'] = to_lang
      params['query'] = query
      return params
  def response(resp):
      """Convert a dictionary result page into a list of result dicts.

      Every row matched by ``results_xpath`` except the first is expected
      to hold exactly two ``<td>`` cells: the source term and its
      translations.  Rows with any other number of cells are skipped.

      :param resp: HTTP response object; ``resp.text`` is parsed as HTML
          and ``resp.url`` is used as the URL of every result.
      :returns: list of dicts with the keys ``'url'``, ``'title'`` (text
          of the first cell) and ``'content'`` (the non-blank ``./p/a``
          link texts of the second cell, joined with ``'; '``).
      """
      results = []
      dom = html.fromstring(resp.text)

      # The first matched row is skipped ([1:]) -- presumably a header row.
      for row in dom.xpath(results_xpath)[1:]:
          cells = row.xpath('./td')
          # An explicit length check replaces a bare ``except:`` around the
          # tuple unpacking, which would also have hidden unrelated errors.
          if len(cells) != 2:
              continue
          from_result, to_results_raw = cells

          # Compute each link's text once and keep only non-blank entries.
          link_texts = (link.text_content()
                        for link in to_results_raw.xpath('./p/a'))
          to_results = [text for text in link_texts if text.strip()]

          results.append({
              'url': resp.url,
              'title': from_result.text_content(),
              'content': '; '.join(to_results),
          })

      return results