dictionary.py

import re
from urlparse import urljoin

from lxml import html

from searx.engines.xpath import extract_text
from searx.languages import language_codes

# engine metadata
categories = ['general']
url = 'http://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100

# queries look like "en-de word" or "english-german word"
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.+)', re.I)
results_xpath = './/table[@id="r"]/tr'


def request(query, params):
    m = parser_re.match(unicode(query, 'utf8'))
    if not m:
        # query is not in the "<from>-<to> <word>" form, leave params untouched
        return params

    from_lang, to_lang, query = m.groups()

    # expand two-letter codes (e.g. "en") to full language names using
    # searx's language list; bail out if the code is unknown
    if len(from_lang) == 2:
        lan = filter(lambda x: x[0][:2] == from_lang, language_codes)
        if lan:
            from_lang = lan[0][1].lower()
        else:
            return params

    if len(to_lang) == 2:
        lan = filter(lambda x: x[0][:2] == to_lang, language_codes)
        if lan:
            to_lang = lan[0][1].lower()
        else:
            return params

    params['url'] = url.format(from_lang=from_lang, to_lang=to_lang, query=query)
    params['from_lang'] = from_lang
    params['to_lang'] = to_lang
    params['query'] = query

    return params


def response(resp):
    results = []

    dom = html.fromstring(resp.text)

    # the first table row is the header, skip it
    for k, result in enumerate(dom.xpath(results_xpath)[1:]):
        try:
            from_result, to_results_raw = result.xpath('./td')
        except:
            # row does not contain exactly two cells, skip it
            continue

        to_results = []
        for to_result in to_results_raw.xpath('./p/a'):
            t = to_result.text_content()
            if t.strip():
                to_results.append(to_result.text_content())

        results.append({
            'url': urljoin(resp.url, '?%d' % k),
            'title': from_result.text_content(),
            'content': '; '.join(to_results)
        })

    return results
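
For a quick sanity check of the two entry points, here is a rough sketch that drives request() and response() by hand, outside of searx. It assumes the file above is importable as dictionary.py with the searx package installed (the module pulls in searx.languages at import time); FakeResponse and its inline HTML are made-up stand-ins shaped after the dictzone result table, not the real page searx would fetch.

# Usage sketch, not part of the engine. Assumes the module above is saved
# as dictionary.py and that searx is importable; FakeResponse and its
# inline HTML are hypothetical stand-ins for the fetched dictzone page.
from __future__ import print_function

from dictionary import request, response


class FakeResponse(object):
    """Bare-bones substitute for the HTTP response object searx passes in."""
    url = 'http://dictzone.com/english-german-dictionary/cat'
    text = u'''
    <table id="r">
      <tr><th>english</th><th>german</th></tr>
      <tr>
        <td>cat</td>
        <td><p><a href="#">Katze</a></p><p><a href="#">Kater</a></p></td>
      </tr>
    </table>
    '''


# "english-german cat" -> look up "cat" in the english-german dictionary
params = request('english-german cat', {})
print(params['url'])  # http://dictzone.com/english-german-dictionary/cat

for entry in response(FakeResponse()):
    print(entry['title'] + ' -> ' + entry['content'])  # cat -> Katze; Kater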