www1x.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. ## 1x (Images)
  2. #
  3. # @website http://1x.com/
  4. # @provide-api no
  5. #
  6. # @using-api no
  7. # @results HTML
  8. # @stable no (HTML can change)
  9. # @parse url, title, thumbnail, img_src, content
  10. from urllib import urlencode
  11. from urlparse import urljoin
  12. from lxml import html
  13. import string
  14. import re
  15. # engine dependent config
  16. categories = ['images']
  17. paging = False
  18. # search-url
  19. base_url = 'http://1x.com'
  20. search_url = base_url+'/backend/search.php?{query}'
  21. # do search-request
  22. def request(query, params):
  23. params['url'] = search_url.format(query=urlencode({'q': query}))
  24. return params
  25. # get response from search-request
  26. def response(resp):
  27. results = []
  28. # get links from result-text
  29. results_parts = re.split(r'(</a>|<a)', resp.text)
  30. cur_element = ''
  31. # iterate over link parts
  32. for result_part in results_parts:
  33. # processed start and end of link
  34. if result_part == '<a':
  35. cur_element = result_part
  36. continue
  37. elif result_part != '</a>':
  38. cur_element += result_part
  39. continue
  40. cur_element += result_part
  41. # fix xml-error
  42. cur_element = string.replace(cur_element, '"></a>', '"/></a>')
  43. dom = html.fromstring(cur_element)
  44. link = dom.xpath('//a')[0]
  45. url = urljoin(base_url, link.attrib.get('href'))
  46. title = link.attrib.get('title', '')
  47. thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
  48. # TODO: get image with higher resolution
  49. img_src = thumbnail_src
  50. # check if url is showing to a photo
  51. if '/photo/' not in url:
  52. continue
  53. # append result
  54. results.append({'url': url,
  55. 'title': title,
  56. 'img_src': img_src,
  57. 'content': '',
  58. 'thumbnail_src': thumbnail_src,
  59. 'template': 'images.html'})
  60. # return results
  61. return results