digg.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. """
  2. Digg (News, Social media)
  3. @website https://digg.com/
  4. @provide-api no
  5. @using-api no
  6. @results JSON (search API reply, decoded with json.loads)
  7. @stable no (HTML can change)
  8. @parse url, title, content, publishedDate, thumbnail
  9. """
  10. import random
  11. import string
  12. from dateutil import parser
  13. from json import loads
  14. from urllib.parse import urlencode
  15. from lxml import html
  16. from datetime import datetime
  17. # engine dependent config
  18. categories = ['news', 'social media']
  19. paging = True
  20. # search-url
  21. base_url = 'https://digg.com/'
  22. search_url = base_url + 'api/search/?{query}&from={position}&size=20&format=html'
  23. # specific xpath variables
  24. results_xpath = '//article'
  25. link_xpath = './/small[@class="time"]//a'
  26. title_xpath = './/h2//a//text()'
  27. content_xpath = './/p//text()'
  28. pubdate_xpath = './/time'
  29. digg_cookie_chars = string.ascii_uppercase + string.ascii_lowercase +\
  30. string.digits + "+_"
  31. # do search-request
  32. def request(query, params):
  33. offset = (params['pageno'] - 1) * 20
  34. params['url'] = search_url.format(position=offset,
  35. query=urlencode({'q': query}))
  36. params['cookies']['frontend.auid'] = ''.join(random.choice(
  37. digg_cookie_chars) for _ in range(22))
  38. return params
  39. # get response from search-request
  40. def response(resp):
  41. results = []
  42. search_result = loads(resp.text)
  43. # parse results
  44. for result in search_result['mapped']:
  45. published = datetime.strptime(result['created']['ISO'], "%Y-%m-%d %H:%M:%S")
  46. # append result
  47. results.append({'url': result['url'],
  48. 'title': result['title'],
  49. 'content': result['excerpt'],
  50. 'template': 'videos.html',
  51. 'publishedDate': published,
  52. 'thumbnail': result['images']['thumbImage']})
  53. # return results
  54. return results