flickr_noapi.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. #!/usr/bin/env python
  2. """
  3. Flickr (Images)
  4. @website https://www.flickr.com
  5. @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)
  6. @using-api no
  7. @results HTML
  8. @stable no
  9. @parse url, title, thumbnail, img_src
  10. """
  11. from json import loads
  12. from time import time
  13. import re
  14. from urllib.parse import urlencode
  15. from searx.engines import logger
  16. from searx.utils import ecma_unescape, html_to_text
  17. logger = logger.getChild('flickr-noapi')
  18. categories = ['images']
  19. url = 'https://www.flickr.com/'
  20. search_url = url + 'search?{query}&page={page}'
  21. time_range_url = '&min_upload_date={start}&max_upload_date={end}'
  22. photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
  23. modelexport_re = re.compile(r"^\s*modelExport:\s*({.*}),$", re.M)
  24. image_sizes = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')
  25. paging = True
  26. time_range_support = True
  27. time_range_dict = {'day': 60 * 60 * 24,
  28. 'week': 60 * 60 * 24 * 7,
  29. 'month': 60 * 60 * 24 * 7 * 4,
  30. 'year': 60 * 60 * 24 * 7 * 52}
  31. def build_flickr_url(user_id, photo_id):
  32. return photo_url.format(userid=user_id, photoid=photo_id)
  33. def _get_time_range_url(time_range):
  34. if time_range in time_range_dict:
  35. return time_range_url.format(start=time(), end=str(int(time()) - time_range_dict[time_range]))
  36. return ''
  37. def request(query, params):
  38. params['url'] = (search_url.format(query=urlencode({'text': query}), page=params['pageno'])
  39. + _get_time_range_url(params['time_range']))
  40. return params
  41. def response(resp):
  42. results = []
  43. matches = modelexport_re.search(resp.text)
  44. if matches is None:
  45. return results
  46. match = matches.group(1)
  47. model_export = loads(match)
  48. if 'legend' not in model_export:
  49. return results
  50. legend = model_export['legend']
  51. # handle empty page
  52. if not legend or not legend[0]:
  53. return results
  54. for index in legend:
  55. photo = model_export['main'][index[0]][int(index[1])][index[2]][index[3]][int(index[4])]
  56. author = ecma_unescape(photo.get('realname', ''))
  57. source = ecma_unescape(photo.get('username', '')) + ' @ Flickr'
  58. title = ecma_unescape(photo.get('title', ''))
  59. content = html_to_text(ecma_unescape(photo.get('description', '')))
  60. img_src = None
  61. # From the biggest to the lowest format
  62. for image_size in image_sizes:
  63. if image_size in photo['sizes']:
  64. img_src = photo['sizes'][image_size]['url']
  65. img_format = 'jpg ' \
  66. + str(photo['sizes'][image_size]['width']) \
  67. + 'x' \
  68. + str(photo['sizes'][image_size]['height'])
  69. break
  70. if not img_src:
  71. logger.debug('cannot find valid image size: {0}'.format(repr(photo)))
  72. continue
  73. # For a bigger thumbnail, keep only the url_z, not the url_n
  74. if 'n' in photo['sizes']:
  75. thumbnail_src = photo['sizes']['n']['url']
  76. elif 'z' in photo['sizes']:
  77. thumbnail_src = photo['sizes']['z']['url']
  78. else:
  79. thumbnail_src = img_src
  80. if 'ownerNsid' not in photo:
  81. # should not happen, disowned photo? Show it anyway
  82. url = img_src
  83. else:
  84. url = build_flickr_url(photo['ownerNsid'], photo['id'])
  85. result = {
  86. 'url': url,
  87. 'img_src': img_src,
  88. 'thumbnail_src': thumbnail_src,
  89. 'source': source,
  90. 'img_format': img_format,
  91. 'template': 'images.html'
  92. }
  93. result['author'] = author.encode(errors='ignore').decode()
  94. result['source'] = source.encode(errors='ignore').decode()
  95. result['title'] = title.encode(errors='ignore').decode()
  96. result['content'] = content.encode(errors='ignore').decode()
  97. results.append(result)
  98. return results