tootfinder.py 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # lint: pylint
  3. """Tootfinder (social media)
  4. """
from datetime import datetime
from json import loads
from urllib.parse import quote

from searx.utils import html_to_text
  8. about = {
  9. 'website': "https://www.tootfinder.ch",
  10. 'official_api_documentation': "https://wiki.tootfinder.ch/index.php?name=the-tootfinder-rest-api",
  11. 'use_official_api': True,
  12. 'require_api_key': False,
  13. 'results': "JSON",
  14. }
  15. categories = ['social media']
  16. base_url = "https://www.tootfinder.ch"
  17. def request(query, params):
  18. params['url'] = f"{base_url}/rest/api/search/{query}"
  19. return params
  20. def response(resp):
  21. results = []
  22. # the API of tootfinder has an issue that errors on server side are appended to the API response as HTML
  23. # thus we're only looking for the line that contains the actual json data and ignore everything else
  24. json_str = ""
  25. for line in resp.text.split("\n"):
  26. if line.startswith("[{"):
  27. json_str = line
  28. break
  29. for result in loads(json_str):
  30. thumbnail = None
  31. attachments = result.get('media_attachments', [])
  32. images = [attachment['preview_url'] for attachment in attachments if attachment['type'] == 'image']
  33. if len(images) > 0:
  34. thumbnail = images[0]
  35. title = result.get('card', {}).get('title')
  36. if not title:
  37. title = html_to_text(result['content'])[:75]
  38. results.append(
  39. {
  40. 'url': result['url'],
  41. 'title': title,
  42. 'content': html_to_text(result['content']),
  43. 'thumbnail': thumbnail,
  44. 'publishedDate': datetime.strptime(result['created_at'], '%Y-%m-%d %H:%M:%S'),
  45. }
  46. )
  47. return results