petal_images.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Petalsearch Images
  3. """
  4. from json import loads
  5. from urllib.parse import urlencode
  6. from datetime import datetime
  7. from lxml import html
  8. from searx.utils import extract_text
  9. about = {
  10. "website": 'https://petalsearch.com/',
  11. "wikidata_id": 'Q104399280',
  12. "official_api_documentation": False,
  13. "use_official_api": False,
  14. "require_api_key": False,
  15. "results": 'JSON',
  16. }
  17. categories = ['images']
  18. paging = True
  19. time_range_support = False
  20. safesearch = True
  21. safesearch_table = {0: 'off', 1: 'moderate', 2: 'on'}
  22. base_url = 'https://petalsearch.com/'
  23. search_string = 'search?{query}&channel=image&ps=50&pn={page}&region={lang}&ss_mode={safesearch}&ss_type=normal'
  24. def request(query, params):
  25. search_path = search_string.format(
  26. query=urlencode({'query': query}),
  27. page=params['pageno'],
  28. lang=params['language'].lower(),
  29. safesearch=safesearch_table[params['safesearch']],
  30. )
  31. params['url'] = base_url + search_path
  32. return params
  33. def response(resp):
  34. results = []
  35. tree = html.fromstring(resp.text)
  36. root = tree.findall('.//script[3]')
  37. # Convert list to JSON
  38. json_content = extract_text(root)
  39. # Manipulate with JSON
  40. data = loads(json_content)
  41. for result in data['newImages']:
  42. url = result['url']
  43. title = result['title']
  44. thumbnail_src = result['image']
  45. pic_dict = result.get('extrainfo')
  46. date_from_api = pic_dict.get('publish_time')
  47. width = pic_dict.get('width')
  48. height = pic_dict.get('height')
  49. img_src = pic_dict.get('real_url')
  50. # Continue if img_src is missing
  51. if img_src is None or '':
  52. continue
  53. # Get and convert published date
  54. if date_from_api is not None:
  55. publishedDate = datetime.fromtimestamp(int(date_from_api))
  56. # Append results
  57. results.append(
  58. {
  59. 'template': 'images.html',
  60. 'url': url,
  61. 'title': title,
  62. 'img_src': img_src,
  63. 'thumbnail_src': thumbnail_src,
  64. 'width': width,
  65. 'height': height,
  66. 'publishedDate': publishedDate,
  67. }
  68. )
  69. return results