sogou_images.py 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Sogou-Images: A search engine for retrieving images from Sogou."""
  3. import json
  4. import re
  5. from urllib.parse import urlencode
  6. # about
  7. about = {
  8. "website": "https://pic.sogou.com/",
  9. "wikidata_id": "Q7554565",
  10. "use_official_api": False,
  11. "require_api_key": False,
  12. "results": "HTML",
  13. }
  14. # engine dependent config
  15. categories = ["images"]
  16. paging = True
  17. base_url = "https://pic.sogou.com"
  18. def request(query, params):
  19. query_params = {
  20. "query": query,
  21. "start": (params["pageno"] - 1) * 48,
  22. }
  23. params["url"] = f"{base_url}/pics?{urlencode(query_params)}"
  24. return params
  25. def response(resp):
  26. results = []
  27. match = re.search(r'window\.__INITIAL_STATE__\s*=\s*({.*?});', resp.text, re.S)
  28. if not match:
  29. return results
  30. data = json.loads(match.group(1))
  31. if "searchList" in data and "searchList" in data["searchList"]:
  32. for item in data["searchList"]["searchList"]:
  33. results.append(
  34. {
  35. "template": "images.html",
  36. "url": item.get("url", ""),
  37. "thumbnail_src": item.get("picUrl", ""),
  38. "img_src": item.get("picUrl", ""),
  39. "content": item.get("content_major", ""),
  40. "title": item.get("title", ""),
  41. "source": item.get("ch_site_name", ""),
  42. }
  43. )
  44. return results