sogou.py

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Sogou search engine for searxng"""

from urllib.parse import urlencode
from lxml import html

from searx.utils import extract_text

# Metadata
about = {
    "website": "https://www.sogou.com/",
    "wikidata_id": "Q7554565",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

# Engine Configuration
categories = ["general"]
paging = True
time_range_support = True

time_range_dict = {
    'day': 'inttime_day',
    'week': 'inttime_week',
    'month': 'inttime_month',
    'year': 'inttime_year',
}

# Base URL
base_url = "https://www.sogou.com"
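

# Build the GET request against Sogou's /web endpoint; the page number is
# passed 1-based in the "page" query parameter.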
def request(query, params):
    query_params = {
        "query": query,
        "page": params["pageno"],
    }
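
    # Time filter: "s_from" selects one of Sogou's inttime_* buckets and
    # "tsn" appears to switch the restriction on.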
    if time_range_dict.get(params['time_range']):
        query_params["s_from"] = time_range_dict.get(params['time_range'])
        query_params["tsn"] = 1

    params["url"] = f"{base_url}/web?{urlencode(query_params)}"
    return params
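

# Parse the HTML result page: each organic hit sits in a div whose class
# contains "vrwrap"; relative "/link?url=" redirects are resolved against
# base_url.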
def response(resp):
    dom = html.fromstring(resp.text)
    results = []

    for item in dom.xpath('//div[contains(@class, "vrwrap")]'):
        title = extract_text(item.xpath('.//h3[contains(@class, "vr-title")]/a'))
        url = extract_text(item.xpath('.//h3[contains(@class, "vr-title")]/a/@href'))

        if url.startswith("/link?url="):
            url = f"{base_url}{url}"
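
        # Prefer the "star-wiki" snippet when present, otherwise fall back to
        # the plain "fz-mid space-txt" text block.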
        content = extract_text(item.xpath('.//div[contains(@class, "text-layout")]//p[contains(@class, "star-wiki")]'))
        if not content:
            content = extract_text(item.xpath('.//div[contains(@class, "fz-mid space-txt")]'))

        if title and url:
            results.append(
                {
                    "title": title,
                    "url": url,
                    "content": content,
                }
            )

    return results
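

# A minimal usage sketch, assuming a simplified params dict (the real SearXNG
# core passes a larger one that also carries headers, cookies, etc.):
#
#   params = {"pageno": 1, "time_range": "week"}
#   request("hello world", params)
#   # params["url"] ->
#   #   https://www.sogou.com/web?query=hello+world&page=1&s_from=inttime_week&tsn=1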