niconico.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """Niconico search engine for searxng"""
from datetime import datetime, timedelta
from urllib.parse import quote, urlencode

from lxml import html

from searx.utils import eval_xpath_getindex, eval_xpath_list, eval_xpath, extract_text
  7. about = {
  8. "website": "https://www.nicovideo.jp/",
  9. "wikidata_id": "Q697233",
  10. "use_official_api": False,
  11. "require_api_key": False,
  12. "results": "HTML",
  13. "language": "ja",
  14. }
  15. categories = ["videos"]
  16. paging = True
  17. time_range_support = True
  18. time_range_dict = {"day": 1, "week": 7, "month": 30, "year": 365}
  19. base_url = "https://www.nicovideo.jp"
  20. embed_url = "https://embed.nicovideo.jp"
  21. results_xpath = '//li[@data-video-item]'
  22. url_xpath = './/a[@class="itemThumbWrap"]/@href'
  23. video_length_xpath = './/span[@class="videoLength"]'
  24. upload_time_xpath = './/p[@class="itemTime"]//span[@class="time"]/text()'
  25. title_xpath = './/p[@class="itemTitle"]/a'
  26. content_xpath = './/p[@class="itemDescription"]/@title'
  27. thumbnail_xpath = './/img[@class="thumb"]/@src'
  28. def request(query, params):
  29. query_params = {"page": params['pageno']}
  30. if time_range_dict.get(params['time_range']):
  31. time_diff_days = time_range_dict[params['time_range']]
  32. start_date = datetime.now() - timedelta(days=time_diff_days)
  33. query_params['start'] = start_date.strftime('%Y-%m-%d')
  34. params['url'] = f"{base_url}/search/{query}?{urlencode(query_params)}"
  35. return params
  36. def response(resp):
  37. results = []
  38. dom = html.fromstring(resp.text)
  39. for item in eval_xpath_list(dom, results_xpath):
  40. relative_url = eval_xpath_getindex(item, url_xpath, 0)
  41. video_id = relative_url.rsplit('?', maxsplit=1)[0].split('/')[-1]
  42. url = f"{base_url}/watch/{video_id}"
  43. iframe_src = f"{embed_url}/watch/{video_id}"
  44. length = None
  45. video_length = eval_xpath_getindex(item, video_length_xpath, 0)
  46. if len(video_length) > 0:
  47. try:
  48. timediff = datetime.strptime(video_length, "%M:%S")
  49. length = timedelta(minutes=timediff.minute, seconds=timediff.second)
  50. except ValueError:
  51. pass
  52. published_date = None
  53. upload_time = eval_xpath_getindex(item, upload_time_xpath, 0)
  54. if len(upload_time) > 0:
  55. try:
  56. published_date = datetime.strptime(upload_time, "%Y/%m/%d %H:%M")
  57. except ValueError:
  58. pass
  59. results.append(
  60. {
  61. 'template': 'videos.html',
  62. 'title': extract_text(eval_xpath(item, title_xpath)),
  63. 'content': eval_xpath_getindex(item, content_xpath, 0),
  64. 'url': url,
  65. "iframe_src": iframe_src,
  66. 'thumbnail': eval_xpath_getindex(item, thumbnail_xpath, 0),
  67. 'length': length,
  68. "publishedDate": published_date,
  69. }
  70. )
  71. return results