logo

oasis-root

Compiled tree of Oasis Linux, based on own branch at <https://hacktivis.me/git/oasis/>. Clone with: git clone https://anongit.hacktivis.me/git/oasis-root.git

kenh14.py (7063B)


  1. from .common import InfoExtractor
  2. from ..utils import (
  3. clean_html,
  4. extract_attributes,
  5. get_element_by_class,
  6. get_element_html_by_attribute,
  7. get_elements_html_by_class,
  8. int_or_none,
  9. parse_duration,
  10. parse_iso8601,
  11. remove_start,
  12. strip_or_none,
  13. unescapeHTML,
  14. update_url,
  15. url_or_none,
  16. )
  17. from ..utils.traversal import traverse_obj
  18. class Kenh14VideoIE(InfoExtractor):
  19. _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
  20. _TESTS = [{
  21. 'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
  22. 'md5': '1ed67f9c3a1e74acf15db69590cf6210',
  23. 'info_dict': {
  24. 'id': '316173',
  25. 'ext': 'mp4',
  26. 'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
  27. 'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
  28. 'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
  29. 'tags': [],
  30. 'uploader': 'Unbox Therapy',
  31. 'upload_date': '20220517',
  32. 'view_count': int,
  33. 'duration': 722.86,
  34. 'timestamp': 1652764468,
  35. },
  36. }, {
  37. 'url': 'https://video.kenh14.vn/video-316174.chn',
  38. 'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
  39. 'info_dict': {
  40. 'id': '316174',
  41. 'ext': 'mp4',
  42. 'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
  43. 'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
  44. 'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
  45. 'tags': [],
  46. 'upload_date': '20220517',
  47. 'view_count': int,
  48. 'duration': 70.04,
  49. 'timestamp': 1652766021,
  50. },
  51. }, {
  52. 'url': 'https://video.kenh14.vn/0-344740.chn',
  53. 'md5': 'b843495d5e728142c8870c09b46df2a9',
  54. 'info_dict': {
  55. 'id': '344740',
  56. 'ext': 'mov',
  57. 'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
  58. 'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
  59. 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
  60. 'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
  61. 'uploader': 'Quang Vũ',
  62. 'upload_date': '20241024',
  63. 'view_count': int,
  64. 'duration': 198.88,
  65. 'timestamp': 1729741590,
  66. },
  67. }]
  68. def _real_extract(self, url):
  69. video_id = self._match_id(url)
  70. webpage = self._download_webpage(url, video_id)
  71. attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '')
  72. direct_url = attrs['data-vid']
  73. metadata = self._download_json(
  74. 'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format(
  75. remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False)
  76. formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
  77. subtitles = {}
  78. video_data = self._download_json(
  79. f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
  80. if hls_url := traverse_obj(video_data, ('hls', {url_or_none})):
  81. fmts, subs = self._extract_m3u8_formats_and_subtitles(
  82. hls_url, video_id, m3u8_id='hls', fatal=False)
  83. formats.extend(fmts)
  84. self._merge_subtitles(subs, target=subtitles)
  85. if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})):
  86. fmts, subs = self._extract_mpd_formats_and_subtitles(
  87. dash_url, video_id, mpd_id='dash', fatal=False)
  88. formats.extend(fmts)
  89. self._merge_subtitles(subs, target=subtitles)
  90. return {
  91. **traverse_obj(metadata, {
  92. 'duration': ('duration', {parse_duration}),
  93. 'uploader': ('author', {strip_or_none}),
  94. 'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
  95. 'view_count': ('views', {int_or_none}),
  96. }),
  97. 'id': video_id,
  98. 'title': (
  99. traverse_obj(metadata, ('title', {strip_or_none}))
  100. or clean_html(self._og_search_title(webpage))
  101. or clean_html(get_element_by_class('vdbw-title', webpage))),
  102. 'formats': formats,
  103. 'subtitles': subtitles,
  104. 'description': (
  105. clean_html(self._og_search_description(webpage))
  106. or clean_html(get_element_by_class('vdbw-sapo', webpage))),
  107. 'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')),
  108. 'tags': traverse_obj(self._html_search_meta('keywords', webpage), (
  109. {lambda x: x.split(';')}, ..., filter)),
  110. }
  111. class Kenh14PlaylistIE(InfoExtractor):
  112. _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
  113. _TESTS = [{
  114. 'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
  115. 'info_dict': {
  116. 'id': '71',
  117. 'title': 'Trần Tình (Naked love) mùa 2',
  118. 'description': 'md5:e9522339304956dea931722dd72eddb2',
  119. 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
  120. },
  121. 'playlist_count': 9,
  122. }, {
  123. 'url': 'https://video.kenh14.vn/playlist/0-72.chn',
  124. 'info_dict': {
  125. 'id': '72',
  126. 'title': 'Lau Lại Đầu Từ',
  127. 'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
  128. 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
  129. },
  130. 'playlist_count': 6,
  131. }]
  132. def _real_extract(self, url):
  133. playlist_id = self._match_id(url)
  134. webpage = self._download_webpage(url, playlist_id)
  135. category_detail = get_element_by_class('category-detail', webpage) or ''
  136. embed_info = traverse_obj(
  137. self._yield_json_ld(webpage, playlist_id),
  138. (lambda _, v: v['name'] and v['alternateName'], any)) or {}
  139. return self.playlist_from_matches(
  140. get_elements_html_by_class('video-item', webpage), playlist_id,
  141. (clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))),
  142. getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']),
  143. ie=Kenh14VideoIE, playlist_description=(
  144. clean_html(get_element_by_class('description', category_detail))
  145. or unescapeHTML(embed_info.get('alternateName'))),
  146. thumbnail=traverse_obj(
  147. self._og_search_thumbnail(webpage),
  148. ({url_or_none}, {update_url(query=None)})))