From 6f4fc5660f40f3458882a8f51601eae4af7be609 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 8 Mar 2023 06:37:34 -0600 Subject: [PATCH] [extractor/chilloutzone] Fix extractor (#6445) Closes #6029 Authored by: bashonly --- yt_dlp/extractor/chilloutzone.py | 128 +++++++++++++++++++------------ 1 file changed, 79 insertions(+), 49 deletions(-) diff --git a/yt_dlp/extractor/chilloutzone.py b/yt_dlp/extractor/chilloutzone.py index 1a2f77c4e..ac4252f1b 100644 --- a/yt_dlp/extractor/chilloutzone.py +++ b/yt_dlp/extractor/chilloutzone.py @@ -1,93 +1,123 @@ -import json +import base64 from .common import InfoExtractor -from .youtube import YoutubeIE -from ..compat import compat_b64decode from ..utils import ( clean_html, - ExtractorError + int_or_none, + traverse_obj, ) class ChilloutzoneIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P[\w|-]+)\.html' + _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P[\w-]+)\.html' _TESTS = [{ - 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', + 'url': 'https://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', 'md5': 'a76f3457e813ea0037e5244f509e66d1', 'info_dict': { 'id': 'enemene-meck-alle-katzen-weg', 'ext': 'mp4', 'title': 'Enemene Meck - Alle Katzen weg', 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?', + 'duration': 24, }, }, { 'note': 'Video hosted at YouTube', - 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html', + 'url': 'https://www.chilloutzone.net/video/eine-sekunde-bevor.html', 'info_dict': { 'id': '1YVQaAgHyRU', 'ext': 'mp4', 'title': '16 Photos Taken 1 Second Before Disaster', 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814', 'uploader': 'BuzzFeedVideo', - 'uploader_id': 'BuzzFeedVideo', + 'uploader_id': '@BuzzFeedVideo', 'upload_date': '20131105', + 'availability': 'public', + 'thumbnail': 'https://i.ytimg.com/vi/1YVQaAgHyRU/maxresdefault.jpg', + 'tags': 'count:41', + 'like_count': int, + 'playable_in_embed': True, + 'channel_url': 'https://www.youtube.com/channel/UCpko_-a4wgz2u_DgDgd9fqA', + 'chapters': 'count:6', + 'live_status': 'not_live', + 'view_count': int, + 'categories': ['Entertainment'], + 'age_limit': 0, + 'channel_id': 'UCpko_-a4wgz2u_DgDgd9fqA', + 'duration': 100, + 'uploader_url': 'http://www.youtube.com/@BuzzFeedVideo', + 'channel_follower_count': int, + 'channel': 'BuzzFeedVideo', }, }, { - 'note': 'Video hosted at Vimeo', - 'url': 'http://www.chilloutzone.net/video/icon-blending.html', - 'md5': '2645c678b8dc4fefcc0e1b60db18dac1', + 'url': 'https://www.chilloutzone.net/video/icon-blending.html', + 'md5': '2f9d6850ec567b24f0f4fa143b9aa2f9', 'info_dict': { - 'id': '85523671', + 'id': 'LLNkHpSjBfc', 'ext': 'mp4', - 'title': 'The Sunday Times - Icons', - 'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}', - 'uploader': 'Us', - 'uploader_id': 'usfilms', - 'upload_date': '20140131' + 'title': 'The Sunday Times Making of Icons', + 'description': 'md5:b9259fcf63a1669e42001e5db677f02a', + 'uploader': 'MadFoxUA', + 'uploader_id': '@MadFoxUA', + 'upload_date': '20140204', + 'channel_id': 'UCSZa9Y6-Vl7c11kWMcbAfCw', + 'channel_url': 'https://www.youtube.com/channel/UCSZa9Y6-Vl7c11kWMcbAfCw', + 'comment_count': int, + 'uploader_url': 'http://www.youtube.com/@MadFoxUA', + 'duration': 66, + 'live_status': 'not_live', + 'channel_follower_count': int, + 'playable_in_embed': True, + 'view_count': int, + 'like_count': int, + 'thumbnail': 'https://i.ytimg.com/vi/LLNkHpSjBfc/maxresdefault.jpg', + 'categories': ['Comedy'], + 'availability': 'public', + 'tags': [], + 'channel': 'MadFoxUA', + 'age_limit': 0, + }, + }, { + 'url': 'https://www.chilloutzone.net/video/ordentlich-abgeschuettelt.html', + 'info_dict': { + 'id': 'ordentlich-abgeschuettelt', + 'ext': 'mp4', + 'title': 'Ordentlich abgeschüttelt', + 'description': 'md5:d41541966b75d3d1e8ea77a94ea0d329', + 'duration': 18, }, }] def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + b64_data = self._html_search_regex( + r'var cozVidData\s*=\s*"([^"]+)"', webpage, 'video data') + info = self._parse_json(base64.b64decode(b64_data).decode(), video_id) - base64_video_info = self._html_search_regex( - r'var cozVidData = "(.+?)";', webpage, 'video data') - decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8') - video_info_dict = json.loads(decoded_video_info) + video_url = info.get('mediaUrl') + native_platform = info.get('nativePlatform') - # get video information from dict - video_url = video_info_dict['mediaUrl'] - description = clean_html(video_info_dict.get('description')) - title = video_info_dict['title'] - native_platform = video_info_dict['nativePlatform'] - native_video_id = video_info_dict['nativeVideoId'] - source_priority = video_info_dict['sourcePriority'] - - # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed) - if native_platform is None: - youtube_url = YoutubeIE._extract_url(webpage) - if youtube_url: - return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) - - # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or - # the own CDN - if source_priority == 'native': + if native_platform and info.get('sourcePriority') == 'native': + native_video_id = info['nativeVideoId'] if native_platform == 'youtube': - return self.url_result(native_video_id, ie='Youtube') - if native_platform == 'vimeo': - return self.url_result( - 'http://vimeo.com/' + native_video_id, ie='Vimeo') + return self.url_result(native_video_id, 'Youtube') + elif native_platform == 'vimeo': + return self.url_result(f'https://vimeo.com/{native_video_id}', 'Vimeo') - if not video_url: - raise ExtractorError('No video found') + elif not video_url: + # Possibly a standard youtube embed? + # TODO: Investigate if site still does this (there are no tests for it) + return self.url_result(url, 'Generic') return { 'id': video_id, 'url': video_url, 'ext': 'mp4', - 'title': title, - 'description': description, + **traverse_obj(info, { + 'title': 'title', + 'description': ('description', {clean_html}), + 'duration': ('videoLength', {int_or_none}), + 'width': ('videoWidth', {int_or_none}), + 'height': ('videoHeight', {int_or_none}), + }), }