[extractor/vlive] Replace with `VLiveWebArchiveIE` (#6196)

vlive has shut down: https://web.archive.org/web/20221031171019/https://www.vlive.tv/notice/4749

Authored by: seproDev
This commit is contained in:
sepro 2023-02-12 05:47:03 +01:00 committed by GitHub
parent a31d0fa6c3
commit b3eaab7ca2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 255 additions and 394 deletions

View File

@ -122,6 +122,7 @@ from .applepodcasts import ApplePodcastsIE
from .archiveorg import ( from .archiveorg import (
ArchiveOrgIE, ArchiveOrgIE,
YoutubeWebArchiveIE, YoutubeWebArchiveIE,
VLiveWebArchiveIE,
) )
from .arcpublishing import ArcPublishingIE from .arcpublishing import ArcPublishingIE
from .arkena import ArkenaIE from .arkena import ArkenaIE
@ -2183,11 +2184,6 @@ from .vk import (
VKUserVideosIE, VKUserVideosIE,
VKWallPostIE, VKWallPostIE,
) )
from .vlive import (
VLiveIE,
VLivePostIE,
VLiveChannelIE,
)
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE
from .vodpl import VODPlIE from .vodpl import VODPlIE
from .vodplatform import VODPlatformIE from .vodplatform import VODPlatformIE

View File

@ -1,8 +1,10 @@
import json import json
import re import re
import urllib.error
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from .naver import NaverBaseIE
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_HTTPError, compat_urllib_parse_unquote from ..compat import compat_HTTPError, compat_urllib_parse_unquote
from ..utils import ( from ..utils import (
@ -945,3 +947,237 @@ class YoutubeWebArchiveIE(InfoExtractor):
if not info.get('title'): if not info.get('title'):
info['title'] = video_id info['title'] = video_id
return info return info
class VLiveWebArchiveIE(InfoExtractor):
    """Extractor for vlive.tv videos saved on web.archive.org.

    Every request (video page, main script, inkey, vod data, m3u8 playlist,
    segments, MP4 files and subtitles) is issued against the Wayback Machine
    rather than the original hosts.
    """
    IE_NAME = 'web.archive:vlive'
    IE_DESC = 'web.archive.org saved vlive videos'
    _VALID_URL = r'''(?x)
            (?:https?://)?web\.archive\.org/
            (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
            (?:https?(?::|%3[Aa])//)?(?:
                (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+)  # VLive URL
            )
        '''
    _TESTS = [{
        'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
            'uploader_url': None,
            'uploader': None,
            'upload_date': '20150817',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439816449,
            'like_count': int,
            'channel': 'Girl\'s Day',
            'channel_id': 'FDF27',
            'comment_count': int,
            'release_timestamp': 1439818140,
            'release_date': '20150817',
            'duration': 1014,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
            'uploader_url': 'http://vlive.tv',
            'uploader': None,
            'upload_date': '20161112',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1478923074,
            'like_count': int,
            'channel': 'EXO',
            'channel_id': 'F94BD',
            'comment_count': int,
            'release_timestamp': 1478924280,
            'release_date': '20161112',
            'duration': 906,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
        'info_dict': {
            'id': '101870',
            'ext': 'mp4',
            'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
            'creator': 'Dispatch',
            'view_count': int,
            'subtitles': 'mincount:6',
            'uploader_id': 'V__FRA08071',
            'uploader_url': 'http://vlive.tv',
            'uploader': None,
            'upload_date': '20181130',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1543601327,
            'like_count': int,
            'channel': 'Dispatch',
            'channel_id': 'C796F3',
            'comment_count': int,
            'release_timestamp': 1543601040,
            'release_date': '20181130',
            'duration': 279,
        },
        'params': {
            'skip_download': True,
        },
    }]

    # The wayback machine has special timestamp and "mode" values:
    # timestamp:
    #     1 = the first capture
    #     2 = the last capture
    # mode:
    #     id_ = Identity - perform no alterations of the original resource, return it as it was archived.
    _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'

    def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
        """Download the archived copy of `url` in identity (`id_`) mode.

        @param url        Original (non-archive) URL of the resource
        @param video_id   Passed through to `_download_webpage`
        @param timestamp  Wayback timestamp; a falsy value selects '2' (the last capture)
        @param kwargs     Forwarded unchanged to `_download_webpage`
        @returns          Whatever `_download_webpage` returns
        @raises ExtractorError  'Page was not archived' (expected) on HTTP 404
        """
        # The date group of _VALID_URL is optional, so a caller may hand over None.
        # Without this fallback the request would go to ".../web/Noneid_/...".
        timestamp = timestamp or '2'
        for retry in self.RetryManager():
            try:
                return self._download_webpage(
                    f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
            except ExtractorError as e:
                # A 404 means there is no capture; retrying cannot help
                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
                    raise ExtractorError('Page was not archived', expected=True)
                retry.error = e

    def _download_archived_json(self, url, video_id, **kwargs):
        """Download an archived page and parse it as JSON.

        @raises ExtractorError  'Page was not archived' (expected) when the
                                download yields a falsy result
        """
        page = self._download_archived_page(url, video_id, **kwargs)
        if not page:
            raise ExtractorError('Page was not archived', expected=True)
        return self._parse_json(page, video_id)

    def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
        """Build formats from an archived m3u8 playlist.

        Every URI line of the playlist is rewritten to point at the archive,
        then the first rewritten URI is probed with a HEAD request.

        @param m3u8_url  Original URL of the playlist
        @param params    Query parameters appended to the playlist and to every URI line
        @returns         List of formats, or None if the playlist is missing,
                         has no URI lines, or its first segment is unavailable
        """
        m3u8_doc = self._download_archived_page(
            m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
        if not m3u8_doc:
            return

        # M3U8 document should be changed to archive domain
        m3u8_doc = m3u8_doc.splitlines()
        url_base = m3u8_url.rsplit('/', 1)[0]
        query_string = urllib.parse.urlencode(params)  # loop invariant
        first_segment = None
        for i, line in enumerate(m3u8_doc):
            # Blank lines and tags/comments are not URIs and must stay untouched
            if not line.strip() or line.startswith('#'):
                continue
            m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{query_string}'
            first_segment = first_segment or m3u8_doc[i]

        if not first_segment:
            # Nothing to probe; HEADRequest(None) would fail
            return

        # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
        urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
                                     fatal=False, note='Check first segment availablity')
        if urlh:
            formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
            if subtitles:
                self._report_ignoring_subs('m3u8')
            return formats

    # Closely follows the logic of the ArchiveTeam grab script
    # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
    def _real_extract(self, url):
        """Extract formats and metadata for one archived vlive video."""
        video_id, url_date = self._match_valid_url(url).group('id', 'date')

        # url_date is None when the URL carries no 14-digit capture date
        webpage = self._download_archived_page(
            f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date or '2')

        player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
        user_country = traverse_obj(player_info, ('common', 'userCountry'))

        main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
        main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
        app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')

        inkey = self._download_archived_json(
            f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
                'appId': app_id,
                'platformType': 'PC',
                'gcc': user_country,
                'locale': 'en_US',
            }, fatal=False)

        vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))

        vod_data = self._download_archived_json(
            f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
                'key': inkey.get('inkey'),
                'pid': 'rmcPlayer_16692457559726800',  # partially unix time and partially random. Fixed value used by archiveteam project
                'sid': '2024',
                'ver': '2.0',
                'devt': 'html5_pc',
                'doct': 'json',
                'ptc': 'https',
                'sptc': 'https',
                'cpt': 'vtt',
                'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
                'pv': '4.26.9',
                'dr': '1920x1080',
                'cpl': 'en_US',
                'lc': 'en_US',
                'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
                'adu': '%2F',
                'videoId': vod_id,
                'cc': user_country,
            })

        formats = []

        streams = traverse_obj(vod_data, ('streams', ...))
        if len(streams) > 1:
            self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
        # An empty stream list must not abort extraction: the MP4 files below may still exist
        stream = streams[0] if streams else {}

        max_stream = max(
            stream.get('videos') or [],
            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
        m3u8_url = max_stream.get('source') if max_stream is not None else None
        if m3u8_url:
            # `or []` also covers an explicit null for 'keys'
            params = {arg.get('name'): arg.get('value') for arg in stream.get('keys') or [] if arg.get('type') == 'param'}
            formats = self._extract_formats_from_m3u8(m3u8_url, params, video_id) or []

        # For parts of the project MP4 files were archived
        max_video = max(
            traverse_obj(vod_data, ('videos', 'list', ...)),
            key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
        mp4_source = max_video.get('source') if max_video is not None else None
        if mp4_source:
            video_url = self._WAYBACK_BASE_URL + mp4_source
            urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
                                         fatal=False, note='Check video availablity')
            if urlh:
                formats.append({'url': video_url})

        return {
            'id': video_id,
            'formats': formats,
            **traverse_obj(player_info, ('postDetail', 'post', {
                'title': ('officialVideo', 'title', {str}),
                'creator': ('author', 'nickname', {str}),
                'channel': ('channel', 'channelName', {str}),
                'channel_id': ('channel', 'channelCode', {str}),
                'duration': ('officialVideo', 'playTime', {int_or_none}),
                'view_count': ('officialVideo', 'playCount', {int_or_none}),
                'like_count': ('officialVideo', 'likeCount', {int_or_none}),
                'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
                'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
                'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
            })),
            **traverse_obj(vod_data, ('meta', {
                'uploader_id': ('user', 'id', {str}),
                'uploader': ('user', 'name', {str}),
                'uploader_url': ('user', 'url', {url_or_none}),
                'thumbnail': ('cover', 'source', {url_or_none}),
            }), expected_type=lambda x: x or None),
            **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
        }

View File

@ -21,6 +21,23 @@ from ..utils import (
class NaverBaseIE(InfoExtractor): class NaverBaseIE(InfoExtractor):
_CAPTION_EXT_RE = r'\.(?:ttml|vtt)' _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
@staticmethod  # NB: Used in VLiveWebArchiveIE
def process_subtitles(vod_data, process_url):
    """Collect caption tracks from `vod_data` into info-dict subtitle fields.

    @param vod_data     Mapping whose ('captions', 'list') entries describe captions
    @param process_url  Callable mapping one caption source URL to an iterable of URLs
    @returns            Dict with the keys 'subtitles' and 'automatic_captions'
    """
    subtitles, automatic_captions = {}, {}
    for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
        source = caption.get('source')
        if not source:
            continue

        kind = caption.get('type')
        target = automatic_captions if kind == 'auto' else subtitles

        lang = caption.get('locale') or join_nonempty('language', 'country', from_dict=caption) or 'und'
        if kind == 'fan':
            # Fan captions get the first free "_fanN" suffix for their language
            index = 1
            while f'{lang}_fan{index}' in target:
                index += 1
            lang = f'{lang}_fan{index}'

        name = join_nonempty('label', 'fanName', from_dict=caption, delim=' - ')
        target.setdefault(lang, []).extend(
            {'url': sub_url, 'name': name} for sub_url in process_url(source))

    return {'subtitles': subtitles, 'automatic_captions': automatic_captions}
def _extract_video_info(self, video_id, vid, key): def _extract_video_info(self, video_id, vid, key):
video_data = self._download_json( video_data = self._download_json(
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid, 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
@ -79,34 +96,18 @@ class NaverBaseIE(InfoExtractor):
] ]
return [caption_url] return [caption_url]
automatic_captions = {}
subtitles = {}
for caption in get_list('caption'):
caption_url = caption.get('source')
if not caption_url:
continue
sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles
lang = caption.get('locale') or join_nonempty('language', 'country', from_dict=caption) or 'und'
if caption.get('type') == 'fan':
lang += '_fan%d' % next(i for i in itertools.count(1) if f'{lang}_fan{i}' not in sub_dict)
sub_dict.setdefault(lang, []).extend({
'url': sub_url,
'name': join_nonempty('label', 'fanName', from_dict=caption, delim=' - '),
} for sub_url in get_subs(caption_url))
user = meta.get('user', {}) user = meta.get('user', {})
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'subtitles': subtitles,
'automatic_captions': automatic_captions,
'thumbnail': try_get(meta, lambda x: x['cover']['source']), 'thumbnail': try_get(meta, lambda x: x['cover']['source']),
'view_count': int_or_none(meta.get('count')), 'view_count': int_or_none(meta.get('count')),
'uploader_id': user.get('id'), 'uploader_id': user.get('id'),
'uploader': user.get('name'), 'uploader': user.get('name'),
'uploader_url': user.get('url'), 'uploader_url': user.get('url'),
**self.process_subtitles(video_data, get_subs),
} }

View File

@ -1,372 +0,0 @@
import itertools
import json
from .naver import NaverBaseIE
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import (
ExtractorError,
int_or_none,
LazyList,
merge_dicts,
str_or_none,
strip_or_none,
traverse_obj,
try_get,
urlencode_postdata,
url_or_none,
)
class VLiveBaseIE(NaverBaseIE):
    """Shared login and API helpers for the vlive.tv extractors."""
    _NETRC_MACHINE = 'vlive'
    # Class-level flag, set on VLiveBaseIE itself once a login has succeeded
    _logged_in = False

    def _perform_login(self, username, password):
        """Log in with e-mail credentials unless a previous login already succeeded.

        @raises ExtractorError  'Unable to log in' (expected) when the login
                                status check does not report a successful login
        """
        if self._logged_in:
            return

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        self._request_webpage(
            LOGIN_URL, None, note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': username, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        login_info = self._download_json(
            'https://www.vlive.tv/auth/loginInfo', None,
            note='Checking login status',
            headers={'Referer': 'https://www.vlive.tv/home'})

        if not try_get(login_info, lambda x: x['message']['login'], bool):
            raise ExtractorError('Unable to log in', expected=True)
        VLiveBaseIE._logged_in = True

    def _call_api(self, path_template, video_id, fields=None, query_add=None, note=None):
        """Query the vam-web API and return the decoded JSON.

        @param path_template  API path containing one `%s`, filled with `video_id`
        @param fields         Optional value for the `fields` query parameter
        @param query_add      Optional mapping of extra query parameters (not modified)
        @param note           Progress note; derived from `path_template` when None
        @raises ExtractorError  Re-raised from the download; an HTTP 403 is
                                reported through `raise_login_required` instead
        """
        if note is None:
            note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
        query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
        if fields:
            query['fields'] = fields
        # `query_add` defaults to None rather than a shared mutable `{}`
        if query_add:
            query.update(query_add)
        try:
            return self._download_json(
                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
                note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                # The 403 response body carries the message shown to the user
                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
            raise
class VLiveIE(VLiveBaseIE):
    """Extractor for single vlive.tv videos and the playlists they belong to."""
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
            'uploader_id': 'muploader_a',
            'upload_date': '20150817',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439816449,
            'like_count': int,
            'channel': 'Girl\'s Day',
            'channel_id': 'FDF27',
            'comment_count': int,
            'release_timestamp': 1439818140,
            'release_date': '20150817',
            'duration': 1014,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
            'uploader_id': 'muploader_j',
            'upload_date': '20161112',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'timestamp': 1478923074,
            'like_count': int,
            'channel': 'EXO',
            'channel_id': 'F94BD',
            'comment_count': int,
            'release_timestamp': 1478924280,
            'release_date': '20161112',
            'duration': 906,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
            'creator': 'BTS+',
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }, {
        'url': 'https://www.vlive.tv/embed/1326',
        'only_matching': True,
    }, {
        # works only with gcc=KR
        'url': 'https://www.vlive.tv/video/225019',
        'only_matching': True,
    }, {
        'url': 'https://www.vlive.tv/video/223906',
        'info_dict': {
            'id': '58',
            'title': 'RUN BTS!'
        },
        'playlist_mincount': 120
    }]

    def _real_extract(self, url):
        """Return the single video, or its whole playlist when one is wanted."""
        video_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/officialVideoPost-%s', video_id,
            'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId},playlist{playlistSeq,totalCount,name}')

        playlist_id = str_or_none(try_get(post, lambda x: x['playlist']['playlistSeq']))
        if not self._yes_playlist(playlist_id, video_id):
            video = post['officialVideo']
            return self._get_vlive_info(post, video, video_id)

        playlist_name = str_or_none(try_get(post, lambda x: x['playlist']['name']))
        playlist_count = str_or_none(try_get(post, lambda x: x['playlist']['totalCount']))

        playlist = self._call_api(
            'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})

        entries = []
        for video_data in playlist['data']:
            video = video_data.get('officialVideo')
            if not video:
                # A post without video data cannot be extracted; calling
                # `.get` on the missing value would raise AttributeError
                continue
            video_id = str_or_none(video.get('videoSeq'))
            entries.append(self._get_vlive_info(video_data, video, video_id))

        return self.playlist_result(entries, playlist_id, playlist_name)

    def _get_vlive_info(self, post, video, video_id):
        """Build the info dict for one video of type 'VOD' or 'LIVE'.

        @param post   Post object holding the 'author' and 'channel' data
        @param video  The post's 'officialVideo' object
        @raises ExtractorError  For live videos that are not currently on air
        """
        def get_common_fields():
            channel = post.get('channel') or {}
            return {
                'title': video.get('title'),
                'creator': post.get('author', {}).get('nickname'),
                'channel': channel.get('channelName'),
                'channel_id': channel.get('channelCode'),
                'duration': int_or_none(video.get('playTime')),
                'view_count': int_or_none(video.get('playCount')),
                'like_count': int_or_none(video.get('likeCount')),
                'comment_count': int_or_none(video.get('commentCount')),
                'timestamp': int_or_none(video.get('createdAt'), scale=1000),
                'release_timestamp': int_or_none(traverse_obj(video, 'onAirStartAt', 'willStartAt'), scale=1000),
                'thumbnail': video.get('thumb'),
            }

        video_type = video.get('type')
        if video_type == 'VOD':
            inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
            vod_id = video['vodId']
            info_dict = merge_dicts(
                get_common_fields(),
                self._extract_video_info(video_id, vod_id, inkey))
            thumbnail = video.get('thumb')
            if thumbnail:
                # Keep any thumbnail already in the info dict and add the
                # post's own thumbnail with a higher preference
                if not info_dict.get('thumbnails') and info_dict.get('thumbnail'):
                    info_dict['thumbnails'] = [{'url': info_dict.pop('thumbnail')}]
                info_dict.setdefault('thumbnails', []).append({'url': thumbnail, 'preference': 1})
            return info_dict
        elif video_type == 'LIVE':
            status = video.get('status')
            if status == 'ON_AIR':
                stream_url = self._call_api(
                    'old/v3/live/%s/playInfo',
                    video_id)['result']['adaptiveStreamUrl']
                formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
                info = get_common_fields()
                info.update({
                    'title': video['title'],
                    'id': video_id,
                    'formats': formats,
                    'is_live': True,
                })
                return info
            elif status == 'ENDED':
                raise ExtractorError(
                    'Uploading for replay. Please wait...', expected=True)
            elif status == 'RESERVED':
                raise ExtractorError('Coming soon!', expected=True)
            elif video.get('exposeStatus') == 'CANCEL':
                raise ExtractorError(
                    'We are sorry, but the live broadcast has been canceled.',
                    expected=True)
            else:
                # f-string: `status` may be None, which `+` cannot concatenate
                raise ExtractorError(f'Unknown status {status}')
        # NOTE(review): any other video type falls through and returns None
class VLivePostIE(VLiveBaseIE):
    """Extractor for vlive.tv posts and the videos attached to them."""
    IE_NAME = 'vlive:post'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
    _TESTS = [{
        # uploadType = SOS
        'url': 'https://www.vlive.tv/post/1-20088044',
        'info_dict': {
            'id': '1-20088044',
            'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
            'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
        },
        'playlist_count': 3,
    }, {
        # uploadType = V
        'url': 'https://www.vlive.tv/post/1-20087926',
        'info_dict': {
            'id': '1-20087926',
            'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
        },
        'playlist_count': 1,
    }]
    # '%%s' survives the first formatting pass as '%s' for _call_api to fill with the id
    _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'

    def _real_extract(self, url):
        """Return a playlist of the post's attached videos.

        A post that wraps an official video is delegated to VLiveIE instead.
        """
        post_id = self._match_id(url)

        post = self._call_api(
            'post/v1.0/post-%s', post_id,
            'attachments{video},officialVideo{videoSeq},plainBody,title')

        video_seq = str_or_none(try_get(
            post, lambda x: x['officialVideo']['videoSeq']))
        if video_seq:
            return self.url_result(
                'http://www.vlive.tv/video/' + video_seq,
                VLiveIE.ie_key(), video_seq)

        title = post['title']
        # A post without video attachments yields an empty playlist
        # rather than a KeyError/TypeError
        videos = try_get(post, lambda x: x['attachments']['video'], dict) or {}
        entries = []
        for idx, video in enumerate(videos.values()):
            video_id = video.get('videoId')
            if not video_id:
                continue
            upload_type = video.get('uploadType')
            upload_info = video.get('uploadInfo') or {}
            entry = None
            if upload_type == 'SOS':
                download = self._call_api(
                    self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download']
                # The format id minus its last character is used as the height
                formats = [{
                    'format_id': f_id,
                    'url': f_url,
                    'height': int_or_none(f_id[:-1]),
                } for f_id, f_url in download.items()]
                entry = {
                    'formats': formats,
                    'id': video_id,
                    'thumbnail': upload_info.get('imageUrl'),
                }
            elif upload_type == 'V':
                vod_id = upload_info.get('videoId')
                if not vod_id:
                    continue
                inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey']
                entry = self._extract_video_info(video_id, vod_id, inkey)
            if entry:
                entry['title'] = '%s_part%s' % (title, idx)
                entries.append(entry)
        return self.playlist_result(
            entries, post_id, title, strip_or_none(post.get('plainBody')))
class VLiveChannelIE(VLiveBaseIE):
    """Extractor for the video posts of a vlive.tv channel or channel board."""
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<channel_id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?'
    _TESTS = [{
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B',
        'only_matching': True,
    }, {
        'url': 'https://www.vlive.tv/channel/FCD4B/board/3546',
        'info_dict': {
            'id': 'FCD4B-3546',
            'title': 'MAMAMOO - Star Board',
        },
        'playlist_mincount': 880
    }]

    def _entries(self, posts_id, board_name):
        """Yield url results for every video post, following the API's paging.

        @param posts_id    Board id when `board_name` is set, otherwise the channel id
        @param board_name  Title of the board, or a falsy value for a channel listing
        """
        if board_name:
            posts_path = 'post/v1.0/board-%s/posts'
            query_add = {'limit': 100, 'sortType': 'LATEST'}
        else:
            posts_path = 'post/v1.0/channel-%s/starPosts'
            query_add = {'limit': 100}

        for page_num in itertools.count(1):
            video_list = self._call_api(
                posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add,
                note=f'Downloading playlist page {page_num}')

            for video in try_get(video_list, lambda x: x['data'], list) or []:
                # str_or_none, not str(): str(None) is the truthy 'None' and
                # would let posts without an id slip through the check below
                video_id = str_or_none(video.get('postId'))
                video_title = str_or_none(video.get('title'))
                video_url = url_or_none(video.get('url'))
                if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO':
                    continue
                channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str)
                yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name)

            after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str)
            if not after:
                break
            query_add['after'] = after

    def _real_extract(self, url):
        """Return a playlist for the channel or for one of its supported boards.

        @raises ExtractorError  (expected) when the board type is neither
                                'STAR' nor 'VLIVE_PLUS'
        """
        channel_id, posts_id = self._match_valid_url(url).groups()

        board_name = None
        if posts_id:
            board = self._call_api(
                'board/v1.0/board-%s', posts_id, 'title,boardType')
            board_name = board.get('title') or 'Unknown'
            if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'):
                raise ExtractorError(f'Board {board_name!r} is not supported', expected=True)

        entries = LazyList(self._entries(posts_id or channel_id, board_name))
        # The channel name is only available on the entries themselves;
        # a listing without any video has none
        try:
            channel_name = entries[0]['channel']
        except IndexError:
            channel_name = None

        return self.playlist_result(
            entries,
            f'{channel_id}-{posts_id}' if posts_id else channel_id,
            f'{channel_name} - {board_name}' if channel_name and board_name else channel_name)