[extractor/SportDeutschland] Fix extractor (#6041)

Authored by: FriedrichRehren
Closes #3005
This commit is contained in:
Friedrich Rehren 2023-02-17 08:44:26 +01:00 committed by GitHub
parent 31c279a2a2
commit 5e1a54f63e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 86 additions and 71 deletions

View File

@ -1,95 +1,110 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, format_field,
float_or_none, traverse_obj,
int_or_none, unified_timestamp,
parse_iso8601, strip_or_none
parse_qs,
strip_or_none,
try_get,
) )
class SportDeutschlandIE(InfoExtractor): class SportDeutschlandIE(InfoExtractor):
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)' _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0', 'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
'info_dict': { 'info_dict': {
'id': '5318cac0275701382770543d7edaf0a0', 'id': '983758e9-5829-454d-a3cf-eb27bccc3c94',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1', 'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga',
'duration': 16106.36, 'description': 'md5:a288c794a5ee69e200d8f12982f81a87',
}, 'live_status': 'was_live',
'params': { 'channel': 'Blau-Weiss Buchholz Tanzsport',
'noplaylist': True, 'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport',
# m3u8 download 'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3',
'skip_download': True, 'display_id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54',
}, 'duration': 32447,
'upload_date': '20230114',
'timestamp': 1673730018.0,
}
}, { }, {
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0', 'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
'info_dict': { 'info_dict': {
'id': 'c6e2fdd01f63013854c47054d2ab776f', 'id': '95b97d9a-04f6-4880-9039-182985c33943',
'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals', 'ext': 'mp4',
'description': 'md5:5263ff4c31c04bb780c9f91130b48530', 'title': 'BWF Tour: 1. Runde Feld 1 - YONEX GAINWARD German Open 2022',
'duration': 31397, 'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e',
}, 'live_status': 'was_live',
'playlist_count': 2, 'channel': 'Deutscher Badminton Verband',
}, { 'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband',
'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich', 'channel_id': '93ca5866-2551-49fc-8424-6db35af58920',
'only_matching': True, 'display_id': '95c80c52-6b9a-4ae9-9197-984145adfced',
'duration': 41097,
'upload_date': '20220309',
'timestamp': 1646860727.0,
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
data = self._download_json( meta = self._download_json(
'https://backend.sportdeutschland.tv/api/permalinks/' + display_id, 'https://api.sportdeutschland.tv/api/stateless/frontend/assets/' + display_id,
display_id, query={'access_token': 'true'}) display_id, query={'access_token': 'true'})
asset = data['asset']
title = (asset.get('title') or asset['label']).strip() asset_id = traverse_obj(meta, 'id', 'uuid')
asset_id = asset.get('id') or asset.get('uuid')
info = { info = {
'id': asset_id, 'id': asset_id,
'title': title, 'channel_url': format_field(meta, ('profile', 'slug'), 'https://sportdeutschland.tv/%s'),
'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'), **traverse_obj(meta, {
'duration': int_or_none(asset.get('seconds')), 'title': (('title', 'name'), {strip_or_none}),
'description': 'description',
'channel': ('profile', 'name'),
'channel_id': ('profile', 'id'),
'is_live': 'currently_live',
'was_live': 'was_live'
}, get_all=False)
} }
videos = asset.get('videos') or []
if len(videos) > 1:
playlist_id = parse_qs(url).get('playlistId', [None])[0]
if not self._yes_playlist(playlist_id, asset_id):
videos = [videos[int(playlist_id)]]
def entries(): videos = meta.get('videos') or []
for i, video in enumerate(videos, 1):
video_id = video.get('uuid') if len(videos) > 1:
video_url = video.get('url')
if not (video_id and video_url):
continue
formats = self._extract_m3u8_formats(
video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
if not formats and not self.get_param('ignore_no_formats'):
continue
yield {
'id': video_id,
'formats': formats,
'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
'duration': float_or_none(video.get('duration')),
}
info.update({ info.update({
'_type': 'multi_video', '_type': 'multi_video',
'entries': entries(), 'entries': self.processVideoOrStream(asset_id, video)
}) } for video in enumerate(videos) if video.get('formats'))
else:
formats = self._extract_m3u8_formats( elif len(videos) == 1:
videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4') info.update(
section_title = strip_or_none(try_get(data, lambda x: x['section']['title'])) self.processVideoOrStream(asset_id, videos[0])
info.update({ )
'formats': formats,
'display_id': asset.get('permalink'), livestream = meta.get('livestream')
'thumbnail': try_get(asset, lambda x: x['images'][0]),
'categories': [section_title] if section_title else None, if livestream is not None:
'view_count': int_or_none(asset.get('views')), info.update(
'is_live': asset.get('is_live') is True, self.processVideoOrStream(asset_id, livestream)
'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')), )
})
return info return info
def process_video_or_stream(self, asset_id, video):
    """Turn one video/livestream entry of an asset into format information.

    A playback token is requested from the asset-token endpoint for the
    entry, and the HLS formats are then extracted from the Mux playlist
    URL carrying that token.

    :param asset_id: ID of the owning asset; interpolated into the token URL.
    :param video: mapping with the required keys ``id``, ``src`` and
        ``type``. ``duration`` and ``created_at`` are read (optionally)
        only when ``type`` is ``'mux_vod'``.
    :return: dict with ``display_id`` and ``formats``; for ``'mux_vod'``
        entries it additionally carries ``duration`` and ``timestamp``.
    :raises KeyError: if ``video`` lacks ``id``, ``src`` or ``type``, or
        the token response has no ``token`` key.
    """
    video_id = video['id']
    video_src = video['src']
    video_type = video['type']

    # The token request is keyed on the entry type and its playback id (src).
    token = self._download_json(
        f'https://api.sportdeutschland.tv/api/frontend/asset-token/{asset_id}',
        video_id, query={'type': video_type, 'playback_id': video_src})['token']

    formats = self._extract_m3u8_formats(
        f'https://stream.mux.com/{video_src}.m3u8?token={token}', video_id)

    video_data = {
        'display_id': video_id,
        'formats': formats,
    }

    # Only VOD entries get duration/upload time; other types are left without them.
    if video_type == 'mux_vod':
        video_data.update({
            'duration': video.get('duration'),
            'timestamp': unified_timestamp(video.get('created_at'))
        })

    return video_data


# The call sites in _real_extract use the camelCase spelling; without this
# alias they would fail with AttributeError.
processVideoOrStream = process_video_or_stream