[extractor/telecaribe] Add extractor (#6311)

Authored by: elyse0
Closes #6001
This commit is contained in:
Elyse 2023-03-04 13:41:41 -06:00 committed by GitHub
parent 1f8489cccb
commit b404712822
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 78 additions and 0 deletions

View File

@ -1854,6 +1854,7 @@ from .ted import (
from .tele5 import Tele5IE from .tele5 import Tele5IE
from .tele13 import Tele13IE from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE from .telebruxelles import TeleBruxellesIE
from .telecaribe import TelecaribePlayIE
from .telecinco import TelecincoIE from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE from .telegraaf import TelegraafIE
from .telegram import TelegramEmbedIE from .telegram import TelegramEmbedIE

View File

@ -0,0 +1,77 @@
import re

from .common import InfoExtractor
from ..utils import ExtractorError, traverse_obj
class TelecaribePlayIE(InfoExtractor):
    """Extractor for pages under play.telecaribe.co.

    Any page other than ``/live`` is returned as a playlist built from the
    ``.mp4`` links found in the embedded player page; ``/live`` is returned as
    a single live HLS entry.
    """

    _VALID_URL = r'https?://(?:www\.)?play\.telecaribe\.co/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.play.telecaribe.co/breicok',
        'info_dict': {
            'id': 'breicok',
            'title': 'Breicok',
        },
        'playlist_count': 7,
    }, {
        'url': 'https://www.play.telecaribe.co/si-fue-gol-de-yepes',
        'info_dict': {
            'id': 'si-fue-gol-de-yepes',
            'title': 'Sí Fue Gol de Yepes',
        },
        'playlist_count': 6,
    }, {
        'url': 'https://www.play.telecaribe.co/ciudad-futura',
        'info_dict': {
            'id': 'ciudad-futura',
            'title': 'Ciudad Futura',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.play.telecaribe.co/live',
        'info_dict': {
            'id': 'live',
            'title': r're:^Señal en vivo',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        }
    }]

    def _download_player_webpage(self, webpage, display_id):
        """Download and return the HTML of the player page embedded in *webpage*.

        The page id found in *webpage* selects a ``<link id="features_<page id>">``
        element whose ``href`` points to a JSON document. The last ``url`` found
        among that document's ``props.render.compProps`` entries is fetched.

        Raises ExtractorError if no entry carries a ``url``.
        """
        page_id = self._search_regex(
            (r'window\.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
            webpage, 'page_id')

        # page_id comes from the page itself, so escape it before embedding it in a pattern
        props_url = self._search_regex(
            rf'<link[^>]+href\s*=\s*"([^"]+)"[^>]+id\s*=\s*"features_{re.escape(page_id)}"',
            webpage, 'json_props_url')
        props = self._download_json(props_url, display_id)['props']['render']['compProps']

        player_urls = traverse_obj(props, (..., 'url'))
        if not player_urls:
            # Fail with a proper extractor error instead of an IndexError on the empty list
            raise ExtractorError('Unable to extract player url')
        return self._download_webpage(player_urls[-1], display_id)

    def _get_clean_title(self, title):
        """Strip the ``| Telecaribe VOD`` suffix from *title*; return None if nothing is left."""
        return re.sub(r'\s*\|\s*Telecaribe\s*VOD', '', title or '').strip() or None

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        player = self._download_player_webpage(webpage, display_id)
        title = self._get_clean_title(self._og_search_title(webpage))

        if display_id != 'live':
            # Regular pages: every linked .mp4 file becomes a playlist entry
            return self.playlist_from_matches(
                re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id, title)

        m3u8_url = self._search_regex(
            r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url')
        # live=True keeps the manifest-provided format ids, which stay stable
        # even if the advertised bitrates of the live stream vary over time
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            m3u8_url, display_id, 'mp4', live=True)

        return {
            'id': display_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }