streamrip/streamrip/client/downloadable.py

398 lines
13 KiB
Python

import asyncio
import base64
import functools
import hashlib
import itertools
import json
import logging
import os
import re
import shutil
import tempfile
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Callable, Optional
import aiofiles
import aiohttp
import m3u8
import requests
from Cryptodome.Cipher import AES, Blowfish
from Cryptodome.Util import Counter
from .. import converter
from ..exceptions import NonStreamableError
logger = logging.getLogger("streamrip")
BLOWFISH_SECRET = "g4el58wc0zvf9na1"
def generate_temp_path(url: str):
return os.path.join(
tempfile.gettempdir(),
f"__streamrip_{hash(url)}_{time.time()}.download",
)
@dataclass(slots=True)
class Downloadable(ABC):
session: aiohttp.ClientSession
url: str
extension: str
chunk_size = 2**17
_size: Optional[int] = None
async def download(self, path: str, callback: Callable[[int], Any]):
await self._download(path, callback)
async def size(self) -> int:
if hasattr(self, "_size") and self._size is not None:
return self._size
async with self.session.head(self.url) as response:
response.raise_for_status()
content_length = response.headers.get("Content-Length", 0)
self._size = int(content_length)
return self._size
@abstractmethod
async def _download(self, path: str, callback: Callable[[int], None]):
raise NotImplementedError
class BasicDownloadable(Downloadable):
"""Just downloads a URL."""
def __init__(self, session: aiohttp.ClientSession, url: str, extension: str):
self.session = session
self.url = url
self.extension = extension
self._size = None
async def _download(self, path: str, callback):
# Attempt to fix async performance issues by manually and infrequently
# yielding to event loop selector
counter = 0
yield_every = 16
with open(path, "wb") as file:
with requests.get(self.url, allow_redirects=True, stream=True) as resp:
for chunk in resp.iter_content(chunk_size=self.chunk_size):
file.write(chunk)
callback(len(chunk))
if counter % yield_every == 0:
await asyncio.sleep(0)
counter += 1
class DeezerDownloadable(Downloadable):
is_encrypted = re.compile("/m(?:obile|edia)/")
# chunk_size = 2048 * 3
def __init__(self, session: aiohttp.ClientSession, info: dict):
logger.debug("Deezer info for downloadable: %s", info)
self.session = session
self.url = info["url"]
max_quality_available = max(
i for i, size in enumerate(info["quality_to_size"]) if size > 0
)
self.quality = min(info["quality"], max_quality_available)
self._size = info["quality_to_size"][self.quality]
if self.quality <= 1:
self.extension = "mp3"
else:
self.extension = "flac"
self.id = str(info["id"])
async def _download(self, path: str, callback):
# with requests.Session().get(self.url, allow_redirects=True) as resp:
async with self.session.get(self.url, allow_redirects=True) as resp:
resp.raise_for_status()
self._size = int(resp.headers.get("Content-Length", 0))
if self._size < 20000 and not self.url.endswith(".jpg"):
try:
info = await resp.json()
try:
# Usually happens with deezloader downloads
raise NonStreamableError(f"{info['error']} - {info['message']}")
except KeyError:
raise NonStreamableError(info)
except json.JSONDecodeError:
raise NonStreamableError("File not found.")
if self.is_encrypted.search(self.url) is None:
logger.debug(f"Deezer file at {self.url} not encrypted.")
async with aiofiles.open(path, "wb") as file:
async for chunk in resp.content.iter_chunked(self.chunk_size):
await file.write(chunk)
# typically a bar.update()
callback(len(chunk))
else:
blowfish_key = self._generate_blowfish_key(self.id)
logger.debug(
"Deezer file (id %s) at %s is encrypted. Decrypting with %s",
self.id,
self.url,
blowfish_key,
)
buf = bytearray()
async for data, _ in resp.content.iter_chunks():
buf += data
callback(len(data))
async with aiofiles.open(path, "wb") as audio:
buflen = len(buf)
for i in range(0, buflen, self.chunk_size):
data = buf[i : min(i + self.chunk_size, buflen)]
if len(data) >= 2048:
decrypted_chunk = (
self._decrypt_chunk(blowfish_key, data[:2048])
+ data[2048:]
)
else:
decrypted_chunk = data
await audio.write(decrypted_chunk)
@staticmethod
def _decrypt_chunk(key, data):
"""Decrypt a chunk of a Deezer stream.
:param key:
:param data:
"""
return Blowfish.new(
key,
Blowfish.MODE_CBC,
b"\x00\x01\x02\x03\x04\x05\x06\x07",
).decrypt(data)
@staticmethod
def _generate_blowfish_key(track_id: str) -> bytes:
"""Generate the blowfish key for Deezer downloads.
:param track_id:
:type track_id: str
"""
md5_hash = hashlib.md5(track_id.encode()).hexdigest()
# good luck :)
return "".join(
chr(functools.reduce(lambda x, y: x ^ y, map(ord, t)))
for t in zip(md5_hash[:16], md5_hash[16:], BLOWFISH_SECRET)
).encode()
class TidalDownloadable(Downloadable):
"""A wrapper around BasicDownloadable that includes Tidal-specific
error messages.
"""
def __init__(
self,
session: aiohttp.ClientSession,
url: str | None,
codec: str,
encryption_key: str | None,
restrictions,
):
self.session = session
codec = codec.lower()
if codec in ("flac", "mqa"):
self.extension = "flac"
else:
self.extension = "m4a"
if url is None:
# Turn CamelCase code into a readable sentence
if restrictions:
words = re.findall(r"([A-Z][a-z]+)", restrictions[0]["code"])
raise NonStreamableError(
words[0] + " " + " ".join(map(str.lower, words[1:])),
)
raise NonStreamableError(
f"Tidal download: dl_info = {url, codec, encryption_key}"
)
self.url = url
self.enc_key = encryption_key
self.downloadable = BasicDownloadable(session, url, self.extension)
async def _download(self, path: str, callback):
await self.downloadable._download(path, callback)
if self.enc_key is not None:
dec_bytes = await self._decrypt_mqa_file(path, self.enc_key)
async with aiofiles.open(path, "wb") as audio:
await audio.write(dec_bytes)
@property
def _size(self):
return self.downloadable._size
@_size.setter
def _size(self, v):
self.downloadable._size = v
@staticmethod
async def _decrypt_mqa_file(in_path, encryption_key):
"""Decrypt an MQA file.
:param in_path:
:param out_path:
:param encryption_key:
"""
# Do not change this
master_key = "UIlTTEMmmLfGowo/UC60x2H45W6MdGgTRfo/umg4754="
# Decode the base64 strings to ascii strings
master_key = base64.b64decode(master_key)
security_token = base64.b64decode(encryption_key)
# Get the IV from the first 16 bytes of the securityToken
iv = security_token[:16]
encrypted_st = security_token[16:]
# Initialize decryptor
decryptor = AES.new(master_key, AES.MODE_CBC, iv)
# Decrypt the security token
decrypted_st = decryptor.decrypt(encrypted_st)
# Get the audio stream decryption key and nonce from the decrypted security token
key = decrypted_st[:16]
nonce = decrypted_st[16:24]
counter = Counter.new(64, prefix=nonce, initial_value=0)
decryptor = AES.new(key, AES.MODE_CTR, counter=counter)
async with aiofiles.open(in_path, "rb") as enc_file:
dec_bytes = decryptor.decrypt(await enc_file.read())
return dec_bytes
class SoundcloudDownloadable(Downloadable):
def __init__(self, session, info: dict):
self.session = session
self.file_type = info["type"]
if self.file_type == "mp3":
self.extension = "mp3"
elif self.file_type == "original":
self.extension = "flac"
else:
raise Exception(f"Invalid file type: {self.file_type}")
self.url = info["url"]
async def _download(self, path, callback):
if self.file_type == "mp3":
await self._download_mp3(path, callback)
else:
await self._download_original(path, callback)
async def _download_original(self, path: str, callback):
downloader = BasicDownloadable(self.session, self.url, "flac")
await downloader.download(path, callback)
self.size = downloader.size
engine = converter.FLAC(path)
await engine.convert(path)
async def _download_mp3(self, path: str, callback):
# TODO: make progress bar reflect bytes
async with self.session.get(self.url) as resp:
content = await resp.text("utf-8")
parsed_m3u = m3u8.loads(content)
self._size = len(parsed_m3u.segments)
tasks = [
asyncio.create_task(self._download_segment(segment.uri))
for segment in parsed_m3u.segments
]
segment_paths = []
for coro in asyncio.as_completed(tasks):
segment_paths.append(await coro)
callback(1)
await concat_audio_files(segment_paths, path, "mp3")
async def _download_segment(self, segment_uri: str) -> str:
tmp = generate_temp_path(segment_uri)
async with self.session.get(segment_uri) as resp:
resp.raise_for_status()
async with aiofiles.open(tmp, "wb") as file:
content = await resp.content.read()
await file.write(content)
return tmp
async def size(self) -> int:
if self.file_type == "mp3":
async with self.session.get(self.url) as resp:
content = await resp.text("utf-8")
parsed_m3u = m3u8.loads(content)
self._size = len(parsed_m3u.segments)
return await super().size()
async def concat_audio_files(paths: list[str], out: str, ext: str, max_files_open=128):
"""Concatenate audio files using FFmpeg. Batched by max files open.
Recurses log_{max_file_open}(len(paths)) times.
"""
if shutil.which("ffmpeg") is None:
raise Exception("FFmpeg must be installed.")
# Base case
if len(paths) == 1:
shutil.move(paths[0], out)
return
it = iter(paths)
num_batches = len(paths) // max_files_open + (
1 if len(paths) % max_files_open != 0 else 0
)
tempdir = tempfile.gettempdir()
outpaths = [
os.path.join(
tempdir,
f"__streamrip_ffmpeg_{hash(paths[i*max_files_open])}.{ext}",
)
for i in range(num_batches)
]
for p in outpaths:
try:
os.remove(p) # in case of failure
except FileNotFoundError:
pass
proc_futures = []
for i in range(num_batches):
command = (
"ffmpeg",
"-i",
f"concat:{'|'.join(itertools.islice(it, max_files_open))}",
"-acodec",
"copy",
"-loglevel",
"warning",
outpaths[i],
)
fut = asyncio.create_subprocess_exec(*command, stderr=asyncio.subprocess.PIPE)
proc_futures.append(fut)
# Create all processes concurrently
processes = await asyncio.gather(*proc_futures)
# wait for all of them to finish
await asyncio.gather(*[p.communicate() for p in processes])
for proc in processes:
if proc.returncode != 0:
raise Exception(
f"FFMPEG returned with status code {proc.returncode} error: {proc.stderr} output: {proc.stdout}",
)
# Recurse on remaining batches
await concat_audio_files(outpaths, out, ext)