streamrip/streamrip/client/downloadable.py

398 lines
13 KiB
Python
Raw Normal View History

2023-10-05 20:13:52 +02:00
import asyncio
2023-12-21 07:21:22 +01:00
import base64
2023-10-05 20:13:52 +02:00
import functools
import hashlib
import itertools
import json
2023-12-02 04:34:38 +01:00
import logging
2023-09-22 04:19:30 +02:00
import os
2023-10-05 20:13:52 +02:00
import re
2023-09-22 04:19:30 +02:00
import shutil
2023-10-05 20:13:52 +02:00
import tempfile
2023-09-22 04:19:30 +02:00
import time
from abc import ABC, abstractmethod
2023-12-02 04:34:38 +01:00
from dataclasses import dataclass
2023-10-22 04:18:06 +02:00
from typing import Any, Callable, Optional
2023-09-22 04:19:30 +02:00
import aiofiles
import aiohttp
2023-10-05 20:13:52 +02:00
import m3u8
import requests
2023-12-21 07:21:22 +01:00
from Cryptodome.Cipher import AES, Blowfish
from Cryptodome.Util import Counter
2023-10-05 20:13:52 +02:00
2023-11-25 03:50:06 +01:00
from .. import converter
2023-12-22 05:48:02 +01:00
from ..exceptions import NonStreamableError
2023-09-22 04:19:30 +02:00
2023-12-02 04:34:38 +01:00
logger = logging.getLogger("streamrip")
2023-09-22 04:19:30 +02:00
2023-12-21 21:48:19 +01:00
BLOWFISH_SECRET = "g4el58wc0zvf9na1"
2023-09-22 04:19:30 +02:00
def generate_temp_path(url: str):
    """Return a unique temporary-file path for a download of *url*.

    Uniqueness comes from hashing the URL and appending the current time,
    so concurrent downloads of different URLs never collide.
    """
    filename = f"__streamrip_{hash(url)}_{time.time()}.download"
    return os.path.join(tempfile.gettempdir(), filename)
2023-09-22 04:19:30 +02:00
2023-12-02 04:34:38 +01:00
@dataclass(slots=True)
class Downloadable(ABC):
    """Abstract base for anything that can be streamed to a local file.

    Subclasses implement ``_download``; callers use ``download`` and ``size``.
    """

    session: aiohttp.ClientSession
    url: str
    extension: str
    chunk_size = 2**17
    _size: Optional[int] = None

    async def download(self, path: str, callback: Callable[[int], Any]):
        """Download ``self.url`` to ``path``, invoking ``callback`` with byte counts."""
        await self._download(path, callback)

    async def size(self) -> int:
        """Return the remote file size in bytes, caching it after the first lookup."""
        cached = getattr(self, "_size", None)
        if cached is not None:
            return cached
        # Not cached yet: issue a HEAD request and read Content-Length.
        async with self.session.head(self.url) as response:
            response.raise_for_status()
            length = int(response.headers.get("Content-Length", 0))
        self._size = length
        return length

    @abstractmethod
    async def _download(self, path: str, callback: Callable[[int], None]):
        """Subclass hook that performs the actual transfer."""
        raise NotImplementedError
2023-09-22 04:19:30 +02:00
class BasicDownloadable(Downloadable):
    """Just downloads a URL."""

    def __init__(self, session: aiohttp.ClientSession, url: str, extension: str):
        self.session = session
        self.url = url
        self.extension = extension
        self._size = None

    async def _download(self, path: str, callback):
        """Stream ``self.url`` to ``path``, calling ``callback(nbytes)`` per chunk.

        Deliberately uses blocking ``requests`` (see comment below) but yields
        to the event loop periodically so other tasks are not starved.
        """
        # Attempt to fix async performance issues by manually and infrequently
        # yielding to event loop selector
        counter = 0
        yield_every = 16
        with open(path, "wb") as file:
            with requests.get(self.url, allow_redirects=True, stream=True) as resp:
                # Bug fix: fail fast on HTTP errors instead of silently
                # writing an error page to disk as if it were audio.
                resp.raise_for_status()
                for chunk in resp.iter_content(chunk_size=self.chunk_size):
                    file.write(chunk)
                    # typically a progress-bar update
                    callback(len(chunk))
                    if counter % yield_every == 0:
                        await asyncio.sleep(0)
                    counter += 1
2023-09-22 04:19:30 +02:00
class DeezerDownloadable(Downloadable):
    """Downloadable for Deezer tracks, including Blowfish-encrypted streams."""

    # URLs matching this pattern serve encrypted streams.
    is_encrypted = re.compile("/m(?:obile|edia)/")

    def __init__(self, session: aiohttp.ClientSession, info: dict):
        logger.debug("Deezer info for downloadable: %s", info)
        self.session = session
        self.url = info["url"]
        # Clamp the requested quality to the best one that actually has a
        # nonzero reported size (unavailable qualities report size 0).
        max_quality_available = max(
            i for i, size in enumerate(info["quality_to_size"]) if size > 0
        )
        self.quality = min(info["quality"], max_quality_available)
        self._size = info["quality_to_size"][self.quality]
        if self.quality <= 1:
            self.extension = "mp3"
        else:
            self.extension = "flac"
        self.id = str(info["id"])

    async def _download(self, path: str, callback):
        """Download (and decrypt if necessary) the track to ``path``."""
        async with self.session.get(self.url, allow_redirects=True) as resp:
            resp.raise_for_status()
            self._size = int(resp.headers.get("Content-Length", 0))
            if self._size < 20000 and not self.url.endswith(".jpg"):
                # Tiny responses are almost always JSON error payloads.
                try:
                    info = await resp.json()
                    try:
                        # Usually happens with deezloader downloads
                        raise NonStreamableError(f"{info['error']} - {info['message']}")
                    except KeyError:
                        raise NonStreamableError(info)
                except json.JSONDecodeError:
                    raise NonStreamableError("File not found.")

            if self.is_encrypted.search(self.url) is None:
                logger.debug(f"Deezer file at {self.url} not encrypted.")
                async with aiofiles.open(path, "wb") as file:
                    async for chunk in resp.content.iter_chunked(self.chunk_size):
                        await file.write(chunk)
                        # typically a bar.update()
                        callback(len(chunk))
            else:
                blowfish_key = self._generate_blowfish_key(self.id)
                logger.debug(
                    "Deezer file (id %s) at %s is encrypted. Decrypting with %s",
                    self.id,
                    self.url,
                    blowfish_key,
                )
                buf = bytearray()
                async for data, _ in resp.content.iter_chunks():
                    buf += data
                    callback(len(data))

                # Bug fix: Deezer encrypts the first 2048-byte block of every
                # 6144-byte (2048 * 3) stride. Iterating with self.chunk_size
                # (2**17) only decrypted one block out of every 64, leaving
                # most encrypted blocks untouched and corrupting the audio.
                encrypt_chunk_size = 2048 * 3
                async with aiofiles.open(path, "wb") as audio:
                    buflen = len(buf)
                    for i in range(0, buflen, encrypt_chunk_size):
                        data = buf[i : min(i + encrypt_chunk_size, buflen)]
                        if len(data) >= 2048:
                            decrypted_chunk = (
                                self._decrypt_chunk(blowfish_key, data[:2048])
                                + data[2048:]
                            )
                        else:
                            # Trailing partial block is left unencrypted by Deezer.
                            decrypted_chunk = data
                        await audio.write(decrypted_chunk)

    @staticmethod
    def _decrypt_chunk(key, data):
        """Decrypt a 2048-byte block of a Deezer stream with Blowfish-CBC.

        :param key: per-track Blowfish key from ``_generate_blowfish_key``
        :param data: exactly one encrypted 2048-byte block
        """
        return Blowfish.new(
            key,
            Blowfish.MODE_CBC,
            b"\x00\x01\x02\x03\x04\x05\x06\x07",
        ).decrypt(data)

    @staticmethod
    def _generate_blowfish_key(track_id: str) -> bytes:
        """Generate the blowfish key for Deezer downloads.

        XORs the two halves of the track id's MD5 hex digest with the
        shared secret, character by character.

        :param track_id:
        :type track_id: str
        """
        md5_hash = hashlib.md5(track_id.encode()).hexdigest()
        # good luck :)
        return "".join(
            chr(functools.reduce(lambda x, y: x ^ y, map(ord, t)))
            for t in zip(md5_hash[:16], md5_hash[16:], BLOWFISH_SECRET)
        ).encode()
2023-09-22 04:19:30 +02:00
class TidalDownloadable(Downloadable):
    """A wrapper around BasicDownloadable that includes Tidal-specific
    error messages.
    """

    def __init__(
        self,
        session: aiohttp.ClientSession,
        url: str | None,
        codec: str,
        encryption_key: str | None,
        restrictions,
    ):
        # restrictions: Tidal API restriction objects (list of dicts with a
        # CamelCase "code") explaining why no URL was returned — TODO confirm
        # exact schema against the Tidal client.
        self.session = session
        codec = codec.lower()
        # flac and mqa streams are delivered as FLAC containers; everything
        # else (aac variants) comes as m4a.
        if codec in ("flac", "mqa"):
            self.extension = "flac"
        else:
            self.extension = "m4a"

        if url is None:
            # Turn CamelCase code into a readable sentence
            if restrictions:
                words = re.findall(r"([A-Z][a-z]+)", restrictions[0]["code"])
                raise NonStreamableError(
                    words[0] + " " + " ".join(map(str.lower, words[1:])),
                )
            # No URL and no restriction info: surface the raw dl_info.
            raise NonStreamableError(
                f"Tidal download: dl_info = {url, codec, encryption_key}"
            )

        self.url = url
        # Non-None only for encrypted (MQA) streams; triggers decryption
        # after download.
        self.enc_key = encryption_key
        # Actual transfer is delegated to a BasicDownloadable.
        self.downloadable = BasicDownloadable(session, url, self.extension)

    async def _download(self, path: str, callback):
        """Download to ``path``; decrypt in place afterwards if encrypted."""
        await self.downloadable._download(path, callback)
        if self.enc_key is not None:
            dec_bytes = await self._decrypt_mqa_file(path, self.enc_key)
            async with aiofiles.open(path, "wb") as audio:
                await audio.write(dec_bytes)

    # _size is proxied to the inner downloadable so that size() caching on
    # the base class operates on the delegate's state.
    @property
    def _size(self):
        return self.downloadable._size

    @_size.setter
    def _size(self, v):
        self.downloadable._size = v

    @staticmethod
    async def _decrypt_mqa_file(in_path, encryption_key):
        """Decrypt an MQA file and return the decrypted bytes.

        The security token (base64 ``encryption_key``) is decrypted with a
        fixed master key (AES-CBC); the result yields the per-file AES-CTR
        key and nonce used to decrypt the audio stream itself.

        :param in_path: path of the encrypted file on disk
        :param encryption_key: base64-encoded security token from the API
        """
        # Do not change this
        master_key = "UIlTTEMmmLfGowo/UC60x2H45W6MdGgTRfo/umg4754="

        # Decode the base64 strings to ascii strings
        master_key = base64.b64decode(master_key)
        security_token = base64.b64decode(encryption_key)

        # Get the IV from the first 16 bytes of the securityToken
        iv = security_token[:16]
        encrypted_st = security_token[16:]

        # Initialize decryptor
        decryptor = AES.new(master_key, AES.MODE_CBC, iv)

        # Decrypt the security token
        decrypted_st = decryptor.decrypt(encrypted_st)

        # Get the audio stream decryption key and nonce from the decrypted security token
        key = decrypted_st[:16]
        nonce = decrypted_st[16:24]

        # 64-bit counter with the 8-byte nonce as prefix, starting at 0.
        counter = Counter.new(64, prefix=nonce, initial_value=0)
        decryptor = AES.new(key, AES.MODE_CTR, counter=counter)

        async with aiofiles.open(in_path, "rb") as enc_file:
            dec_bytes = decryptor.decrypt(await enc_file.read())
            return dec_bytes
2023-12-21 07:21:22 +01:00
2023-09-22 04:19:30 +02:00
class SoundcloudDownloadable(Downloadable):
    """Downloadable for Soundcloud: direct "original" files or HLS mp3 streams."""

    def __init__(self, session, info: dict):
        self.session = session
        self.file_type = info["type"]
        if self.file_type == "mp3":
            self.extension = "mp3"
        elif self.file_type == "original":
            self.extension = "flac"
        else:
            raise Exception(f"Invalid file type: {self.file_type}")
        self.url = info["url"]

    async def _download(self, path, callback):
        # Dispatch on the stream type determined at construction time.
        if self.file_type == "mp3":
            await self._download_mp3(path, callback)
        else:
            await self._download_original(path, callback)

    async def _download_original(self, path: str, callback):
        """Download the original-quality file and re-encode it in place as FLAC."""
        downloader = BasicDownloadable(self.session, self.url, "flac")
        await downloader.download(path, callback)
        # Bug fix: this used to be `self.size = downloader.size`, which
        # rebound the size() *method* to the helper's bound method instead of
        # caching the byte count.
        self._size = await downloader.size()
        engine = converter.FLAC(path)
        await engine.convert(path)

    async def _download_mp3(self, path: str, callback):
        """Download all HLS segments concurrently, then concatenate them.

        ``callback(1)`` is invoked once per completed segment.
        """
        # TODO: make progress bar reflect bytes
        async with self.session.get(self.url) as resp:
            content = await resp.text("utf-8")

        parsed_m3u = m3u8.loads(content)
        self._size = len(parsed_m3u.segments)

        async def _fetch(uri: str) -> str:
            p = await self._download_segment(uri)
            callback(1)
            return p

        # Bug fix: gather() preserves playlist order. The previous
        # as_completed() loop collected paths in *completion* order, so
        # segments could be concatenated out of order, scrambling the audio.
        segment_paths = await asyncio.gather(
            *(_fetch(segment.uri) for segment in parsed_m3u.segments)
        )

        await concat_audio_files(list(segment_paths), path, "mp3")

    async def _download_segment(self, segment_uri: str) -> str:
        """Download one HLS segment to a temp file and return its path."""
        tmp = generate_temp_path(segment_uri)
        async with self.session.get(segment_uri) as resp:
            resp.raise_for_status()
            async with aiofiles.open(tmp, "wb") as file:
                content = await resp.content.read()
                await file.write(content)
        return tmp

    async def size(self) -> int:
        """Return segment count for HLS mp3 streams, byte size otherwise."""
        if self.file_type == "mp3":
            async with self.session.get(self.url) as resp:
                content = await resp.text("utf-8")

            parsed_m3u = m3u8.loads(content)
            self._size = len(parsed_m3u.segments)
        return await super().size()
2023-10-05 20:13:52 +02:00
2023-12-02 04:34:38 +01:00
async def concat_audio_files(paths: list[str], out: str, ext: str, max_files_open=128):
    """Concatenate audio files using FFmpeg. Batched by max files open.

    Recurses log_{max_files_open}(len(paths)) times.

    :param paths: files to concatenate, in order
    :param out: destination path
    :param ext: extension for intermediate batch outputs
    :param max_files_open: maximum inputs per FFmpeg invocation
    :raises Exception: if FFmpeg is missing or an FFmpeg process fails
    """
    # Base case: a single file needs no concatenation (and no FFmpeg).
    if len(paths) == 1:
        shutil.move(paths[0], out)
        return

    if shutil.which("ffmpeg") is None:
        raise Exception("FFmpeg must be installed.")

    it = iter(paths)
    # Ceiling division: number of FFmpeg batches required.
    num_batches = len(paths) // max_files_open + (
        1 if len(paths) % max_files_open != 0 else 0
    )
    tempdir = tempfile.gettempdir()
    outpaths = [
        os.path.join(
            tempdir,
            f"__streamrip_ffmpeg_{hash(paths[i*max_files_open])}.{ext}",
        )
        for i in range(num_batches)
    ]

    for p in outpaths:
        try:
            os.remove(p)  # in case of failure
        except FileNotFoundError:
            pass

    proc_futures = []
    for i in range(num_batches):
        command = (
            "ffmpeg",
            "-i",
            f"concat:{'|'.join(itertools.islice(it, max_files_open))}",
            "-acodec",
            "copy",
            "-loglevel",
            "warning",
            outpaths[i],
        )
        fut = asyncio.create_subprocess_exec(*command, stderr=asyncio.subprocess.PIPE)
        proc_futures.append(fut)

    # Create all processes concurrently
    processes = await asyncio.gather(*proc_futures)

    # Bug fix: keep the communicate() results — the old code discarded them
    # and then interpolated proc.stderr/proc.stdout, which are stream
    # objects, into the error message.
    outputs = await asyncio.gather(*[p.communicate() for p in processes])
    for proc, (_, stderr) in zip(processes, outputs):
        if proc.returncode != 0:
            raise Exception(
                f"FFMPEG returned with status code {proc.returncode} "
                f"error: {stderr.decode(errors='replace') if stderr else ''}",
            )

    # Recurse on remaining batches, propagating the caller's batch size
    # (previously the recursion silently reset it to the default).
    await concat_audio_files(outpaths, out, ext, max_files_open)