Deezer downloads working

This commit is contained in:
Nathan Thomas 2023-12-01 19:34:38 -08:00
parent 318b7ebadc
commit 7f1c29df2d
10 changed files with 228 additions and 60 deletions

View File

@ -1,5 +1,7 @@
import binascii
import hashlib
import json
import logging
import deezer
from Cryptodome.Cipher import AES
@ -9,6 +11,8 @@ from ..exceptions import AuthenticationError, MissingCredentials, NonStreamable
from .client import Client
from .downloadable import DeezerDownloadable
logger = logging.getLogger("streamrip")
class DeezerClient(Client):
source = "deezer"
@ -21,6 +25,8 @@ class DeezerClient(Client):
self.config = config.session.deezer
async def login(self):
# Used for track downloads
self.session = await self.get_session()
arl = self.config.arl
if not arl:
raise MissingCredentials
@ -29,7 +35,8 @@ class DeezerClient(Client):
raise AuthenticationError
self.logged_in = True
async def get_metadata(self, info: dict, media_type: str) -> dict:
async def get_metadata(self, item_id: str, media_type: str) -> dict:
# TODO: open asyncio PR to deezer py and integrate
request_functions = {
"track": self.client.api.get_track,
"album": self.client.api.get_album,
@ -38,17 +45,20 @@ class DeezerClient(Client):
}
get_item = request_functions[media_type]
item = get_item(info["id"])
item = get_item(item_id)
if media_type in ("album", "playlist"):
tracks = getattr(self.client.api, f"get_{media_type}_tracks")(
info["id"], limit=-1
item_id, limit=-1
)
item["tracks"] = tracks["data"]
item["track_total"] = len(tracks["data"])
elif media_type == "artist":
albums = self.client.api.get_artist_albums(info["id"])
albums = self.client.api.get_artist_albums(item_id)
item["albums"] = albums["data"]
elif media_type == "track":
# Because they give incomplete information about the album
# we need to make another request
item["album"] = await self.get_metadata(item["album"]["id"], "album")
return item
async def search(self, media_type: str, query: str, limit: int = 200):
@ -71,20 +81,19 @@ class DeezerClient(Client):
return response
async def get_downloadable(
self, info: dict, quality: int = 2
self, item_id: str, quality: int = 2
) -> DeezerDownloadable:
item_id = info["id"]
# TODO: optimize such that all of the ids are requested at once
dl_info: dict = {"quality": quality, "id": item_id}
track_info = self.client.gw.get_track(item_id)
dl_info["fallback_id"] = track_info["FALLBACK"]["SNG_ID"]
dl_info["fallback_id"] = track_info.get("FALLBACK", {}).get("SNG_ID")
quality_map = [
(9, "MP3_128"),
(3, "MP3_320"),
(1, "FLAC"),
(9, "MP3_128"), # quality 0
(3, "MP3_320"), # quality 1
(1, "FLAC"), # quality 2
]
# available_formats = [
@ -98,6 +107,10 @@ class DeezerClient(Client):
_, format_str = quality_map[quality]
dl_info["quality_to_size"] = [
track_info[f"FILESIZE_{format}"] for _, format in quality_map
]
# dl_info["size_to_quality"] = {
# int(track_info.get(f"FILESIZE_{format}")): self._quality_id_from_filetype(
# format
@ -114,6 +127,10 @@ class DeezerClient(Client):
"Deezer HiFi is required for quality 2. Otherwise, the maximum "
"quality allowed is 1."
)
except deezer.WrongGeolocation:
raise NonStreamable(
"The requested track is not available. This may be due to your country/location."
)
if url is None:
url = self._get_encrypted_file_url(
@ -126,6 +143,7 @@ class DeezerClient(Client):
def _get_encrypted_file_url(
self, meta_id: str, track_hash: str, media_version: str
):
logger.debug("Unable to fetch URL. Trying encryption method.")
format_number = 1
url_bytes = b"\xa4".join(

View File

@ -3,13 +3,14 @@ import functools
import hashlib
import itertools
import json
import logging
import os
import re
import shutil
import subprocess
import tempfile
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Callable, Optional
import aiofiles
@ -20,6 +21,8 @@ from Cryptodome.Cipher import Blowfish
from .. import converter
from ..exceptions import NonStreamable
logger = logging.getLogger("streamrip")
def generate_temp_path(url: str):
return os.path.join(
@ -27,6 +30,7 @@ def generate_temp_path(url: str):
)
@dataclass(slots=True)
class Downloadable(ABC):
session: aiohttp.ClientSession
url: str
@ -53,9 +57,6 @@ class Downloadable(ABC):
async def _download(self, path: str, callback: Callable[[int], None]):
    """Download the resource to ``path``; subclasses must override this.

    Args:
        path: Destination file path for the downloaded data.
        callback: Progress hook that concrete implementations call with an
            integer as data is written (typically a progress-bar update).

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    # ``NotImplemented`` is a comparison sentinel, not an exception class;
    # raising it produces a confusing ``TypeError``.
    raise NotImplementedError
def __repr__(self):
return f"{self.__class__.__name__}({self.__dict__})"
class BasicDownloadable(Downloadable):
"""Just downloads a URL."""
@ -64,6 +65,7 @@ class BasicDownloadable(Downloadable):
self.session = session
self.url = url
self.extension = extension
self._size = None
async def _download(self, path: str, callback: Callable[[int], None]):
async with self.session.get(self.url, allow_redirects=True) as response:
@ -72,7 +74,7 @@ class BasicDownloadable(Downloadable):
async for chunk in response.content.iter_chunked(self.chunk_size):
await file.write(chunk)
# typically a bar.update()
callback(self.chunk_size)
callback(len(chunk))
class DeezerDownloadable(Downloadable):
@ -80,10 +82,12 @@ class DeezerDownloadable(Downloadable):
chunk_size = 2048 * 3
def __init__(self, session: aiohttp.ClientSession, info: dict):
logger.debug("Deezer info for downloadable: %s", info)
self.session = session
self.url = info["url"]
self.fallback_id = info["fallback_id"]
self.quality = info["quality"]
self._size = int(info["quality_to_size"][self.quality])
if self.quality <= 1:
self.extension = "mp3"
else:
@ -91,9 +95,8 @@ class DeezerDownloadable(Downloadable):
self.id = info["id"]
async def _download(self, path: str, callback):
async with self.session.get(
self.url, allow_redirects=True, stream=True
) as resp:
# with requests.Session().get(self.url, allow_redirects=True) as resp:
async with self.session.get(self.url, allow_redirects=True) as resp:
resp.raise_for_status()
self._size = int(resp.headers.get("Content-Length", 0))
if self._size < 20000 and not self.url.endswith(".jpg"):
@ -108,24 +111,45 @@ class DeezerDownloadable(Downloadable):
except json.JSONDecodeError:
raise NonStreamable("File not found.")
async with aiofiles.open(path, "wb") as file:
if self.is_encrypted.search(self.url) is None:
if self.is_encrypted.search(self.url) is None:
logger.debug(f"Deezer file at {self.url} not encrypted.")
async with aiofiles.open(path, "wb") as file:
async for chunk in resp.content.iter_chunked(self.chunk_size):
await file.write(chunk)
# typically a bar.update()
callback(self.chunk_size)
else:
blowfish_key = self._generate_blowfish_key(self.id)
async for chunk in resp.content.iter_chunked(self.chunk_size):
if len(chunk) >= 2048:
decrypted_chunk = (
self._decrypt_chunk(blowfish_key, chunk[:2048])
+ chunk[2048:]
)
else:
decrypted_chunk = chunk
await file.write(decrypted_chunk)
callback(self.chunk_size)
callback(len(chunk))
else:
blowfish_key = self._generate_blowfish_key(self.id)
logger.debug(
f"Deezer file (id %s) at %s is encrypted. Decrypting with %s",
self.id,
self.url,
blowfish_key,
)
assert self.chunk_size == 2048 * 3
# Write data from server to tempfile because there's no
# efficient way to guarantee a fixed chunk size for all iterations
# in async
async with aiofiles.tempfile.TemporaryFile("wb+") as tmp:
async for chunk in resp.content.iter_chunks():
data, _ = chunk
await tmp.write(data)
callback(len(data))
await tmp.seek(0)
async with aiofiles.open(path, "wb") as audio:
while chunk := await tmp.read(self.chunk_size):
if len(chunk) >= 2048:
decrypted_chunk = (
self._decrypt_chunk(blowfish_key, chunk[:2048])
+ chunk[2048:]
)
else:
decrypted_chunk = chunk
await audio.write(decrypted_chunk)
@staticmethod
def _decrypt_chunk(key, data):
@ -168,7 +192,7 @@ class TidalDownloadable(Downloadable):
# Turn CamelCase code into a readable sentence
words = re.findall(r"([A-Z][a-z]+)", restrictions[0]["code"])
raise NonStreamable(
words[0] + " " + " ".join(map(str.lower, words[1:])) + "."
words[0] + " " + " ".join(map(str.lower, words[1:]))
)
raise NonStreamable(f"Tidal download: dl_info = {info}")
@ -220,7 +244,7 @@ class SoundcloudDownloadable(Downloadable):
segment_paths.append(await coro)
callback(1)
concat_audio_files(segment_paths, path, "mp3")
await concat_audio_files(segment_paths, path, "mp3")
async def _download_segment(self, segment_uri: str) -> str:
tmp = generate_temp_path(segment_uri)
@ -241,7 +265,7 @@ class SoundcloudDownloadable(Downloadable):
return await super().size()
def concat_audio_files(paths: list[str], out: str, ext: str, max_files_open=128):
async def concat_audio_files(paths: list[str], out: str, ext: str, max_files_open=128):
"""Concatenate audio files using FFmpeg. Batched by max files open.
Recurses log_{max_file_open}(len(paths)) times.
@ -273,24 +297,31 @@ def concat_audio_files(paths: list[str], out: str, ext: str, max_files_open=128)
except FileNotFoundError:
pass
proc_futures = []
for i in range(num_batches):
proc = subprocess.run(
(
"ffmpeg",
"-i",
f"concat:{'|'.join(itertools.islice(it, max_files_open))}",
"-acodec",
"copy",
"-loglevel",
"warning",
outpaths[i],
),
capture_output=True,
command = (
"ffmpeg",
"-i",
f"concat:{'|'.join(itertools.islice(it, max_files_open))}",
"-acodec",
"copy",
"-loglevel",
"warning",
outpaths[i],
)
fut = asyncio.create_subprocess_exec(*command, stderr=asyncio.subprocess.PIPE)
proc_futures.append(fut)
# Create all processes concurrently
processes = await asyncio.gather(*proc_futures)
# wait for all of them to finish
await asyncio.gather(*[p.communicate() for p in processes])
for proc in processes:
if proc.returncode != 0:
raise Exception(
f"FFMPEG returned with status code {proc.returncode} error: {proc.stderr} output: {proc.stdout}"
)
# Recurse on remaining batches
concat_audio_files(outpaths, out, ext)
await concat_audio_files(outpaths, out, ext)

View File

@ -12,7 +12,7 @@ concurrency = true
# If you have very fast internet, you will benefit from a higher value.
# A value that is too high for your bandwidth may cause slowdowns.
# Set to -1 for no limit
max_connections = 3
max_connections = 6
# Max number of API requests to handle per minute
# Set to -1 for no limit
requests_per_minute = 60

View File

@ -130,6 +130,12 @@ class PendingSingle(Pending):
db: Database
async def resolve(self) -> Track | None:
if self.db.downloaded(self.id):
logger.info(
f"Skipping track {self.id}. Marked as downloaded in the database."
)
return None
try:
resp = await self.client.get_metadata(self.id, "track")
except NonStreamable as e:

View File

@ -1,5 +1,6 @@
from __future__ import annotations
import json
import logging
import re
from dataclasses import dataclass
@ -32,14 +33,12 @@ class AlbumInfo:
@dataclass(slots=True)
class AlbumMetadata:
info: AlbumInfo
album: str
albumartist: str
year: str
genre: list[str]
covers: Covers
tracktotal: int
disctotal: int = 1
albumcomposer: Optional[str] = None
comment: Optional[str] = None
@ -163,8 +162,65 @@ class AlbumMetadata:
)
@classmethod
def from_deezer(cls, resp: dict) -> AlbumMetadata:
    """Build album metadata from a Deezer album response.

    Args:
        resp: Album dict. The code reads ``title``, ``track_total`` /
            ``nb_tracks``, ``tracks`` (indexed as a list of track dicts
            that carry ``disk_number``), ``genres["data"]``,
            ``release_date``, ``artist.name``, ``label``,
            ``parental_warning`` / ``explicit_lyrics``, ``id`` and the
            cover keys consumed by ``Covers.from_deezer``.

    Returns:
        The populated ``AlbumMetadata``.

    Raises:
        KeyError: If ``genres``, ``release_date`` or ``id`` is missing.
    """
    album = resp.get("title", "Unknown Album")
    tracktotal = typed(resp.get("track_total", 0) or resp.get("nb_tracks", 0), int)

    # The disc count is taken from the last track's disc number. An album
    # with no track list would otherwise crash on ``[-1]``; fall back to
    # the dataclass default of a single disc.
    tracks = resp.get("tracks") or []
    disctotal = typed(tracks[-1]["disk_number"], int) if tracks else 1

    genres = [typed(g["name"], str) for g in resp["genres"]["data"]]
    date = typed(resp["release_date"], str)
    year = date[:4]
    albumartist = typed(safe_get(resp, "artist", "name"), str)
    label = resp.get("label")
    explicit = typed(
        resp.get("parental_warning", False) or resp.get("explicit_lyrics", False),
        bool,
    )

    # Not embedded in the response: fixed values are used for the
    # quality-related fields.
    quality = 2
    bit_depth = 16
    sampling_rate = 44100
    container = "FLAC"

    cover_urls = Covers.from_deezer(resp)
    item_id = str(resp["id"])

    info = AlbumInfo(
        id=item_id,
        quality=quality,
        container=container,
        label=label,
        explicit=explicit,
        sampling_rate=sampling_rate,
        bit_depth=bit_depth,
        booklets=None,
    )
    return AlbumMetadata(
        info,
        album,
        albumartist,
        year,
        genre=genres,
        covers=cover_urls,
        albumcomposer=None,
        comment=None,
        compilation=None,
        copyright=None,
        date=date,
        description=None,
        disctotal=disctotal,
        encoder=None,
        grouping=None,
        lyrics=None,
        purchase_date=None,
        tracktotal=tracktotal,
    )
@classmethod
def from_soundcloud(cls, resp) -> AlbumMetadata:

View File

@ -64,6 +64,16 @@ class Covers:
c.set_cover_url("thumbnail", img["thumbnail"])
return c
@classmethod
def from_deezer(cls, resp):
    """Create a cover set from a Deezer response.

    Each cover size is filled from the matching ``cover_*`` key of
    ``resp``; a missing key raises ``KeyError``.
    """
    # (cover size name, response key), registered in this order.
    # ``cover_xl`` is used as-is for the "original" size.
    size_sources = (
        ("original", "cover_xl"),
        ("large", "cover_big"),
        ("small", "cover_medium"),
        ("thumbnail", "cover_small"),
    )
    covers = cls()
    for size_name, resp_key in size_sources:
        covers.set_cover_url(size_name, resp[resp_key])
    return covers
@classmethod
def from_soundcloud(cls, resp):
c = cls()

View File

@ -3,6 +3,7 @@ from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
from ..exceptions import NonStreamable
from .album_metadata import AlbumMetadata
from .util import safe_get, typed
@ -81,7 +82,33 @@ class TrackMetadata:
@classmethod
def from_deezer(cls, album: AlbumMetadata, resp) -> TrackMetadata:
    """Build track metadata from a Deezer track response.

    Args:
        album: Metadata of the album the track belongs to; its
            ``info.quality`` is copied onto the track.
        resp: Track dict. The code reads ``id``, ``explicit_lyrics``,
            ``title``, ``artist.name``, ``track_position`` and
            ``disk_number``.

    Returns:
        The populated ``TrackMetadata``.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    track_id = str(resp["id"])
    bit_depth = 16
    # NOTE(review): this is 44.1 while AlbumMetadata.from_deezer uses
    # 44100 — confirm which unit consumers of TrackInfo expect.
    sampling_rate = 44.1
    explicit = typed(resp["explicit_lyrics"], bool)
    title = typed(resp["title"], str)
    artist = typed(resp["artist"]["name"], str)
    tracknumber = typed(resp["track_position"], int)
    discnumber = typed(resp["disk_number"], int)

    info = TrackInfo(
        id=track_id,
        quality=album.info.quality,
        bit_depth=bit_depth,
        explicit=explicit,
        sampling_rate=sampling_rate,
        work=None,
    )
    return cls(
        info=info,
        title=title,
        album=album,
        artist=artist,
        tracknumber=tracknumber,
        discnumber=discnumber,
        composer=None,
    )
@classmethod
def from_soundcloud(cls, album: AlbumMetadata, resp: dict) -> TrackMetadata:

View File

@ -3,7 +3,14 @@ from typing import Callable
from rich.console import Group
from rich.live import Live
from rich.progress import Progress
from rich.progress import (
BarColumn,
DownloadColumn,
Progress,
TextColumn,
TimeRemainingColumn,
TransferSpeedColumn,
)
from rich.rule import Rule
from rich.text import Text
@ -14,6 +21,19 @@ class ProgressManager:
def __init__(self):
self.started = False
self.progress = Progress(console=console)
self.progress = Progress(
TextColumn("[cyan]{task.description}"),
BarColumn(bar_width=None),
"[progress.percentage]{task.percentage:>3.1f}%",
"",
# DownloadColumn(),
# "•",
TransferSpeedColumn(),
"",
TimeRemainingColumn(),
console=console,
)
self.task_titles = []
self.prefix = Text.assemble(("Downloading ", "bold cyan"), overflow="ellipsis")
self._text_cache = self.gen_title_text()

View File

@ -76,7 +76,7 @@ def rip(ctx, config_path, folder, no_db, quality, convert, no_progress, verbose)
logger.debug("Showing all debug logs")
else:
install(console=console, suppress=[click, asyncio], max_frames=1)
logger.setLevel(logging.WARNING)
logger.setLevel(logging.INFO)
if not os.path.isfile(config_path):
console.print(

View File

@ -3,7 +3,7 @@ import logging
import os
from .. import db
from ..client import Client, QobuzClient, SoundcloudClient
from ..client import Client, DeezerClient, QobuzClient, SoundcloudClient
from ..config import Config
from ..console import console
from ..media import Media, Pending, PendingLastfmPlaylist, remove_artwork_tempdirs
@ -34,7 +34,7 @@ class Main:
self.clients: dict[str, Client] = {
"qobuz": QobuzClient(config),
# "tidal": TidalClient(config),
# "deezer": DeezerClient(config),
"deezer": DeezerClient(config),
"soundcloud": SoundcloudClient(config),
}