Deezer downloads working

2023-12-01 19:34:38 -08:00 · 2023-12-01 19:34:38 -08:00 · 7f1c29df2d
parent 318b7ebadc
commit 7f1c29df2d
10 changed files with 228 additions and 60 deletions
--- a/streamrip/client/deezer.py
+++ b/streamrip/client/deezer.py
@ -1,5 +1,7 @@
 import binascii
 import hashlib
+import json
+import logging

 import deezer
 from Cryptodome.Cipher import AES
@ -9,6 +11,8 @@ from ..exceptions import AuthenticationError, MissingCredentials, NonStreamable
 from .client import Client
 from .downloadable import DeezerDownloadable

+logger = logging.getLogger("streamrip")
+

 class DeezerClient(Client):
    source = "deezer"
@ -21,6 +25,8 @@ class DeezerClient(Client):
        self.config = config.session.deezer

    async def login(self):
+        # Used for track downloads
+        self.session = await self.get_session()
        arl = self.config.arl
        if not arl:
            raise MissingCredentials
@ -29,7 +35,8 @@ class DeezerClient(Client):
            raise AuthenticationError
        self.logged_in = True

-    async def get_metadata(self, info: dict, media_type: str) -> dict:
+    async def get_metadata(self, item_id: str, media_type: str) -> dict:
+        # TODO: open asyncio PR to deezer py and integrate
        request_functions = {
            "track": self.client.api.get_track,
            "album": self.client.api.get_album,
@ -38,17 +45,20 @@ class DeezerClient(Client):
        }

        get_item = request_functions[media_type]
-        item = get_item(info["id"])
+        item = get_item(item_id)
        if media_type in ("album", "playlist"):
            tracks = getattr(self.client.api, f"get_{media_type}_tracks")(
-                info["id"], limit=-1
+                item_id, limit=-1
            )
            item["tracks"] = tracks["data"]
            item["track_total"] = len(tracks["data"])
        elif media_type == "artist":
-            albums = self.client.api.get_artist_albums(info["id"])
+            albums = self.client.api.get_artist_albums(item_id)
            item["albums"] = albums["data"]
-
+        elif media_type == "track":
+            # Because they give incomplete information about the album
+            # we need to make another request
+            item["album"] = await self.get_metadata(item["album"]["id"], "album")
        return item

    async def search(self, media_type: str, query: str, limit: int = 200):
@ -71,20 +81,19 @@ class DeezerClient(Client):
        return response

    async def get_downloadable(
-        self, info: dict, quality: int = 2
+        self, item_id: str, quality: int = 2
    ) -> DeezerDownloadable:
-        item_id = info["id"]
        # TODO: optimize such that all of the ids are requested at once
        dl_info: dict = {"quality": quality, "id": item_id}

        track_info = self.client.gw.get_track(item_id)

-        dl_info["fallback_id"] = track_info["FALLBACK"]["SNG_ID"]
+        dl_info["fallback_id"] = track_info.get("FALLBACK", {}).get("SNG_ID")

        quality_map = [
-            (9, "MP3_128"),
-            (3, "MP3_320"),
-            (1, "FLAC"),
+            (9, "MP3_128"),  # quality 0
+            (3, "MP3_320"),  # quality 1
+            (1, "FLAC"),  # quality 2
        ]

        # available_formats = [
@ -98,6 +107,10 @@ class DeezerClient(Client):

        _, format_str = quality_map[quality]

+        dl_info["quality_to_size"] = [
+            track_info[f"FILESIZE_{format}"] for _, format in quality_map
+        ]
+
        # dl_info["size_to_quality"] = {
        #     int(track_info.get(f"FILESIZE_{format}")): self._quality_id_from_filetype(
        #         format
@ -114,6 +127,10 @@ class DeezerClient(Client):
                "Deezer HiFi is required for quality 2. Otherwise, the maximum "
                "quality allowed is 1."
            )
+        except deezer.WrongGeolocation:
+            raise NonStreamable(
+                "The requested track is not available. This may be due to your country/location."
+            )

        if url is None:
            url = self._get_encrypted_file_url(
@ -126,6 +143,7 @@ class DeezerClient(Client):
    def _get_encrypted_file_url(
        self, meta_id: str, track_hash: str, media_version: str
    ):
+        logger.debug("Unable to fetch URL. Trying encryption method.")
        format_number = 1

        url_bytes = b"\xa4".join(
--- a/streamrip/client/downloadable.py
+++ b/streamrip/client/downloadable.py
@ -3,13 +3,14 @@ import functools
 import hashlib
 import itertools
 import json
+import logging
 import os
 import re
 import shutil
-import subprocess
 import tempfile
 import time
 from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from typing import Any, Callable, Optional

 import aiofiles
@ -20,6 +21,8 @@ from Cryptodome.Cipher import Blowfish
 from .. import converter
 from ..exceptions import NonStreamable

+logger = logging.getLogger("streamrip")
+

 def generate_temp_path(url: str):
    return os.path.join(
@ -27,6 +30,7 @@ def generate_temp_path(url: str):
    )


+@dataclass(slots=True)
 class Downloadable(ABC):
    session: aiohttp.ClientSession
    url: str
@ -53,9 +57,6 @@ class Downloadable(ABC):
    async def _download(self, path: str, callback: Callable[[int], None]):
-        raise NotImplemented
+        raise NotImplementedError  # NotImplemented is a constant, not an exception; raising it is a TypeError

-    def __repr__(self):
-        return f"{self.__class__.__name__}({self.__dict__})"
-

 class BasicDownloadable(Downloadable):
    """Just downloads a URL."""
@ -64,6 +65,7 @@ class BasicDownloadable(Downloadable):
        self.session = session
        self.url = url
        self.extension = extension
+        self._size = None

    async def _download(self, path: str, callback: Callable[[int], None]):
        async with self.session.get(self.url, allow_redirects=True) as response:
@ -72,7 +74,7 @@ class BasicDownloadable(Downloadable):
                async for chunk in response.content.iter_chunked(self.chunk_size):
                    await file.write(chunk)
                    # typically a bar.update()
-                    callback(self.chunk_size)
+                    callback(len(chunk))


 class DeezerDownloadable(Downloadable):
@ -80,10 +82,12 @@ class DeezerDownloadable(Downloadable):
    chunk_size = 2048 * 3

    def __init__(self, session: aiohttp.ClientSession, info: dict):
+        logger.debug("Deezer info for downloadable: %s", info)
        self.session = session
        self.url = info["url"]
        self.fallback_id = info["fallback_id"]
        self.quality = info["quality"]
+        self._size = int(info["quality_to_size"][self.quality])
        if self.quality <= 1:
            self.extension = "mp3"
        else:
@ -91,9 +95,8 @@ class DeezerDownloadable(Downloadable):
        self.id = info["id"]

    async def _download(self, path: str, callback):
-        async with self.session.get(
-            self.url, allow_redirects=True, stream=True
-        ) as resp:
+        # with requests.Session().get(self.url, allow_redirects=True) as resp:
+        async with self.session.get(self.url, allow_redirects=True) as resp:
            resp.raise_for_status()
            self._size = int(resp.headers.get("Content-Length", 0))
            if self._size < 20000 and not self.url.endswith(".jpg"):
@ -108,24 +111,45 @@ class DeezerDownloadable(Downloadable):
                except json.JSONDecodeError:
                    raise NonStreamable("File not found.")

-            async with aiofiles.open(path, "wb") as file:
-                if self.is_encrypted.search(self.url) is None:
+            if self.is_encrypted.search(self.url) is None:
+                logger.debug(f"Deezer file at {self.url} not encrypted.")
+                async with aiofiles.open(path, "wb") as file:
                    async for chunk in resp.content.iter_chunked(self.chunk_size):
                        await file.write(chunk)
                        # typically a bar.update()
-                        callback(self.chunk_size)
-                else:
-                    blowfish_key = self._generate_blowfish_key(self.id)
-                    async for chunk in resp.content.iter_chunked(self.chunk_size):
-                        if len(chunk) >= 2048:
-                            decrypted_chunk = (
-                                self._decrypt_chunk(blowfish_key, chunk[:2048])
-                                + chunk[2048:]
-                            )
-                        else:
-                            decrypted_chunk = chunk
-                        await file.write(decrypted_chunk)
-                        callback(self.chunk_size)
+                        callback(len(chunk))
+            else:
+                blowfish_key = self._generate_blowfish_key(self.id)
+                logger.debug(
+                    f"Deezer file (id %s) at %s is encrypted. Decrypting with %s",
+                    self.id,
+                    self.url,
+                    blowfish_key,
+                )
+
+                assert self.chunk_size == 2048 * 3
+
+                # Write data from server to tempfile because there's no
+                # efficient way to guarantee a fixed chunk size for all iterations
+                # in async
+                async with aiofiles.tempfile.TemporaryFile("wb+") as tmp:
+                    async for chunk in resp.content.iter_chunks():
+                        data, _ = chunk
+                        await tmp.write(data)
+                        callback(len(data))
+
+                    await tmp.seek(0)
+                    async with aiofiles.open(path, "wb") as audio:
+                        while chunk := await tmp.read(self.chunk_size):
+                            if len(chunk) >= 2048:
+                                decrypted_chunk = (
+                                    self._decrypt_chunk(blowfish_key, chunk[:2048])
+                                    + chunk[2048:]
+                                )
+                            else:
+                                decrypted_chunk = chunk
+
+                            await audio.write(decrypted_chunk)

    @staticmethod
    def _decrypt_chunk(key, data):
@ -168,7 +192,7 @@ class TidalDownloadable(Downloadable):
                # Turn CamelCase code into a readable sentence
                words = re.findall(r"([A-Z][a-z]+)", restrictions[0]["code"])
                raise NonStreamable(
-                    words[0] + " " + " ".join(map(str.lower, words[1:])) + "."
+                    words[0] + " " + " ".join(map(str.lower, words[1:]))
                )

            raise NonStreamable(f"Tidal download: dl_info = {info}")
@ -220,7 +244,7 @@ class SoundcloudDownloadable(Downloadable):
            segment_paths.append(await coro)
            callback(1)

-        concat_audio_files(segment_paths, path, "mp3")
+        await concat_audio_files(segment_paths, path, "mp3")

    async def _download_segment(self, segment_uri: str) -> str:
        tmp = generate_temp_path(segment_uri)
@ -241,7 +265,7 @@ class SoundcloudDownloadable(Downloadable):
        return await super().size()


-def concat_audio_files(paths: list[str], out: str, ext: str, max_files_open=128):
+async def concat_audio_files(paths: list[str], out: str, ext: str, max_files_open=128):
    """Concatenate audio files using FFmpeg. Batched by max files open.

    Recurses log_{max_file_open}(len(paths)) times.
@ -273,24 +297,31 @@ def concat_audio_files(paths: list[str], out: str, ext: str, max_files_open=128)
        except FileNotFoundError:
            pass

+    proc_futures = []
    for i in range(num_batches):
-        proc = subprocess.run(
-            (
-                "ffmpeg",
-                "-i",
-                f"concat:{'|'.join(itertools.islice(it, max_files_open))}",
-                "-acodec",
-                "copy",
-                "-loglevel",
-                "warning",
-                outpaths[i],
-            ),
-            capture_output=True,
+        command = (
+            "ffmpeg",
+            "-i",
+            f"concat:{'|'.join(itertools.islice(it, max_files_open))}",
+            "-acodec",
+            "copy",
+            "-loglevel",
+            "warning",
+            outpaths[i],
        )
+        fut = asyncio.create_subprocess_exec(*command, stderr=asyncio.subprocess.PIPE)
+        proc_futures.append(fut)
+
+    # Create all processes concurrently
+    processes = await asyncio.gather(*proc_futures)
+
+    # wait for all to finish; keep communicate()'s output, proc.stderr is just the stream
+    outputs = await asyncio.gather(*[p.communicate() for p in processes])
+    for proc, (_, stderr) in zip(processes, outputs):
        if proc.returncode != 0:
            raise Exception(
-                f"FFMPEG returned with status code {proc.returncode} error: {proc.stderr} output: {proc.stdout}"
+                f"FFMPEG returned with status code {proc.returncode} error: {stderr.decode(errors='replace')}"
            )

    # Recurse on remaining batches
-    concat_audio_files(outpaths, out, ext)
+    await concat_audio_files(outpaths, out, ext)
--- a/streamrip/config.toml
+++ b/streamrip/config.toml
@ -12,7 +12,7 @@ concurrency = true
 # If you have very fast internet, you will benefit from a higher value,
 # A value that is too high for your bandwidth may cause slowdowns
 # Set to -1 for no limit
-max_connections = 3
+max_connections = 6
 # Max number of API requests to handle per minute
 # Set to -1 for no limit
 requests_per_minute = 60
--- a/streamrip/media/track.py
+++ b/streamrip/media/track.py
@ -130,6 +130,12 @@ class PendingSingle(Pending):
    db: Database

    async def resolve(self) -> Track | None:
+        if self.db.downloaded(self.id):
+            logger.info(
+                f"Skipping track {self.id}. Marked as downloaded in the database."
+            )
+            return None
+
        try:
            resp = await self.client.get_metadata(self.id, "track")
        except NonStreamable as e:
--- a/streamrip/metadata/album_metadata.py
+++ b/streamrip/metadata/album_metadata.py
@ -1,5 +1,6 @@
 from __future__ import annotations

+import json
 import logging
 import re
 from dataclasses import dataclass
@ -32,14 +33,12 @@ class AlbumInfo:
@dataclass(slots=True)
 class AlbumMetadata:
    info: AlbumInfo
-
    album: str
    albumartist: str
    year: str
    genre: list[str]
    covers: Covers
    tracktotal: int
-
    disctotal: int = 1
    albumcomposer: Optional[str] = None
    comment: Optional[str] = None
@ -163,8 +162,65 @@ class AlbumMetadata:
        )

    @classmethod
-    def from_deezer(cls, resp) -> AlbumMetadata:
-        raise NotImplementedError
+    def from_deezer(cls, resp: dict) -> AlbumMetadata:
+        """Build album metadata from a Deezer album response.
+
+        Indexes resp["tracks"], ["genres"]["data"], ["release_date"] and
+        ["id"] directly (KeyError if absent); an album with an empty track
+        list gets disctotal=1.
+        """
+        album = resp.get("title", "Unknown Album")
+        tracktotal = typed(resp.get("track_total", 0) or resp.get("nb_tracks", 0), int)
+        # The last track's disc number is taken as the disc count; guard the
+        # empty list so [-1] cannot raise IndexError
+        tracks = resp["tracks"]
+        disctotal = typed(tracks[-1]["disk_number"] if tracks else 1, int)
+
+        genres = [typed(g["name"], str) for g in resp["genres"]["data"]]
+        date = typed(resp["release_date"], str)
+        year = date[:4]
+        albumartist = typed(safe_get(resp, "artist", "name"), str)
+        label = resp.get("label")
+        explicit = typed(
+            resp.get("parental_warning", False) or resp.get("explicit_lyrics", False),
+            bool,
+        )
+
+        cover_urls = Covers.from_deezer(resp)
+        item_id = str(resp["id"])
+
+        # Stream-format fields are not read from resp: they are hard-coded to
+        # quality 2 / 16-bit / 44100 / FLAC
+        info = AlbumInfo(
+            id=item_id,
+            quality=2,
+            container="FLAC",
+            label=label,
+            explicit=explicit,
+            sampling_rate=44100,
+            bit_depth=16,
+            booklets=None,
+        )
+        return AlbumMetadata(
+            info,
+            album,
+            albumartist,
+            year,
+            genre=genres,
+            covers=cover_urls,
+            albumcomposer=None,
+            comment=None,
+            compilation=None,
+            copyright=None,
+            date=date,
+            description=None,
+            disctotal=disctotal,
+            encoder=None,
+            grouping=None,
+            lyrics=None,
+            purchase_date=None,
+            tracktotal=tracktotal,
+        )

    @classmethod
    def from_soundcloud(cls, resp) -> AlbumMetadata:
--- a/streamrip/metadata/covers.py
+++ b/streamrip/metadata/covers.py
@ -64,6 +64,16 @@ class Covers:
        c.set_cover_url("thumbnail", img["thumbnail"])
        return c

+    @classmethod
+    def from_deezer(cls, resp):
+        """Create covers from the cover_xl/big/medium/small URLs in `resp`."""
+        size_to_key = (("original", "cover_xl"), ("large", "cover_big"))
+        size_to_key += (("small", "cover_medium"), ("thumbnail", "cover_small"))
+        covers = cls()
+        for size, key in size_to_key:
+            covers.set_cover_url(size, resp[key])
+        return covers
+
    @classmethod
    def from_soundcloud(cls, resp):
        c = cls()
--- a/streamrip/metadata/track_metadata.py
+++ b/streamrip/metadata/track_metadata.py
@ -3,6 +3,7 @@ from __future__ import annotations
 from dataclasses import dataclass
 from typing import Optional

+from ..exceptions import NonStreamable
 from .album_metadata import AlbumMetadata
 from .util import safe_get, typed

@ -81,7 +82,33 @@ class TrackMetadata:

    @classmethod
    def from_deezer(cls, album: AlbumMetadata, resp) -> TrackMetadata:
-        raise NotImplemented
+        """Build track metadata from a Deezer track response and its album.
+
+        Bit depth and sampling rate are hard-coded; quality comes from `album`.
+        """
+        track_id = str(resp["id"])
+        is_explicit = typed(resp["explicit_lyrics"], bool)
+        name = typed(resp["title"], str)
+        performer = typed(resp["artist"]["name"], str)
+        position = typed(resp["track_position"], int)
+        disc = typed(resp["disk_number"], int)
+        info = TrackInfo(
+            id=track_id,
+            quality=album.info.quality,
+            bit_depth=16,
+            explicit=is_explicit,
+            sampling_rate=44.1,  # NOTE(review): AlbumMetadata.from_deezer uses 44100 — confirm unit
+            work=None,
+        )
+        return cls(
+            info=info,
+            title=name,
+            album=album,
+            artist=performer,
+            tracknumber=position,
+            discnumber=disc,
+            composer=None,
+        )

    @classmethod
    def from_soundcloud(cls, album: AlbumMetadata, resp: dict) -> TrackMetadata:
--- a/streamrip/progress.py
+++ b/streamrip/progress.py
@ -3,7 +3,14 @@ from typing import Callable

 from rich.console import Group
 from rich.live import Live
-from rich.progress import Progress
+from rich.progress import (
+    BarColumn,
+    DownloadColumn,
+    Progress,
+    TextColumn,
+    TimeRemainingColumn,
+    TransferSpeedColumn,
+)
 from rich.rule import Rule
 from rich.text import Text

@ -14,6 +21,19 @@ class ProgressManager:
    def __init__(self):
        self.started = False
        self.progress = Progress(console=console)
+        self.progress = Progress(
+            TextColumn("[cyan]{task.description}"),
+            BarColumn(bar_width=None),
+            "[progress.percentage]{task.percentage:>3.1f}%",
+            "•",
+            # DownloadColumn(),
+            # "•",
+            TransferSpeedColumn(),
+            "•",
+            TimeRemainingColumn(),
+            console=console,
+        )
+
        self.task_titles = []
        self.prefix = Text.assemble(("Downloading ", "bold cyan"), overflow="ellipsis")
        self._text_cache = self.gen_title_text()
--- a/streamrip/rip/cli.py
+++ b/streamrip/rip/cli.py
@ -76,7 +76,7 @@ def rip(ctx, config_path, folder, no_db, quality, convert, no_progress, verbose)
        logger.debug("Showing all debug logs")
    else:
        install(console=console, suppress=[click, asyncio], max_frames=1)
-        logger.setLevel(logging.WARNING)
+        logger.setLevel(logging.INFO)

    if not os.path.isfile(config_path):
        console.print(
--- a/streamrip/rip/main.py
+++ b/streamrip/rip/main.py
@ -3,7 +3,7 @@ import logging
 import os

 from .. import db
-from ..client import Client, QobuzClient, SoundcloudClient
+from ..client import Client, DeezerClient, QobuzClient, SoundcloudClient
 from ..config import Config
 from ..console import console
 from ..media import Media, Pending, PendingLastfmPlaylist, remove_artwork_tempdirs
@ -34,7 +34,7 @@ class Main:
        self.clients: dict[str, Client] = {
            "qobuz": QobuzClient(config),
            # "tidal": TidalClient(config),
-            # "deezer": DeezerClient(config),
+            "deezer": DeezerClient(config),
            "soundcloud": SoundcloudClient(config),
        }