Generate subfolder within folder_path, track_path and playlist_path, using folder separator between keywords

Better path sanitization
Generate playlist m3u8 file with playlist keywords
Add albumtitle to folder keywords
Skip the track instead of failing if the Deezer response is 403 Forbidden (wrong zone, out-of-date track ID, etc.)
Skip tagging / conversion instead of failing (FLAC detection does not seem to be 100% reliable)
This commit is contained in:
slanglade 2024-02-26 16:59:56 +01:00
parent 3443331501
commit 8946899d6d
12 changed files with 320 additions and 138 deletions

1
.gitignore vendored
View File

@ -21,3 +21,4 @@ StreamripDownloads
/.mypy_cache
.DS_Store
pyrightconfig.json
/.vscode

View File

@ -52,9 +52,12 @@ class Downloadable(ABC):
return self._size
async with self.session.head(self.url) as response:
response.raise_for_status()
content_length = response.headers.get("Content-Length", 0)
self._size = int(content_length)
try:
response.raise_for_status()
content_length = response.headers.get("Content-Length", 0)
self._size = int(content_length)
except Exception as e:
self._size = 0
return self._size
@abstractmethod
@ -73,12 +76,15 @@ class BasicDownloadable(Downloadable):
async def _download(self, path: str, callback: Callable[[int], None]):
async with self.session.get(self.url, allow_redirects=True) as response:
response.raise_for_status()
async with aiofiles.open(path, "wb") as file:
async for chunk in response.content.iter_chunked(self.chunk_size):
await file.write(chunk)
# typically a bar.update()
callback(len(chunk))
try:
response.raise_for_status()
async with aiofiles.open(path, "wb") as file:
async for chunk in response.content.iter_chunked(self.chunk_size):
await file.write(chunk)
# typically a bar.update()
callback(len(chunk))
except:
self._size = None
class DeezerDownloadable(Downloadable):
@ -89,67 +95,83 @@ class DeezerDownloadable(Downloadable):
logger.debug("Deezer info for downloadable: %s", info)
self.session = session
self.url = info["url"]
max_quality_available = max(
i for i, size in enumerate(info["quality_to_size"]) if size > 0
)
self.quality = min(info["quality"], max_quality_available)
self._size = info["quality_to_size"][self.quality]
if self.quality <= 1:
self.extension = "mp3"
else:
try:
max_quality_available = max(
i for i, size in enumerate(info["quality_to_size"]) if size > 0
)
self.quality = min(info["quality"], max_quality_available)
self._size = info["quality_to_size"][self.quality]
if self.quality <= 1:
self.extension = "mp3"
else:
self.extension = "flac"
self.id = str(info["id"])
except Exception as e:
logger.error("Error occured while preparing download for item id %s, assuming mp3 with 320kbs : %s", info["id"], e)
self.id = str(info["id"])
self.extension = "flac"
self.id = str(info["id"])
self.quality = 1
async def _download(self, path: str, callback):
# with requests.Session().get(self.url, allow_redirects=True) as resp:
async with self.session.get(self.url, allow_redirects=True) as resp:
resp.raise_for_status()
self._size = int(resp.headers.get("Content-Length", 0))
if self._size < 20000 and not self.url.endswith(".jpg"):
try:
info = await resp.json()
try:
resp.raise_for_status()
self._size = int(resp.headers.get("Content-Length", 0))
if self._size < 20000 and not self.url.endswith(".jpg"):
try:
# Usually happens with deezloader downloads
raise NonStreamableError(f"{info['error']} - {info['message']}")
except KeyError:
raise NonStreamableError(info)
info = await resp.json()
try:
# Usually happens with deezloader downloads
raise NonStreamableError(f"{info['error']} - {info['message']}")
except KeyError:
raise NonStreamableError(info)
except json.JSONDecodeError:
raise NonStreamableError("File not found.")
except json.JSONDecodeError:
raise NonStreamableError("File not found.")
if self.is_encrypted.search(self.url) is None:
logger.debug(f"Deezer file at {self.url} not encrypted.")
async with aiofiles.open(path, "wb") as file:
async for chunk in resp.content.iter_chunked(self.chunk_size):
await file.write(chunk)
# typically a bar.update()
callback(len(chunk))
else:
blowfish_key = self._generate_blowfish_key(self.id)
logger.debug(
"Deezer file (id %s) at %s is encrypted. Decrypting with %s",
self.id,
self.url,
blowfish_key,
)
if self.is_encrypted.search(self.url) is None:
logger.debug(f"Deezer file at {self.url} not encrypted.")
async with aiofiles.open(path, "wb") as file:
async for chunk in resp.content.iter_chunked(self.chunk_size):
await file.write(chunk)
# typically a bar.update()
callback(len(chunk))
else:
blowfish_key = self._generate_blowfish_key(self.id)
logger.debug(
"Deezer file (id %s) at %s is encrypted. Decrypting with %s",
self.id,
self.url,
blowfish_key,
)
buf = bytearray()
async for data, _ in resp.content.iter_chunks():
buf += data
callback(len(data))
buf = bytearray()
async for data, _ in resp.content.iter_chunks():
buf += data
callback(len(data))
async with aiofiles.open(path, "wb") as audio:
buflen = len(buf)
for i in range(0, buflen, self.chunk_size):
data = buf[i : min(i + self.chunk_size, buflen)]
if len(data) >= 2048:
decrypted_chunk = (
self._decrypt_chunk(blowfish_key, data[:2048])
+ data[2048:]
)
else:
decrypted_chunk = data
try:
await audio.write(decrypted_chunk)
except Exception as e:
logger.error("Error occured while writing file for item %s : %s", self.id, e)
self._size = None
except Exception as e:
logger.error("Error occured while downloading item %s : %s", self.id, e)
self._size = None
async with aiofiles.open(path, "wb") as audio:
buflen = len(buf)
for i in range(0, buflen, self.chunk_size):
data = buf[i : min(i + self.chunk_size, buflen)]
if len(data) >= 2048:
decrypted_chunk = (
self._decrypt_chunk(blowfish_key, data[:2048])
+ data[2048:]
)
else:
decrypted_chunk = data
await audio.write(decrypted_chunk)
@staticmethod
def _decrypt_chunk(key, data):

View File

@ -165,9 +165,13 @@ class FilepathsConfig:
# Available keys: "albumartist", "title", "year", "bit_depth", "sampling_rate",
# "container", "id", and "albumcomposer"
folder_format: str
# Available keys: "tracknumber", "artist", "albumartist", "composer", "title",
# Available keys: "tracknumber", "artist", "albumartist", "composer", "title", "albumtitle",
# and "albumcomposer"
track_format: str
# Available keys: "owner", "playlist". Leave empty if no upper-level folder should be prepended to track_format.
playlist_format: str
# Available keys : "owner", "playlist"
m3u8_format: str
# Only allow printable ASCII characters in filenames.
restrict_characters: bool
# Truncate the filename if it is greater than 120 characters

View File

@ -1,6 +1,6 @@
[downloads]
# Folder where tracks are downloaded to
folder = ""
folder = "/music"
# Put Qobuz albums in a 'Qobuz' folder, Tidal albums in 'Tidal' etc.
source_subdirectories = false
@ -156,12 +156,16 @@ exclude = []
# Create folders for single tracks within the downloads directory using the folder_format
# template
add_singles_to_folder = false
# Available keys: "albumartist", "title", "year", "bit_depth", "sampling_rate",
# "id", and "albumcomposer"
# Available keys: "albumartist", "title", "year", "bit_depth", "sampling_rate",
# "container", "id", "albumcomposer", "playlist", "owner"
folder_format = "{albumartist} - {title} ({year}) [{container}] [{bit_depth}B-{sampling_rate}kHz]"
# Available keys: "tracknumber", "artist", "albumartist", "composer", "title",
# Available keys: "tracknumber", "artist", "albumartist", "composer", "title", "albumtitle",
# "albumcomposer", and "explicit"
track_format = "{tracknumber:02}. {artist} - {title}{explicit}"
# Available keys: "owner", "playlist". Leave empty if no upper-level folder should be prepended to track_format.
playlist_format = "{playlist}"
# Available keys : "owner", "playlist"
m3u8_format = "{owner} - {playlist}"
# Only allow printable ASCII characters in filenames.
restrict_characters = false
# Truncate the filename if it is greater than this number of characters

View File

@ -38,8 +38,8 @@ class Dummy(DatabaseInterface):
def create(self):
pass
def contains(self, **_):
return False
def getPath(self, **_):
return ""
def add(self, *_):
pass
@ -164,8 +164,32 @@ class Downloads(DatabaseBase):
name = "downloads"
structure: Final[dict] = {
"id": ["text", "unique"],
"filepath": ["text"],
}
def getPath(self, **items) -> str:
    """Return the stored filepath of the first row matching ``items``.

    :param items: column-name / expected-value pairs. Every key must be a
        column declared in ``self.structure``; values are compared after
        being converted to ``str``.
    :returns: the ``filepath`` column of the first matching row, or ``""``
        when no row matches or the stored value is empty/NULL.
    :rtype: str
    """
    allowed_keys = set(self.structure.keys())
    # Column names are interpolated into the SQL text below, so they are
    # restricted to the known schema; the values are bound as parameters.
    assert all(
        key in allowed_keys for key in items.keys()
    ), f"Invalid key. Valid keys: {allowed_keys}"

    items = {k: str(v) for k, v in items.items()}
    conditions = " AND ".join(f"{key}=?" for key in items.keys())
    command = f"SELECT filepath FROM {self.name} WHERE {conditions}"
    logger.debug("Executing %s", command)

    # ``with sqlite3.connect(...)`` only scopes a transaction and leaves the
    # connection open, so close it explicitly once the row has been read.
    conn = sqlite3.connect(self.path)
    try:
        row = conn.execute(command, tuple(items.values())).fetchone()
    finally:
        conn.close()

    # Normalise "no row" and a NULL filepath to the same falsy string so the
    # declared ``str`` return type always holds.
    return (row[0] or "") if row else ""
class Failed(DatabaseBase):
"""A table that stores information about failed downloads."""
@ -180,14 +204,14 @@ class Failed(DatabaseBase):
@dataclass(slots=True)
class Database:
downloads: DatabaseInterface
failed: DatabaseInterface
downloads: Downloads
failed: Failed
def downloaded(self, item_id: str) -> bool:
return self.downloads.contains(id=item_id)
def downloaded(self, item_id: str) -> str:
return self.downloads.getPath(id=item_id)
def set_downloaded(self, item_id: str):
self.downloads.add((item_id,))
def set_downloaded(self, item_id, filepath: str):
self.downloads.add((item_id,filepath,))
def get_failed_downloads(self) -> list[tuple[str, str, str]]:
return self.failed.all()

View File

@ -2,20 +2,40 @@ from string import printable
from pathvalidate import sanitize_filename, sanitize_filepath # type: ignore
import os
ALLOWED_CHARS = set(printable)
def clean_filename(fn: str, restrict: bool = False) -> str:
path = str(sanitize_filename(fn))
if restrict:
path = "".join(c for c in path if c in ALLOWED_CHARS)
return path
if fn:
parts = os.path.normpath(fn).split(os.path.sep)
for index, part in enumerate(parts):
if index < len(parts)-1:
path = str(sanitize_filepath(part))
else:
path = str(sanitize_filename(part))
if restrict:
path = "".join(c for c in path if c in ALLOWED_CHARS)
parts[index] = path
return os.path.sep.join(parts)
else:
return fn
def clean_pathsep(fn: str) -> str:
    """Replace every ``/`` and ``\\`` in *fn* with an underscore.

    Falsy input (``""`` or ``None``) is handed back unchanged.
    """
    if not fn:
        return fn
    separators_to_underscore = str.maketrans({"/": "_", "\\": "_"})
    return fn.translate(separators_to_underscore)
def clean_filepath(fn: str, restrict: bool = False) -> str:
path = str(sanitize_filepath(fn))
if restrict:
path = "".join(c for c in path if c in ALLOWED_CHARS)
return path
if fn:
parts = os.path.normpath(fn).split(os.path.sep)
for index, part in enumerate(parts):
path = str(sanitize_filepath(part))
if restrict:
path = "".join(c for c in path if c in ALLOWED_CHARS)
parts[index] = path
return os.path.sep.join(parts)
else:
return fn

View File

@ -84,6 +84,7 @@ class PendingAlbum(Pending):
client=self.client,
config=self.config,
folder=album_folder,
m3u8="",
db=self.db,
cover_path=embed_cover,
)

View File

@ -4,7 +4,7 @@ import logging
import os
import random
import re
from contextlib import ExitStack
from contextlib import ExitStack, suppress
from dataclasses import dataclass
import aiohttp
@ -37,14 +37,24 @@ class PendingPlaylistTrack(Pending):
client: Client
config: Config
folder: str
m3u8: str
playlist_name: str
position: int
db: Database
async def resolve(self) -> Track | None:
if self.db.downloaded(self.id):
logger.info(f"Track ({self.id}) already logged in database. Skipping.")
alreadyDownloaded = self.db.downloaded(self.id)
if alreadyDownloaded:
logger.info(f"Track ({self.id}) already logged in database, stored in {alreadyDownloaded}. Skipping.")
if self.m3u8:
try:
with open(self.m3u8, 'a+') as f:
# Write filepath using relative path. Given m3u8 file is located in the same folder structure, simply replace its path with "."
f.write(alreadyDownloaded.replace(os.path.dirname(self.m3u8), ".") + "\n")
except Exception as e:
logger.error("Error occured while appending line to m3u8 file: %s", e)
return None
try:
resp = await self.client.get_metadata(self.id, "track")
except NonStreamableError as e:
@ -82,14 +92,15 @@ class PendingPlaylistTrack(Pending):
logger.error("Error fetching download info for track: %s", e)
self.db.set_failed(self.client.source, "track", self.id)
return None
return Track(
meta,
downloadable,
self.config,
self.folder,
embedded_cover_path,
self.db,
meta=meta,
downloadable=downloadable,
config=self.config,
folder=self.folder,
m3u8=self.m3u8,
cover_path=embedded_cover_path,
db=self.db,
)
async def _download_cover(self, covers: Covers, folder: str) -> str | None:
@ -109,6 +120,7 @@ class Playlist(Media):
config: Config
client: Client
tracks: list[PendingPlaylistTrack]
m3u8: str
async def preprocess(self):
progress.add_title(self.name)
@ -157,21 +169,54 @@ class PendingPlaylist(Pending):
meta = PlaylistMetadata.from_resp(resp, self.client.source)
name = meta.name
parent = self.config.session.downloads.folder
folder = os.path.join(parent, clean_filepath(name))
folder = self.config.session.downloads.folder
playlist_folder = self._playlist_folder(folder, meta)
os.makedirs(playlist_folder, exist_ok=True)
# Construct m3u8 filename and delete it, as we're rebuilding it from scratch
m3u8 = self._m3u8_file(folder, meta)
with suppress(FileNotFoundError):
os.remove(m3u8)
tracks = [
PendingPlaylistTrack(
id,
self.client,
self.config,
folder,
name,
position + 1,
self.db,
id=id,
client=self.client,
config=self.config,
folder=playlist_folder,
m3u8=m3u8,
playlist_name=name,
position=position + 1,
db=self.db,
)
for position, id in enumerate(meta.ids())
]
return Playlist(name, self.config, self.client, tracks)
return Playlist(name, self.config, self.client, tracks, m3u8)
def _playlist_folder(self, parent: str, meta: PlaylistMetadata) -> str:
    """Build the directory path used for this playlist's tracks.

    When ``downloads.source_subdirectories`` is enabled, the capitalized
    source name is inserted below *parent*. The final component is the
    ``filepaths.playlist_format`` template rendered by *meta* and passed
    through ``clean_filepath``.
    """
    session = self.config.session
    base = (
        os.path.join(parent, self.client.source.capitalize())
        if session.downloads.source_subdirectories
        else parent
    )
    rendered = meta.format_folder_path(session.filepaths.playlist_format)
    subfolder = clean_filepath(rendered, session.filepaths.restrict_characters)
    return os.path.join(base, subfolder)
def _m3u8_file(self, parent: str, meta: PlaylistMetadata) -> str:
    """Build the path of the playlist's ``.m3u8`` file.

    Returns an empty string when ``filepaths.m3u8_format`` is empty.
    Otherwise the template is rendered by *meta*, passed through
    ``clean_filepath`` and placed below *parent* (inside the capitalized
    source subdirectory when ``downloads.source_subdirectories`` is set).
    """
    session = self.config.session
    if not session.filepaths.m3u8_format:
        return ''

    base = parent
    if session.downloads.source_subdirectories:
        base = os.path.join(parent, self.client.source.capitalize())

    template = session.filepaths.m3u8_format
    stem = clean_filepath(
        meta.format_folder_path(template), session.filepaths.restrict_characters
    )
    return os.path.join(base, stem) + '.m3u8'
@dataclass(slots=True)
@ -229,8 +274,10 @@ class PendingLastfmPlaylist(Pending):
requests.append(self._make_query(f"{title} {artist}", s, callback))
results: list[tuple[str | None, bool]] = await asyncio.gather(*requests)
parent = self.config.session.downloads.folder
folder = os.path.join(parent, clean_filepath(playlist_title))
folder = self.config.session.downloads.folder
playlist_folder = self._playlist_folder(folder, meta)
os.makedirs(playlist_folder, exist_ok=True)
m3u8 = self._m3u8_lastfm(folder, playlist_title)
pending_tracks = []
for pos, (id, from_fallback) in enumerate(results, start=1):
@ -249,14 +296,15 @@ class PendingLastfmPlaylist(Pending):
id,
client,
self.config,
folder,
playlist_folder,
m3u8,
playlist_title,
pos,
self.db,
),
)
return Playlist(playlist_title, self.config, self.client, pending_tracks)
return Playlist(playlist_title, self.config, self.client, pending_tracks, m3u8)
async def _make_query(
self,
@ -391,3 +439,23 @@ class PendingLastfmPlaylist(Pending):
s.failed += 1
callback()
return None, False
def _playlist_folder(self, parent: str, meta: PlaylistMetadata) -> str:
    """Return the folder in which this playlist's tracks are stored.

    The folder name comes from rendering ``filepaths.playlist_format``
    with *meta* and passing the result through ``clean_filepath``. It is
    joined onto *parent*, or onto ``parent/<Source>`` when
    ``downloads.source_subdirectories`` is enabled.
    """
    settings = self.config.session
    root = parent
    if settings.downloads.source_subdirectories:
        source_dir = self.client.source.capitalize()
        root = os.path.join(root, source_dir)

    template = settings.filepaths.playlist_format
    raw_name = meta.format_folder_path(template)
    safe_name = clean_filepath(raw_name, settings.filepaths.restrict_characters)
    return os.path.join(root, safe_name)
def _m3u8_lastfm(self, parent: str, name: str) -> str:
    """Build the path of the ``.m3u8`` file for a Last.fm playlist.

    ``filepaths.m3u8_format`` acts only as an on/off switch here: when it
    is empty no playlist file is wanted and ``''`` is returned. Otherwise
    the file is named after the playlist title.

    :param parent: base download folder.
    :param name: playlist title as scraped from Last.fm (free text).
    :returns: full path ending in ``.m3u8``, or ``''`` when disabled.
    """
    config = self.config.session
    if not config.filepaths.m3u8_format:
        return ''

    if config.downloads.source_subdirectories:
        parent = os.path.join(parent, self.client.source.capitalize())

    # The title is free text: flatten path separators so it stays a single
    # file name, then sanitize it the same way ``_m3u8_file`` sanitizes its
    # formatted name, so the resulting path can actually be opened.
    flat_name = name.replace("/", "_").replace("\\", "_")
    filename = clean_filepath(flat_name, config.filepaths.restrict_characters)
    return os.path.join(parent, filename) + '.m3u8'

View File

@ -24,6 +24,7 @@ class Track(Media):
downloadable: Downloadable
config: Config
folder: str
m3u8: str
# Is None if a cover doesn't exist for the track
cover_path: str | None
db: Database
@ -48,14 +49,27 @@ class Track(Media):
await self.downloadable.download(self.download_path, callback)
async def postprocess(self):
if self.is_single:
remove_title(self.meta.title)
if self.downloadable._size != None:
if self.is_single:
remove_title(self.meta.title)
await tag_file(self.download_path, self.meta, self.cover_path)
if self.config.session.conversion.enabled:
await self._convert()
try:
await tag_file(self.download_path, self.meta, self.cover_path)
if self.config.session.conversion.enabled:
await self._convert()
except Exception as e:
logger.warning(
f"Unable to tag or convert file {self.download_path}, file is left as-is, please check result carefully : {e}",
)
self.db.set_downloaded(self.meta.info.id)
self.db.set_downloaded(self.meta.info.id, self.download_path)
if self.m3u8:
try:
with open(self.m3u8, 'a+') as f:
# Write filepath using relative path. Given m3u8 file is located in the same folder structure, simply replace its path with "."
f.write(self.download_path.replace(os.path.dirname(self.m3u8), ".") + "\n")
except Exception as e:
logger.warning(f"Unable to append line {self.download_path} to m3u8 file {self.m3u8} : {e}")
async def _convert(self):
c = self.config.session.conversion
@ -84,6 +98,9 @@ class Track(Media):
f"{track_path}.{self.downloadable.extension}",
)
os.makedirs(os.path.dirname(self.download_path), exist_ok=True) # To deal with subfolders in track name
@dataclass(slots=True)
class PendingTrack(Pending):
@ -92,6 +109,7 @@ class PendingTrack(Pending):
client: Client
config: Config
folder: str
m3u8: str
db: Database
# cover_path is None <==> Artwork for this track doesn't exist in API
cover_path: str | None
@ -119,12 +137,13 @@ class PendingTrack(Pending):
quality = self.config.session.get_source(source).quality
downloadable = await self.client.get_downloadable(self.id, quality)
return Track(
meta,
downloadable,
self.config,
self.folder,
self.cover_path,
self.db,
meta=meta,
downloadable=downloadable,
config=self.config,
folder=self.folder,
m3u8=self.m3u8,
cover_path=self.cover_path,
db=self.db,
)
@ -187,12 +206,13 @@ class PendingSingle(Pending):
self.client.get_downloadable(self.id, quality),
)
return Track(
meta,
downloadable,
self.config,
folder,
embedded_cover_path,
self.db,
meta=meta,
downloadable=downloadable,
config=self.config,
folder=folder,
m3u8="",
cover_path=embedded_cover_path,
db=self.db,
is_single=True,
)

View File

@ -7,7 +7,7 @@ from typing import Optional
from .covers import Covers
from .util import get_quality_id, safe_get, typed
from ..filepath_utils import clean_filename
from ..filepath_utils import clean_pathsep
PHON_COPYRIGHT = "\u2117"
COPYRIGHT = "\u00a9"
@ -68,12 +68,12 @@ class AlbumMetadata:
none_str = "Unknown"
info: dict[str, str | int | float] = {
"albumartist": clean_filename(self.albumartist),
"albumcomposer": clean_filename(self.albumcomposer) or none_str,
"albumartist": clean_pathsep(self.albumartist),
"albumcomposer": clean_pathsep(self.albumcomposer) or none_str,
"bit_depth": self.info.bit_depth or none_str,
"id": self.info.id,
"sampling_rate": self.info.sampling_rate or none_str,
"title": clean_filename(self.album),
"title": clean_pathsep(self.album),
"year": self.year,
"container": self.info.container,
}

View File

@ -4,6 +4,7 @@ from dataclasses import dataclass
from .album import AlbumMetadata
from .track import TrackMetadata
from .util import typed
from ..filepath_utils import clean_pathsep
NON_STREAMABLE = "_non_streamable"
ORIGINAL_DOWNLOAD = "_original_download"
@ -43,12 +44,14 @@ def parse_soundcloud_id(item_id: str) -> tuple[str, str]:
@dataclass(slots=True)
class PlaylistMetadata:
name: str
owner: str
tracks: list[TrackMetadata] | list[str]
@classmethod
def from_qobuz(cls, resp: dict):
logger.debug(resp)
name = typed(resp["name"], str)
owner = typed(resp["owner"]["name"], str)
tracks = []
for i, track in enumerate(resp["tracks"]["items"]):
@ -61,7 +64,7 @@ class PlaylistMetadata:
continue
tracks.append(meta)
return cls(name, tracks)
return cls(name, owner, tracks)
@classmethod
def from_soundcloud(cls, resp: dict):
@ -80,23 +83,26 @@ class PlaylistMetadata:
PlaylistMetadata object.
"""
name = typed(resp["title"], str)
owner = typed(resp["user_id"], str)
tracks = [
TrackMetadata.from_soundcloud(AlbumMetadata.from_soundcloud(track), track)
for track in resp["tracks"]
]
return cls(name, tracks)
return cls(name, owner, tracks)
@classmethod
def from_deezer(cls, resp: dict):
name = typed(resp["title"], str)
owner = typed(resp["creator"]["name"], str)
tracks = [str(track["id"]) for track in resp["tracks"]]
return cls(name, tracks)
return cls(name, owner, tracks)
@classmethod
def from_tidal(cls, resp: dict):
name = typed(resp["title"], str)
owner = typed(resp["creator"]["id"], str)
tracks = [str(track["id"]) for track in resp["tracks"]]
return cls(name, tracks)
return cls(name, owner, tracks)
def ids(self) -> list[str]:
if len(self.tracks) == 0:
@ -118,3 +124,13 @@ class PlaylistMetadata:
return cls.from_tidal(resp)
else:
raise NotImplementedError(source)
def format_folder_path(self, formatter: str) -> str:
    """Render *formatter* with this playlist's name and owner.

    Available keys: "playlist", "owner". Both values are passed through
    ``clean_pathsep`` first; an empty owner is rendered as "Unknown".
    """
    playlist = clean_pathsep(self.name)
    owner = clean_pathsep(self.owner) or "Unknown"
    return formatter.format(playlist=playlist, owner=owner)

View File

@ -6,6 +6,7 @@ from typing import Optional
from .album import AlbumMetadata
from .util import safe_get, typed
from ..filepath_utils import clean_pathsep
logger = logging.getLogger("streamrip")
@ -228,12 +229,13 @@ class TrackMetadata:
# and "explicit", "albumcomposer"
none_text = "Unknown"
info = {
"title": self.title,
"title": clean_pathsep(self.title),
"tracknumber": self.tracknumber,
"artist": self.artist,
"albumartist": self.album.albumartist,
"albumcomposer": self.album.albumcomposer or none_text,
"composer": self.composer or none_text,
"artist": clean_pathsep(self.artist),
"albumtitle": clean_pathsep(self.album.album),
"albumartist": clean_pathsep(self.album.albumartist),
"albumcomposer": clean_pathsep(self.album.albumcomposer) or none_text,
"composer": clean_pathsep(self.composer) or none_text,
"explicit": " (Explicit) " if self.info.explicit else "",
}
return format_string.format(**info)