mirror of https://github.com/nathom/streamrip.git
193 lines
5.4 KiB
Python
193 lines
5.4 KiB
Python
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
|
|
from ..client import Client, SoundcloudClient
|
|
from ..config import Config
|
|
from ..db import Database
|
|
from ..media import (
|
|
Pending,
|
|
PendingAlbum,
|
|
PendingArtist,
|
|
PendingLabel,
|
|
PendingPlaylist,
|
|
PendingSingle,
|
|
)
|
|
|
|
# Package-wide logger; the name is a fixed string rather than ``__name__``.
logger = logging.getLogger("streamrip")

# Generic Qobuz / Tidal / Deezer URL.
# Capture groups: (1) source name, (2) media type keyword (may be unset when
# the path contains none of the listed keywords), (3) final path segment,
# which is used as the item id.
URL_REGEX = re.compile(
    r"https?://(?:www|open|play|listen)?\.?(qobuz|tidal|deezer)\.com(?:(?:/(album|artist|track|playlist|video|label))|(?:\/[-\w]+?))+\/([-\w]+)",
)

# Host-name dots are escaped so that "." only matches a literal dot; an
# unescaped dot would also accept hosts such as "soundcloudxcom".
SOUNDCLOUD_URL_REGEX = re.compile(r"https://soundcloud\.com/[-\w:/]+")
LASTFM_URL_REGEX = re.compile(r"https://www\.last\.fm/user/\w+/playlists/\w+")

# Qobuz "interpreter" (artist page) URL; group 1 is the last path segment,
# which is either a numeric artist id or a page slug.
QOBUZ_INTERPRETER_URL_REGEX = re.compile(
    r"https?://www\.qobuz\.com/\w\w-\w\w/interpreter/[-\w]+/([-\w]+)",
)
DEEZER_DYNAMIC_LINK_REGEX = re.compile(r"https://deezer\.page\.link/\w+")
YOUTUBE_URL_REGEX = re.compile(r"https://www\.youtube\.com/watch\?v=[-\w]+")
|
|
|
|
|
|
class URL(ABC):
    """Abstract base for a recognized URL.

    Concrete subclasses provide ``from_str`` to recognize a URL string and
    ``into_pending`` to turn the recognized URL into a ``Pending`` item.
    """

    # Regex match object the URL was recognized from.
    match: re.Match
    # Name of the source the URL belongs to.
    source: str

    def __init__(self, match: re.Match, source: str):
        """Store the regex match and the source name on the instance."""
        self.match, self.source = match, source

    @classmethod
    @abstractmethod
    def from_str(cls, url: str) -> URL | None:
        """Build an instance from ``url``, or return None if it is not recognized."""
        raise NotImplementedError

    @abstractmethod
    async def into_pending(
        self,
        client: Client,
        config: Config,
        db: Database,
    ) -> Pending:
        """Create the ``Pending`` item this URL refers to."""
        raise NotImplementedError
|
|
|
|
|
|
class GenericURL(URL):
    """URL matched by ``URL_REGEX`` (qobuz, tidal or deezer)."""

    @classmethod
    def from_str(cls, url: str) -> URL | None:
        """Return a ``GenericURL`` for ``url``, or None when it does not qualify.

        The URL qualifies only if ``URL_REGEX`` matches and all three capture
        groups (source, media type, item id) are set.
        """
        found = URL_REGEX.match(url)
        if found is None or any(part is None for part in found.groups()):
            return None
        # Group 1 is the source name.
        return cls(found, found.group(1))

    async def into_pending(
        self,
        client: Client,
        config: Config,
        db: Database,
    ) -> Pending:
        """Create the ``Pending`` item matching the URL's media type.

        Raises:
            NotImplementedError: the media type has no pending class here
                (the regex accepts "video", which is not handled).
        """
        source, media_type, item_id = self.match.groups()
        assert client.source == source

        pending_types = {
            "track": PendingSingle,
            "album": PendingAlbum,
            "playlist": PendingPlaylist,
            "artist": PendingArtist,
            "label": PendingLabel,
        }
        pending_cls = pending_types.get(media_type)
        if pending_cls is None:
            raise NotImplementedError
        return pending_cls(item_id, client, config, db)
|
|
|
|
|
|
class QobuzInterpreterURL(URL):
    """Qobuz "interpreter" URL, resolved to an artist."""

    # Pulls the first quoted argument of a ``getSimilarArtist('...')`` call
    # out of the page text; that argument is used as the artist id.
    interpreter_artist_regex = re.compile(r"getSimilarArtist\(\s*'(\w+)'")

    @classmethod
    def from_str(cls, url: str) -> URL | None:
        """Return an instance if ``url`` matches the interpreter pattern, else None."""
        found = QOBUZ_INTERPRETER_URL_REGEX.match(url)
        return None if found is None else cls(found, "qobuz")

    async def into_pending(
        self,
        client: Client,
        config: Config,
        db: Database,
    ) -> Pending:
        """Create a ``PendingArtist`` for the artist this URL points to.

        If the last path segment is all digits it is used directly as the
        artist id; otherwise the page is fetched and the id extracted from it.
        """
        url = self.match.group(0)
        candidate = self.match.group(1)

        if not candidate.isdigit():
            # The segment is not an id, so look it up in the page itself.
            fetched_id = await self.extract_interpreter_url(url, client)
            return PendingArtist(fetched_id, client, config, db)

        logger.debug("Found artist ID %s in interpreter url %s", candidate, url)
        return PendingArtist(candidate, client, config, db)

    @staticmethod
    async def extract_interpreter_url(url: str, client: Client) -> str:
        """Fetch a Qobuz interpreter page and pull the artist ID out of it.

        :param url: Urls of the form "https://www.qobuz.com/us-en/interpreter/{artist}/download-streaming-albums"
        :type url: str
        :param client: Client whose ``session`` is used for the GET request
        :rtype: str
        :raises Exception: if no artist id can be found in the page text
        """
        async with client.session.get(url) as resp:
            page_text = await resp.text()
            found = QobuzInterpreterURL.interpreter_artist_regex.search(page_text)

        if found is None:
            raise Exception(
                "Unable to extract artist id from interpreter url. Use a "
                "url that contains an artist id.",
            )
        return found.group(1)
|
|
|
|
|
|
class DeezerDynamicURL(URL):
    """Placeholder with no implementation yet.

    It does not override the abstract methods of ``URL``, so it cannot be
    instantiated.
    """
|
|
|
|
|
|
class SoundcloudURL(URL):
    """SoundCloud URL; the media type is resolved through the client."""

    # Fixed for every instance, so it is a class attribute.
    source = "soundcloud"

    def __init__(self, url: str):
        """Keep the URL string; no regex match object is stored."""
        self.url = url

    async def into_pending(
        self,
        client: SoundcloudClient,
        config: Config,
        db: Database,
    ) -> Pending:
        """Resolve the URL via ``client.resolve_url`` and create the pending item.

        Raises:
            NotImplementedError: the resolved "kind" is neither "track" nor
                "playlist"; the kind is passed as the exception argument.
        """
        info = await client.resolve_url(self.url)
        kind = info["kind"]
        resolved_id = str(info["id"])

        if kind == "track":
            return PendingSingle(resolved_id, client, config, db)
        if kind == "playlist":
            return PendingPlaylist(resolved_id, client, config, db)
        raise NotImplementedError(kind)

    @classmethod
    def from_str(cls, url: str):
        """Return an instance for the matched part of ``url``, or None if no match."""
        found = SOUNDCLOUD_URL_REGEX.match(url)
        return cls(found.group(0)) if found is not None else None
|
|
|
|
|
|
class LastFmURL(URL):
    """Placeholder with no implementation yet.

    It does not override the abstract methods of ``URL``, so it cannot be
    instantiated.
    """
|
|
|
|
|
|
def parse_url(url: str) -> URL | None:
    """Return a URL type given a url string.

    Surrounding whitespace is stripped, then each known URL type is tried in
    priority order. The first one that recognizes the string wins, and later
    types are not tried at all.

    Args:
    ----
        url (str): Url to parse

    Returns: A URL type, or None if nothing matched.
    """
    url = url.strip()
    # Order matters: earlier types take precedence over later ones.
    # TODO: the rest of the url types
    url_types = (GenericURL, QobuzInterpreterURL, SoundcloudURL)
    for url_type in url_types:
        parsed = url_type.from_str(url)
        if parsed is not None:
            return parsed
    return None
|