navidrome/core/external_metadata.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

589 lines
16 KiB
Go
Raw Normal View History

package core
import (
"context"
2022-12-31 22:58:07 +01:00
"errors"
2023-01-13 20:30:26 +01:00
"net/url"
"sort"
"strings"
"time"
2020-10-20 22:00:29 +02:00
"github.com/Masterminds/squirrel"
"github.com/navidrome/navidrome/conf"
2021-02-08 22:33:09 +01:00
"github.com/navidrome/navidrome/core/agents"
_ "github.com/navidrome/navidrome/core/agents/lastfm"
_ "github.com/navidrome/navidrome/core/agents/listenbrainz"
_ "github.com/navidrome/navidrome/core/agents/spotify"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
2021-05-29 05:51:56 +02:00
"github.com/navidrome/navidrome/utils"
. "github.com/navidrome/navidrome/utils/gg"
2024-05-12 02:04:21 +02:00
"github.com/navidrome/navidrome/utils/random"
2023-02-03 22:04:58 +01:00
"golang.org/x/sync/errgroup"
)
const (
	// unavailableArtistID is the placeholder ID assigned to similar artists
	// that could not be matched to an artist in the library.
	unavailableArtistID = "-1"
	// maxSimilarArtists is the number of similar artists requested from the
	// agents when an artist's external info is populated.
	maxSimilarArtists = 100

	// refreshDelay is the pause the background refresh worker takes before
	// each attempt to dequeue an item.
	refreshDelay = 5 * time.Second
	// refreshTimeout is the timeout of the context handed to a background
	// refresh.
	refreshTimeout = 15 * time.Second
	// refreshQueueLength is the buffer size of each background refresh queue.
	refreshQueueLength = 2000
)
type ExternalMetadata interface {
UpdateAlbumInfo(ctx context.Context, id string) (*model.Album, error)
UpdateArtistInfo(ctx context.Context, id string, count int, includeNotPresent bool) (*model.Artist, error)
2020-10-20 19:38:44 +02:00
SimilarSongs(ctx context.Context, id string, count int) (model.MediaFiles, error)
2020-10-21 04:53:52 +02:00
TopSongs(ctx context.Context, artist string, count int) (model.MediaFiles, error)
2023-01-13 20:30:26 +01:00
ArtistImage(ctx context.Context, id string) (*url.URL, error)
AlbumImage(ctx context.Context, id string) (*url.URL, error)
}
type externalMetadata struct {
2023-02-03 21:26:53 +01:00
ds model.DataStore
ag *agents.Agents
artistQueue chan<- *auxArtist
albumQueue chan<- *auxAlbum
}
// auxAlbum wraps a model.Album together with the name used when querying the
// external agents.
type auxAlbum struct {
model.Album
// Name shadows the embedded Album.Name; getAlbum sets it to the
// clearName-normalized album name.
Name string
}
2021-02-08 22:33:09 +01:00
// auxArtist wraps a model.Artist together with the name used when querying the
// external agents.
type auxArtist struct {
model.Artist
// Name shadows the embedded Artist.Name; getArtist and findArtistByName set
// it to the clearName-normalized artist name.
Name string
}
func NewExternalMetadata(ds model.DataStore, agents *agents.Agents) ExternalMetadata {
2023-02-03 21:26:53 +01:00
e := &externalMetadata{ds: ds, ag: agents}
e.artistQueue = startRefreshQueue(context.TODO(), e.populateArtistInfo)
e.albumQueue = startRefreshQueue(context.TODO(), e.populateAlbumInfo)
return e
2021-02-08 22:33:09 +01:00
}
// getAlbum resolves id to an album. The id may refer to an album directly or to
// a media file, in which case the media file's album is looked up instead. Any
// other kind of entity yields model.ErrNotFound.
func (e *externalMetadata) getAlbum(ctx context.Context, id string) (*auxAlbum, error) {
	found, err := model.GetEntityByID(ctx, e.ds, id)
	if err != nil {
		return nil, err
	}

	switch ent := found.(type) {
	case *model.MediaFile:
		// A track was given: follow it to the album it belongs to.
		return e.getAlbum(ctx, ent.AlbumID)
	case *model.Album:
		return &auxAlbum{Album: *ent, Name: clearName(ent.Name)}, nil
	}
	return nil, model.ErrNotFound
}
func (e *externalMetadata) UpdateAlbumInfo(ctx context.Context, id string) (*model.Album, error) {
album, err := e.getAlbum(ctx, id)
if err != nil {
log.Info(ctx, "Not found", "id", id)
return nil, err
}
updatedAt := V(album.ExternalInfoUpdatedAt)
if updatedAt.IsZero() {
log.Debug(ctx, "AlbumInfo not cached. Retrieving it now", "updatedAt", updatedAt, "id", id, "name", album.Name)
2023-02-03 21:26:53 +01:00
err = e.populateAlbumInfo(ctx, album)
if err != nil {
return nil, err
}
}
if time.Since(updatedAt) > conf.Server.DevAlbumInfoTimeToLive {
log.Debug("Found expired cached AlbumInfo, refreshing in the background", "updatedAt", album.ExternalInfoUpdatedAt, "name", album.Name)
2023-02-03 21:26:53 +01:00
enqueueRefresh(e.albumQueue, album)
}
return &album.Album, nil
}
2023-02-03 21:26:53 +01:00
func (e *externalMetadata) populateAlbumInfo(ctx context.Context, album *auxAlbum) error {
start := time.Now()
info, err := e.ag.GetAlbumInfo(ctx, album.Name, album.AlbumArtist, album.MbzAlbumID)
if errors.Is(err, agents.ErrNotFound) {
return nil
}
if err != nil {
2023-02-03 21:26:53 +01:00
log.Error("Error refreshing AlbumInfo", "id", album.ID, "name", album.Name, "artist", album.AlbumArtist,
"elapsed", time.Since(start), err)
return err
}
album.ExternalInfoUpdatedAt = P(time.Now())
album.ExternalUrl = info.URL
if info.Description != "" {
album.Description = info.Description
}
if len(info.Images) > 0 {
sort.Slice(info.Images, func(i, j int) bool {
return info.Images[i].Size > info.Images[j].Size
})
album.LargeImageUrl = info.Images[0].URL
if len(info.Images) >= 2 {
album.MediumImageUrl = info.Images[1].URL
}
if len(info.Images) >= 3 {
album.SmallImageUrl = info.Images[2].URL
}
}
err = e.ds.Album(ctx).Put(&album.Album)
if err != nil {
2023-02-03 21:26:53 +01:00
log.Error(ctx, "Error trying to update album external information", "id", album.ID, "name", album.Name,
"elapsed", time.Since(start), err)
} else {
log.Trace(ctx, "AlbumInfo collected", "album", album, "elapsed", time.Since(start))
}
return nil
}
func (e *externalMetadata) getArtist(ctx context.Context, id string) (*auxArtist, error) {
2021-02-08 22:33:09 +01:00
var entity interface{}
entity, err := model.GetEntityByID(ctx, e.ds, id)
2021-02-08 22:33:09 +01:00
if err != nil {
return nil, err
}
var artist auxArtist
switch v := entity.(type) {
case *model.Artist:
artist.Artist = *v
artist.Name = clearName(v.Name)
case *model.MediaFile:
return e.getArtist(ctx, v.ArtistID)
case *model.Album:
return e.getArtist(ctx, v.AlbumArtistID)
default:
return nil, model.ErrNotFound
}
return &artist, nil
}
2021-02-08 22:33:09 +01:00
// clearNameReplacer maps typographic dashes and quotes to their ASCII
// equivalents. The characters are written as escapes so that they survive
// editors and tools that mangle non-ASCII literals.
var clearNameReplacer = strings.NewReplacer(
	"\u2013", "-", // en dash
	"\u2010", "-", // hyphen
	"\u201c", `"`, // left double quotation mark
	"\u201d", `"`, // right double quotation mark
	"\u2018", `'`, // left single quotation mark
	"\u2019", `'`, // right single quotation mark
)

// clearName replaces some Unicode punctuation (dashes and curly quotes) with
// the equivalent ASCII characters. All other characters are returned unchanged.
func clearName(name string) string {
	return clearNameReplacer.Replace(name)
}
// UpdateArtistInfo returns the artist identified by id with its external info
// and up to similarCount similar artists loaded. When includeNotPresent is
// true, similar artists missing from the library are kept as placeholders.
//
// If loading the similar artists fails, the artist is still returned alongside
// the error.
func (e *externalMetadata) UpdateArtistInfo(ctx context.Context, id string, similarCount int, includeNotPresent bool) (*model.Artist, error) {
	refreshed, err := e.refreshArtistInfo(ctx, id)
	if err != nil {
		return nil, err
	}

	if loadErr := e.loadSimilar(ctx, refreshed, similarCount, includeNotPresent); loadErr != nil {
		return &refreshed.Artist, loadErr
	}
	return &refreshed.Artist, nil
}
func (e *externalMetadata) refreshArtistInfo(ctx context.Context, id string) (*auxArtist, error) {
artist, err := e.getArtist(ctx, id)
if err != nil {
return nil, err
}
// If we don't have any info, retrieves it now
updatedAt := V(artist.ExternalInfoUpdatedAt)
if updatedAt.IsZero() {
log.Debug(ctx, "ArtistInfo not cached. Retrieving it now", "updatedAt", updatedAt, "id", id, "name", artist.Name)
err := e.populateArtistInfo(ctx, artist)
if err != nil {
return nil, err
}
}
// If info is expired, trigger a populateArtistInfo in the background
if time.Since(updatedAt) > conf.Server.DevArtistInfoTimeToLive {
log.Debug("Found expired cached ArtistInfo, refreshing in the background", "updatedAt", updatedAt, "name", artist.Name)
2023-02-03 21:26:53 +01:00
enqueueRefresh(e.artistQueue, artist)
}
return artist, nil
}
func (e *externalMetadata) populateArtistInfo(ctx context.Context, artist *auxArtist) error {
2023-02-03 21:26:53 +01:00
start := time.Now()
2021-02-08 22:33:09 +01:00
// Get MBID first, if it is not yet available
if artist.MbzArtistID == "" {
mbid, err := e.ag.GetArtistMBID(ctx, artist.ID, artist.Name)
if mbid != "" && err == nil {
artist.MbzArtistID = mbid
}
2021-02-08 22:33:09 +01:00
}
2021-02-08 22:33:09 +01:00
// Call all registered agents and collect information
2023-02-03 22:04:58 +01:00
g := errgroup.Group{}
g.SetLimit(2)
g.Go(func() error { e.callGetImage(ctx, e.ag, artist); return nil })
g.Go(func() error { e.callGetBiography(ctx, e.ag, artist); return nil })
g.Go(func() error { e.callGetURL(ctx, e.ag, artist); return nil })
g.Go(func() error { e.callGetSimilar(ctx, e.ag, artist, maxSimilarArtists, true); return nil })
_ = g.Wait()
if utils.IsCtxDone(ctx) {
2023-02-03 21:26:53 +01:00
log.Warn(ctx, "ArtistInfo update canceled", "elapsed", "id", artist.ID, "name", artist.Name, time.Since(start), ctx.Err())
return ctx.Err()
2021-02-08 22:33:09 +01:00
}
artist.ExternalInfoUpdatedAt = P(time.Now())
err := e.ds.Artist(ctx).Put(&artist.Artist)
if err != nil {
2023-02-03 21:26:53 +01:00
log.Error(ctx, "Error trying to update artist external information", "id", artist.ID, "name", artist.Name,
"elapsed", time.Since(start), err)
} else {
log.Trace(ctx, "ArtistInfo collected", "artist", artist, "elapsed", time.Since(start))
}
return nil
}
func (e *externalMetadata) SimilarSongs(ctx context.Context, id string, count int) (model.MediaFiles, error) {
2021-02-08 22:33:09 +01:00
artist, err := e.getArtist(ctx, id)
2020-10-20 19:38:44 +02:00
if err != nil {
return nil, err
}
e.callGetSimilar(ctx, e.ag, artist, 15, false)
if utils.IsCtxDone(ctx) {
2021-02-08 22:33:09 +01:00
log.Warn(ctx, "SimilarSongs call canceled", ctx.Err())
return nil, ctx.Err()
2020-10-20 22:00:29 +02:00
}
weightedSongs := random.NewWeightedChooser[model.MediaFile]()
addArtist := func(a model.Artist, weightedSongs *random.WeightedChooser[model.MediaFile], count, artistWeight int) error {
if utils.IsCtxDone(ctx) {
2021-05-29 03:48:23 +02:00
log.Warn(ctx, "SimilarSongs call canceled", ctx.Err())
return ctx.Err()
2021-05-29 03:48:23 +02:00
}
2024-02-17 03:48:25 +01:00
topCount := max(count, 20)
topSongs, err := e.getMatchingTopSongs(ctx, e.ag, &auxArtist{Name: a.Name, Artist: a}, topCount)
2021-05-29 03:48:23 +02:00
if err != nil {
log.Warn(ctx, "Error getting artist's top songs", "artist", a.Name, err)
return nil
2021-05-29 03:48:23 +02:00
}
weight := topCount * (4 + artistWeight)
2021-05-29 03:48:23 +02:00
for _, mf := range topSongs {
weightedSongs.Add(mf, weight)
2021-05-29 03:48:23 +02:00
weight -= 4
}
return nil
}
err = addArtist(artist.Artist, weightedSongs, count, 10)
if err != nil {
return nil, err
}
for _, a := range artist.SimilarArtists {
err := addArtist(a, weightedSongs, count, 0)
if err != nil {
return nil, err
}
2020-10-20 22:00:29 +02:00
}
2021-05-29 03:48:23 +02:00
var similarSongs model.MediaFiles
for len(similarSongs) < count && weightedSongs.Size() > 0 {
s, err := weightedSongs.Pick()
2021-05-29 03:48:23 +02:00
if err != nil {
log.Warn(ctx, "Error getting weighted song", err)
continue
2021-02-08 22:33:09 +01:00
}
similarSongs = append(similarSongs, s)
2020-10-20 22:00:29 +02:00
}
2021-05-29 03:48:23 +02:00
return similarSongs, nil
2020-10-20 19:38:44 +02:00
}
2023-01-13 20:30:26 +01:00
func (e *externalMetadata) ArtistImage(ctx context.Context, id string) (*url.URL, error) {
artist, err := e.getArtist(ctx, id)
2023-01-13 20:30:26 +01:00
if err != nil {
return nil, err
}
e.callGetImage(ctx, e.ag, artist)
2023-01-13 20:30:26 +01:00
if utils.IsCtxDone(ctx) {
log.Warn(ctx, "ArtistImage call canceled", ctx.Err())
return nil, ctx.Err()
}
imageUrl := artist.ArtistImageUrl()
if imageUrl == "" {
return nil, agents.ErrNotFound
}
return url.Parse(imageUrl)
}
// AlbumImage returns the URL of the largest external image for the album
// identified by id (an album or media file ID).
//
// It returns agents.ErrNotFound when the agents have no info for the album or
// no image with a URL, ctx.Err() when the context is done, and any other agent
// error as is.
func (e *externalMetadata) AlbumImage(ctx context.Context, id string) (*url.URL, error) {
	album, err := e.getAlbum(ctx, id)
	if err != nil {
		return nil, err
	}

	info, err := e.ag.GetAlbumInfo(ctx, album.Name, album.AlbumArtist, album.MbzAlbumID)
	if errors.Is(err, agents.ErrNotFound) {
		return nil, err
	}
	if utils.IsCtxDone(ctx) {
		log.Warn(ctx, "AlbumImage call canceled", ctx.Err())
		return nil, ctx.Err()
	}
	// Any other failure must stop here too: info is not usable when err is set.
	if err != nil {
		return nil, err
	}

	// Return the biggest image. On equal sizes the later image wins.
	var img agents.ExternalImage
	for _, i := range info.Images {
		if img.Size <= i.Size {
			img = i
		}
	}
	if img.URL == "" {
		return nil, agents.ErrNotFound
	}
	return url.Parse(img.URL)
}
func (e *externalMetadata) TopSongs(ctx context.Context, artistName string, count int) (model.MediaFiles, error) {
artist, err := e.findArtistByName(ctx, artistName)
if err != nil {
log.Error(ctx, "Artist not found", "name", artistName, err)
return nil, nil
}
return e.getMatchingTopSongs(ctx, e.ag, artist, count)
2021-05-29 03:48:23 +02:00
}
func (e *externalMetadata) getMatchingTopSongs(ctx context.Context, agent agents.ArtistTopSongsRetriever, artist *auxArtist, count int) (model.MediaFiles, error) {
songs, err := agent.GetArtistTopSongs(ctx, artist.ID, artist.Name, artist.MbzArtistID, count)
2022-12-31 22:58:07 +01:00
if errors.Is(err, agents.ErrNotFound) {
return nil, nil
}
2020-10-21 04:53:52 +02:00
if err != nil {
return nil, err
}
2021-02-08 22:33:09 +01:00
var mfs model.MediaFiles
for _, t := range songs {
mf, err := e.findMatchingTrack(ctx, t.MBID, artist.ID, t.Name)
if err != nil {
continue
}
2021-02-08 22:33:09 +01:00
mfs = append(mfs, *mf)
2021-05-29 03:48:23 +02:00
if len(mfs) == count {
break
}
}
2021-08-19 14:17:22 +02:00
if len(mfs) == 0 {
log.Debug(ctx, "No matching top songs found", "name", artist.Name)
} else {
log.Debug(ctx, "Found matching top songs", "name", artist.Name, "numSongs", len(mfs))
}
2021-02-08 22:33:09 +01:00
return mfs, nil
}
func (e *externalMetadata) findMatchingTrack(ctx context.Context, mbid string, artistID, title string) (*model.MediaFile, error) {
if mbid != "" {
mfs, err := e.ds.MediaFile(ctx).GetAll(model.QueryOptions{
Filters: squirrel.Eq{"mbz_recording_id": mbid},
})
if err == nil && len(mfs) > 0 {
return &mfs[0], nil
}
return e.findMatchingTrack(ctx, "", artistID, title)
}
mfs, err := e.ds.MediaFile(ctx).GetAll(model.QueryOptions{
Filters: squirrel.And{
squirrel.Or{
squirrel.Eq{"artist_id": artistID},
squirrel.Eq{"album_artist_id": artistID},
2020-10-21 04:53:52 +02:00
},
squirrel.Like{"order_title": utils.SanitizeFieldForSorting(title)},
},
Sort: "starred desc, rating desc, year asc, compilation asc ",
Max: 1,
})
if err != nil || len(mfs) == 0 {
return nil, model.ErrNotFound
2020-10-21 04:53:52 +02:00
}
return &mfs[0], nil
2020-10-21 04:53:52 +02:00
}
func (e *externalMetadata) callGetURL(ctx context.Context, agent agents.ArtistURLRetriever, artist *auxArtist) {
2023-02-03 21:26:53 +01:00
artisURL, err := agent.GetArtistURL(ctx, artist.ID, artist.Name, artist.MbzArtistID)
if err != nil {
return
}
2023-02-03 21:26:53 +01:00
artist.ExternalUrl = artisURL
}
func (e *externalMetadata) callGetBiography(ctx context.Context, agent agents.ArtistBiographyRetriever, artist *auxArtist) {
bio, err := agent.GetArtistBiography(ctx, artist.ID, clearName(artist.Name), artist.MbzArtistID)
if err != nil {
return
2020-10-20 21:31:49 +02:00
}
2021-10-27 01:33:21 +02:00
bio = utils.SanitizeText(bio)
bio = strings.ReplaceAll(bio, "\n", " ")
artist.Biography = strings.ReplaceAll(bio, "<a ", "<a target='_blank' ")
2021-02-08 22:33:09 +01:00
}
2020-10-20 21:31:49 +02:00
func (e *externalMetadata) callGetImage(ctx context.Context, agent agents.ArtistImageRetriever, artist *auxArtist) {
images, err := agent.GetArtistImages(ctx, artist.ID, artist.Name, artist.MbzArtistID)
if err != nil {
return
2020-10-20 21:31:49 +02:00
}
sort.Slice(images, func(i, j int) bool { return images[i].Size > images[j].Size })
2020-10-20 21:31:49 +02:00
if len(images) >= 1 {
artist.LargeImageUrl = images[0].URL
}
if len(images) >= 2 {
artist.MediumImageUrl = images[1].URL
}
if len(images) >= 3 {
artist.SmallImageUrl = images[2].URL
}
}
func (e *externalMetadata) callGetSimilar(ctx context.Context, agent agents.ArtistSimilarRetriever, artist *auxArtist,
limit int, includeNotPresent bool) {
similar, err := agent.GetSimilarArtists(ctx, artist.ID, artist.Name, artist.MbzArtistID, limit)
if len(similar) == 0 || err != nil {
return
}
start := time.Now()
sa, err := e.mapSimilarArtists(ctx, similar, includeNotPresent)
2023-02-03 15:57:29 +01:00
log.Debug(ctx, "Mapped Similar Artists", "artist", artist.Name, "numSimilar", len(sa), "elapsed", time.Since(start))
if err != nil {
return
}
artist.SimilarArtists = sa
}
func (e *externalMetadata) mapSimilarArtists(ctx context.Context, similar []agents.Artist, includeNotPresent bool) (model.Artists, error) {
2021-02-08 22:33:09 +01:00
var result model.Artists
var notPresent []string
2021-02-08 22:33:09 +01:00
// First select artists that are present.
for _, s := range similar {
sa, err := e.findArtistByName(ctx, s.Name)
if err != nil {
notPresent = append(notPresent, s.Name)
continue
}
result = append(result, sa.Artist)
2020-10-19 04:02:30 +02:00
}
2021-02-08 22:33:09 +01:00
// Then fill up with non-present artists
if includeNotPresent {
for _, s := range notPresent {
sa := model.Artist{ID: unavailableArtistID, Name: s}
result = append(result, sa)
}
2020-10-19 04:02:30 +02:00
}
2021-02-08 22:33:09 +01:00
return result, nil
}
func (e *externalMetadata) findArtistByName(ctx context.Context, artistName string) (*auxArtist, error) {
2021-02-08 22:33:09 +01:00
artists, err := e.ds.Artist(ctx).GetAll(model.QueryOptions{
2021-08-19 14:17:22 +02:00
Filters: squirrel.Like{"artist.name": artistName},
2021-02-08 22:33:09 +01:00
Max: 1,
})
if err != nil {
return nil, err
}
2021-02-08 22:33:09 +01:00
if len(artists) == 0 {
return nil, model.ErrNotFound
}
artist := &auxArtist{
Artist: artists[0],
Name: clearName(artists[0].Name),
}
return artist, nil
}
func (e *externalMetadata) loadSimilar(ctx context.Context, artist *auxArtist, count int, includeNotPresent bool) error {
var ids []string
for _, sa := range artist.SimilarArtists {
2021-02-07 22:46:15 +01:00
if sa.ID == unavailableArtistID {
continue
}
ids = append(ids, sa.ID)
}
similar, err := e.ds.Artist(ctx).GetAll(model.QueryOptions{
Filters: squirrel.Eq{"artist.id": ids},
})
if err != nil {
log.Error("Error loading similar artists", "id", artist.ID, "name", artist.Name, err)
return err
}
// Use a map and iterate through original array, to keep the same order
artistMap := make(map[string]model.Artist)
for _, sa := range similar {
artistMap[sa.ID] = sa
}
var loaded model.Artists
for _, sa := range artist.SimilarArtists {
if len(loaded) >= count {
break
}
la, ok := artistMap[sa.ID]
if !ok {
if !includeNotPresent {
continue
}
la = sa
2021-02-07 22:46:15 +01:00
la.ID = unavailableArtistID
}
loaded = append(loaded, la)
}
artist.SimilarArtists = loaded
return nil
}
2023-02-03 21:26:53 +01:00
// startRefreshQueue starts a background worker and returns the queue that
// feeds it.
//
// The worker waits refreshDelay before each dequeue, so at most one item is
// processed per refreshDelay. Each item is passed to processFn with its own
// context, derived from ctx and limited to refreshTimeout. The timeout only
// starts once the item has been dequeued, so time spent idle does not reduce
// it. Errors returned by processFn are ignored, as processFn is expected to
// log its own failures.
//
// The worker exits when ctx is done. The queue is buffered with
// refreshQueueLength entries and is never closed.
func startRefreshQueue[T any](ctx context.Context, processFn func(context.Context, T) error) chan<- T {
	queue := make(chan T, refreshQueueLength)
	go func() {
		for {
			// Throttle: pause between items, but stop right away on cancel.
			timer := time.NewTimer(refreshDelay)
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-timer.C:
			}

			select {
			case <-ctx.Done():
				return
			case item := <-queue:
				itemCtx, cancel := context.WithTimeout(ctx, refreshTimeout)
				_ = processFn(itemCtx, item)
				cancel()
			}
		}
	}()
	return queue
}
// enqueueRefresh offers item to queue without ever blocking the caller: when
// the queue cannot accept it right away, the item is dropped.
func enqueueRefresh[T any](queue chan<- T, item T) {
	select {
	case queue <- item:
		return
	default:
		// It is ok to miss a refresh
		return
	}
}