navidrome/core/external_metadata.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

506 lines
13 KiB
Go
Raw Normal View History

package core
import (
"context"
"sort"
"strings"
"sync"
"time"
2021-05-29 03:48:23 +02:00
"github.com/navidrome/navidrome/utils"
2020-10-20 22:00:29 +02:00
"github.com/Masterminds/squirrel"
"github.com/microcosm-cc/bluemonday"
2021-02-08 22:33:09 +01:00
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/consts"
2021-02-08 22:33:09 +01:00
"github.com/navidrome/navidrome/core/agents"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
)
// Limits and sentinel values used while resolving similar artists.
const (
	// maxSimilarArtists is the number of similar artists requested from the
	// agents when an artist's external info is refreshed.
	maxSimilarArtists = 100

	// unavailableArtistID is the placeholder ID assigned to similar artists
	// that could not be found in the local library.
	unavailableArtistID = "-1"
)
type ExternalMetadata interface {
UpdateArtistInfo(ctx context.Context, id string, count int, includeNotPresent bool) (*model.Artist, error)
2020-10-20 19:38:44 +02:00
SimilarSongs(ctx context.Context, id string, count int) (model.MediaFiles, error)
2020-10-21 04:53:52 +02:00
TopSongs(ctx context.Context, artist string, count int) (model.MediaFiles, error)
}
type externalMetadata struct {
2021-02-08 22:33:09 +01:00
ds model.DataStore
}
2021-02-08 22:33:09 +01:00
// auxArtist wraps a model.Artist and adds a Name field that shadows the
// embedded Artist.Name. getArtist and findArtistByName fill the outer Name
// with clearName(...) of the stored name; it is the name handed to the agents.
type auxArtist struct {
model.Artist
Name string
}
func NewExternalMetadata(ds model.DataStore) ExternalMetadata {
return &externalMetadata{ds: ds}
2021-02-08 22:33:09 +01:00
}
func (e *externalMetadata) initAgents(ctx context.Context) []agents.Interface {
2021-02-08 22:33:09 +01:00
order := strings.Split(conf.Server.Agents, ",")
order = append(order, agents.PlaceholderAgentName)
var res []agents.Interface
for _, name := range order {
init, ok := agents.Map[name]
if !ok {
log.Error(ctx, "Agent not available. Check configuration", "name", name)
continue
}
res = append(res, init(ctx))
}
return res
}
func (e *externalMetadata) getArtist(ctx context.Context, id string) (*auxArtist, error) {
2021-02-08 22:33:09 +01:00
var entity interface{}
entity, err := GetEntityByID(ctx, e.ds, id)
if err != nil {
return nil, err
}
var artist auxArtist
switch v := entity.(type) {
case *model.Artist:
artist.Artist = *v
artist.Name = clearName(v.Name)
case *model.MediaFile:
return e.getArtist(ctx, v.ArtistID)
case *model.Album:
return e.getArtist(ctx, v.AlbumArtistID)
default:
return nil, model.ErrNotFound
}
return &artist, nil
}
2021-02-08 22:33:09 +01:00
// asciiPunctuation maps typographic dashes and quotes to their plain ASCII
// equivalents. The characters are written as escapes so they cannot be lost
// to an encoding problem again.
//
// NOTE(review): the dash and single-quote literals were empty strings in the
// reviewed source, which made strings.ReplaceAll insert the replacement
// between every rune. U+2013, U+2010, U+2018 and U+2019 are the presumed
// originals; confirm against the upstream history.
var asciiPunctuation = strings.NewReplacer(
	"\u2013", "-", // en dash
	"\u2010", "-", // hyphen
	"\u201c", `"`, // left double quotation mark
	"\u201d", `"`, // right double quotation mark
	"\u2018", `'`, // left single quotation mark
	"\u2019", `'`, // right single quotation mark
)

// clearName replaces some Unicode chars with their equivalent ASCII. Text
// that contains none of the mapped characters is returned unchanged.
func clearName(name string) string {
	return asciiPunctuation.Replace(name)
}
func (e *externalMetadata) UpdateArtistInfo(ctx context.Context, id string, similarCount int, includeNotPresent bool) (*model.Artist, error) {
artist, err := e.getArtist(ctx, id)
if err != nil {
return nil, err
}
// If we have fresh info, just return it and trigger a refresh in the background
if time.Since(artist.ExternalInfoUpdatedAt) < consts.ArtistInfoTimeToLive {
go func() {
2021-05-28 17:12:44 +02:00
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
defer cancel()
err := e.refreshArtistInfo(ctx, artist)
if err != nil {
log.Error("Error refreshing ArtistInfo", "id", id, "name", artist.Name, err)
}
}()
log.Debug("Found cached ArtistInfo, refreshing in the background", "updatedAt", artist.ExternalInfoUpdatedAt, "name", artist.Name)
err := e.loadSimilar(ctx, artist, similarCount, includeNotPresent)
2021-02-08 22:33:09 +01:00
return &artist.Artist, err
}
2021-02-08 22:33:09 +01:00
log.Debug(ctx, "ArtistInfo not cached or expired", "updatedAt", artist.ExternalInfoUpdatedAt, "id", id, "name", artist.Name)
err = e.refreshArtistInfo(ctx, artist)
if err != nil {
return nil, err
}
err = e.loadSimilar(ctx, artist, similarCount, includeNotPresent)
return &artist.Artist, err
}
func (e *externalMetadata) refreshArtistInfo(ctx context.Context, artist *auxArtist) error {
allAgents := e.initAgents(ctx)
2021-02-08 22:33:09 +01:00
// Get MBID first, if it is not yet available
if artist.MbzArtistID == "" {
e.callGetMBID(ctx, allAgents, artist)
}
2021-02-08 22:33:09 +01:00
// Call all registered agents and collect information
wg := &sync.WaitGroup{}
e.callGetBiography(ctx, allAgents, artist, wg)
e.callGetURL(ctx, allAgents, artist, wg)
e.callGetImage(ctx, allAgents, artist, wg)
2021-05-29 03:48:23 +02:00
e.callGetSimilar(ctx, allAgents, artist, maxSimilarArtists, true, wg)
wg.Wait()
2021-02-08 22:33:09 +01:00
if isDone(ctx) {
log.Warn(ctx, "ArtistInfo update canceled", ctx.Err())
return ctx.Err()
2021-02-08 22:33:09 +01:00
}
artist.ExternalInfoUpdatedAt = time.Now()
err := e.ds.Artist(ctx).Put(&artist.Artist)
if err != nil {
log.Error(ctx, "Error trying to update artist external information", "id", artist.ID, "name", artist.Name, err)
}
log.Trace(ctx, "ArtistInfo collected", "artist", artist)
return nil
}
func (e *externalMetadata) SimilarSongs(ctx context.Context, id string, count int) (model.MediaFiles, error) {
2021-02-08 22:33:09 +01:00
allAgents := e.initAgents(ctx)
artist, err := e.getArtist(ctx, id)
2020-10-20 19:38:44 +02:00
if err != nil {
return nil, err
}
2021-02-08 22:33:09 +01:00
wg := &sync.WaitGroup{}
2021-05-29 03:48:23 +02:00
e.callGetSimilar(ctx, allAgents, artist, 15, false, wg)
2021-02-08 22:33:09 +01:00
wg.Wait()
2020-10-20 19:38:44 +02:00
2021-02-08 22:33:09 +01:00
if isDone(ctx) {
log.Warn(ctx, "SimilarSongs call canceled", ctx.Err())
return nil, ctx.Err()
2020-10-20 22:00:29 +02:00
}
2021-05-29 03:48:23 +02:00
artists := model.Artists{artist.Artist}
artists = append(artists, artist.SimilarArtists...)
weightedSongs := utils.NewWeightedRandomChooser()
for _, a := range artists {
if isDone(ctx) {
log.Warn(ctx, "SimilarSongs call canceled", ctx.Err())
return nil, ctx.Err()
}
topCount := utils.MaxInt(count, 20)
topSongs, err := e.getMatchingTopSongs(ctx, allAgents, &auxArtist{Name: a.Name, Artist: a}, topCount)
if err != nil {
log.Warn(ctx, "Error getting artist's top songs", "artist", a.Name, err)
continue
}
weight := topCount * 4
for _, mf := range topSongs {
weightedSongs.Put(mf, weight)
weight -= 4
}
2020-10-20 22:00:29 +02:00
}
2021-05-29 03:48:23 +02:00
var similarSongs model.MediaFiles
for len(similarSongs) < count && weightedSongs.Size() > 0 {
s, err := weightedSongs.GetAndRemove()
if err != nil {
log.Warn(ctx, "Error getting weighted song", err)
continue
2021-02-08 22:33:09 +01:00
}
2021-05-29 03:48:23 +02:00
similarSongs = append(similarSongs, s.(model.MediaFile))
2020-10-20 22:00:29 +02:00
}
2021-05-29 03:48:23 +02:00
return similarSongs, nil
2020-10-20 19:38:44 +02:00
}
func (e *externalMetadata) TopSongs(ctx context.Context, artistName string, count int) (model.MediaFiles, error) {
2021-05-29 05:00:39 +02:00
allAgents := e.initAgents(ctx)
artist, err := e.findArtistByName(ctx, artistName)
if err != nil {
log.Error(ctx, "Artist not found", "name", artistName, err)
return nil, nil
}
2021-05-29 05:00:39 +02:00
return e.getMatchingTopSongs(ctx, allAgents, artist, count)
2021-05-29 03:48:23 +02:00
}
func (e *externalMetadata) getMatchingTopSongs(ctx context.Context, allAgents []agents.Interface, artist *auxArtist, count int) (model.MediaFiles, error) {
songs, err := e.callGetTopSongs(ctx, allAgents, artist, 50)
2020-10-21 04:53:52 +02:00
if err != nil {
return nil, err
}
2021-02-08 22:33:09 +01:00
var mfs model.MediaFiles
for _, t := range songs {
mf, err := e.findMatchingTrack(ctx, t.MBID, artist.ID, t.Name)
if err != nil {
continue
}
2021-02-08 22:33:09 +01:00
mfs = append(mfs, *mf)
2021-05-29 03:48:23 +02:00
if len(mfs) == count {
break
}
}
2021-02-08 22:33:09 +01:00
return mfs, nil
}
func (e *externalMetadata) findMatchingTrack(ctx context.Context, mbid string, artistID, title string) (*model.MediaFile, error) {
if mbid != "" {
mfs, err := e.ds.MediaFile(ctx).GetAll(model.QueryOptions{
Filters: squirrel.Eq{"mbz_track_id": mbid},
})
if err == nil && len(mfs) > 0 {
return &mfs[0], nil
}
}
mfs, err := e.ds.MediaFile(ctx).GetAll(model.QueryOptions{
Filters: squirrel.And{
squirrel.Or{
squirrel.Eq{"artist_id": artistID},
squirrel.Eq{"album_artist_id": artistID},
2020-10-21 04:53:52 +02:00
},
squirrel.Like{"title": title},
},
Sort: "starred desc, rating desc, year asc",
})
if err != nil || len(mfs) == 0 {
return nil, model.ErrNotFound
2020-10-21 04:53:52 +02:00
}
return &mfs[0], nil
2020-10-21 04:53:52 +02:00
}
2021-02-08 22:33:09 +01:00
// isDone reports, without blocking, whether ctx has already been canceled or
// has timed out.
func isDone(ctx context.Context) bool {
	// Err is non-nil exactly when the Done channel has been closed.
	return ctx.Err() != nil
}
func (e *externalMetadata) callGetMBID(ctx context.Context, allAgents []agents.Interface, artist *auxArtist) {
2021-02-08 22:33:09 +01:00
start := time.Now()
for _, a := range allAgents {
if isDone(ctx) {
break
}
agent, ok := a.(agents.ArtistMBIDRetriever)
if !ok {
continue
}
2021-02-09 17:19:32 +01:00
mbid, err := agent.GetMBID(artist.ID, artist.Name)
2021-02-08 22:33:09 +01:00
if mbid != "" && err == nil {
artist.MbzArtistID = mbid
log.Debug(ctx, "Got MBID", "agent", a.AgentName(), "artist", artist.Name, "mbid", mbid, "elapsed", time.Since(start))
break
}
2020-10-20 21:31:49 +02:00
}
2021-02-08 22:33:09 +01:00
}
2020-10-20 21:31:49 +02:00
func (e *externalMetadata) callGetTopSongs(ctx context.Context, allAgents []agents.Interface, artist *auxArtist,
2021-02-08 22:33:09 +01:00
count int) ([]agents.Song, error) {
start := time.Now()
for _, a := range allAgents {
if isDone(ctx) {
break
}
agent, ok := a.(agents.ArtistTopSongsRetriever)
if !ok {
continue
}
2021-02-09 17:19:32 +01:00
songs, err := agent.GetTopSongs(artist.ID, artist.Name, artist.MbzArtistID, count)
2021-02-08 22:33:09 +01:00
if len(songs) > 0 && err == nil {
log.Debug(ctx, "Got Top Songs", "agent", a.AgentName(), "artist", artist.Name, "songs", songs, "elapsed", time.Since(start))
return songs, err
}
2020-10-20 21:31:49 +02:00
}
2021-02-08 22:33:09 +01:00
return nil, nil
2020-10-20 21:31:49 +02:00
}
func (e *externalMetadata) callGetURL(ctx context.Context, allAgents []agents.Interface, artist *auxArtist, wg *sync.WaitGroup) {
2021-02-08 22:33:09 +01:00
wg.Add(1)
go func() {
defer wg.Done()
start := time.Now()
for _, a := range allAgents {
if isDone(ctx) {
break
}
2021-02-08 22:33:09 +01:00
agent, ok := a.(agents.ArtistURLRetriever)
if !ok {
continue
}
2021-02-09 17:19:32 +01:00
url, err := agent.GetURL(artist.ID, artist.Name, artist.MbzArtistID)
2021-02-08 22:33:09 +01:00
if url != "" && err == nil {
artist.ExternalUrl = url
log.Debug(ctx, "Got External Url", "agent", a.AgentName(), "artist", artist.Name, "url", url, "elapsed", time.Since(start))
break
}
}
}()
}
func (e *externalMetadata) callGetBiography(ctx context.Context, allAgents []agents.Interface, artist *auxArtist, wg *sync.WaitGroup) {
2021-02-08 22:33:09 +01:00
wg.Add(1)
go func() {
defer wg.Done()
start := time.Now()
for _, a := range allAgents {
if isDone(ctx) {
break
}
agent, ok := a.(agents.ArtistBiographyRetriever)
if !ok {
continue
}
2021-02-09 17:19:32 +01:00
bio, err := agent.GetBiography(artist.ID, clearName(artist.Name), artist.MbzArtistID)
2021-02-08 22:33:09 +01:00
if bio != "" && err == nil {
policy := bluemonday.UGCPolicy()
bio = policy.Sanitize(bio)
bio = strings.ReplaceAll(bio, "\n", " ")
artist.Biography = strings.ReplaceAll(bio, "<a ", "<a target='_blank' ")
log.Debug(ctx, "Got Biography", "agent", a.AgentName(), "artist", artist.Name, "len", len(bio), "elapsed", time.Since(start))
break
}
2021-02-08 22:33:09 +01:00
}
}()
}
func (e *externalMetadata) callGetImage(ctx context.Context, allAgents []agents.Interface, artist *auxArtist, wg *sync.WaitGroup) {
2021-02-08 22:33:09 +01:00
wg.Add(1)
go func() {
defer wg.Done()
start := time.Now()
for _, a := range allAgents {
if isDone(ctx) {
break
}
2021-02-08 22:33:09 +01:00
agent, ok := a.(agents.ArtistImageRetriever)
if !ok {
continue
}
2021-02-09 17:19:32 +01:00
images, err := agent.GetImages(artist.ID, artist.Name, artist.MbzArtistID)
2021-02-08 22:33:09 +01:00
if len(images) == 0 || err != nil {
continue
}
2021-02-08 22:33:09 +01:00
log.Debug(ctx, "Got Images", "agent", a.AgentName(), "artist", artist.Name, "images", images, "elapsed", time.Since(start))
sort.Slice(images, func(i, j int) bool { return images[i].Size > images[j].Size })
if len(images) >= 1 {
artist.LargeImageUrl = images[0].URL
}
if len(images) >= 2 {
artist.MediumImageUrl = images[1].URL
}
if len(images) >= 3 {
artist.SmallImageUrl = images[2].URL
}
break
}
}()
}
2021-05-29 03:48:23 +02:00
func (e *externalMetadata) callGetSimilar(ctx context.Context, allAgents []agents.Interface, artist *auxArtist, limit int, includeNotPresent bool, wg *sync.WaitGroup) {
2021-02-08 22:33:09 +01:00
wg.Add(1)
go func() {
defer wg.Done()
start := time.Now()
for _, a := range allAgents {
if isDone(ctx) {
break
}
agent, ok := a.(agents.ArtistSimilarRetriever)
if !ok {
continue
}
2021-02-09 17:19:32 +01:00
similar, err := agent.GetSimilar(artist.ID, artist.Name, artist.MbzArtistID, limit)
2021-02-08 22:33:09 +01:00
if len(similar) == 0 || err != nil {
continue
}
2021-05-29 03:48:23 +02:00
sa, err := e.mapSimilarArtists(ctx, similar, includeNotPresent)
2021-02-08 22:33:09 +01:00
if err != nil {
continue
}
log.Debug(ctx, "Got Similar Artists", "agent", a.AgentName(), "artist", artist.Name, "similar", similar, "elapsed", time.Since(start))
artist.SimilarArtists = sa
break
}
}()
}
func (e *externalMetadata) mapSimilarArtists(ctx context.Context, similar []agents.Artist, includeNotPresent bool) (model.Artists, error) {
2021-02-08 22:33:09 +01:00
var result model.Artists
var notPresent []string
2021-02-08 22:33:09 +01:00
// First select artists that are present.
for _, s := range similar {
sa, err := e.findArtistByName(ctx, s.Name)
if err != nil {
notPresent = append(notPresent, s.Name)
continue
}
result = append(result, sa.Artist)
2020-10-19 04:02:30 +02:00
}
2021-02-08 22:33:09 +01:00
// Then fill up with non-present artists
if includeNotPresent {
for _, s := range notPresent {
sa := model.Artist{ID: unavailableArtistID, Name: s}
result = append(result, sa)
}
2020-10-19 04:02:30 +02:00
}
2021-02-08 22:33:09 +01:00
return result, nil
}
func (e *externalMetadata) findArtistByName(ctx context.Context, artistName string) (*auxArtist, error) {
2021-02-08 22:33:09 +01:00
artists, err := e.ds.Artist(ctx).GetAll(model.QueryOptions{
Filters: squirrel.Like{"name": artistName},
Max: 1,
})
if err != nil {
return nil, err
}
2021-02-08 22:33:09 +01:00
if len(artists) == 0 {
return nil, model.ErrNotFound
}
artist := &auxArtist{
Artist: artists[0],
Name: clearName(artists[0].Name),
}
return artist, nil
}
func (e *externalMetadata) loadSimilar(ctx context.Context, artist *auxArtist, count int, includeNotPresent bool) error {
var ids []string
for _, sa := range artist.SimilarArtists {
2021-02-07 22:46:15 +01:00
if sa.ID == unavailableArtistID {
continue
}
ids = append(ids, sa.ID)
}
similar, err := e.ds.Artist(ctx).GetAll(model.QueryOptions{
Filters: squirrel.Eq{"id": ids},
})
if err != nil {
return err
}
// Use a map and iterate through original array, to keep the same order
artistMap := make(map[string]model.Artist)
for _, sa := range similar {
artistMap[sa.ID] = sa
}
var loaded model.Artists
for _, sa := range artist.SimilarArtists {
if len(loaded) >= count {
break
}
la, ok := artistMap[sa.ID]
if !ok {
if !includeNotPresent {
continue
}
la = sa
2021-02-07 22:46:15 +01:00
la.ID = unavailableArtistID
}
loaded = append(loaded, la)
}
artist.SimilarArtists = loaded
return nil
}