Loading tags from files

This commit is contained in:
Deluan 2023-12-26 13:47:21 -05:00
parent 0e8042d344
commit 33e40d3eb6
4 changed files with 270 additions and 9 deletions

View File

@ -3,6 +3,8 @@ package scanner2
import (
"io/fs"
"time"
"github.com/navidrome/navidrome/model"
)
type folderEntry struct {
@ -15,6 +17,10 @@ type folderEntry struct {
imageFiles map[string]fs.DirEntry
playlists []fs.DirEntry
imagesUpdatedAt time.Time
tracks model.MediaFiles
albums model.Albums
artists model.Artists
missingTracks model.MediaFiles
}
func (f *folderEntry) isExpired() bool {

196
scanner2/mapping.go Normal file
View File

@ -0,0 +1,196 @@
package scanner2
import (
"crypto/md5"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/deluan/sanitize"
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/consts"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/scanner/metadata"
"github.com/navidrome/navidrome/utils"
)
// mediaFileMapper converts the tags extracted from audio files
// (metadata.Tags) into model.MediaFile values.
type mediaFileMapper struct {
// rootFolder is the path of the folder being mapped. It is stripped from a
// file's path when a track title has to be derived from the file name.
rootFolder string
}
// newMediaFileMapper returns a mapper whose root folder is the path of the
// given folder entry.
func newMediaFileMapper(entry *folderEntry) *mediaFileMapper {
	mapper := new(mediaFileMapper)
	mapper.rootFolder = entry.path
	return mapper
}
// toMediaFile builds a model.MediaFile from the tags extracted from a single
// audio file.
//
// IDs are derived from the tags (see trackID, albumID, artistID and
// albumArtistID). Empty album/artist names are replaced by the fallbacks
// implemented in the map* helpers, and the Order* fields are derived from the
// already-mapped names, so the assignment order below matters.
func (s mediaFileMapper) toMediaFile(md metadata.Tags) model.MediaFile {
	var mf model.MediaFile

	mf.ID = s.trackID(md)
	// Dates must be resolved first: the album ID may depend on the release date.
	mf.Year, mf.Date, mf.OriginalYear, mf.OriginalDate, mf.ReleaseYear, mf.ReleaseDate = s.mapDates(md)
	mf.Title = s.mapTrackTitle(md)
	mf.AlbumID = s.albumID(md, mf.ReleaseDate)
	mf.Album = s.mapAlbumName(md)
	mf.ArtistID = s.artistID(md)
	mf.Artist = s.mapArtistName(md)
	mf.AlbumArtistID = s.albumArtistID(md)
	mf.AlbumArtist = s.mapAlbumArtistName(md)
	mf.Genre, mf.Genres = s.mapGenres(md.Genres())
	mf.Compilation = md.Compilation()
	// Only the first value returned by TrackNumber/DiscNumber is stored.
	mf.TrackNumber, _ = md.TrackNumber()
	mf.DiscNumber, _ = md.DiscNumber()
	mf.DiscSubtitle = md.DiscSubtitle()

	// Audio and file properties.
	mf.Duration = md.Duration()
	mf.BitRate = md.BitRate()
	mf.Channels = md.Channels()
	mf.Path = md.FilePath()
	mf.Suffix = md.Suffix()
	mf.Size = md.Size()
	mf.HasCoverArt = md.HasPicture()

	// Sort fields come straight from the tags; order fields are computed from
	// the names mapped above.
	mf.SortTitle = md.SortTitle()
	mf.SortAlbumName = md.SortAlbum()
	mf.SortArtistName = md.SortArtist()
	mf.SortAlbumArtistName = md.SortAlbumArtist()
	mf.OrderTitle = strings.TrimSpace(sanitize.Accents(mf.Title))
	mf.OrderAlbumName = sanitizeFieldForSorting(mf.Album)
	mf.OrderArtistName = sanitizeFieldForSorting(mf.Artist)
	mf.OrderAlbumArtistName = sanitizeFieldForSorting(mf.AlbumArtist)

	// Catalog and MusicBrainz identifiers.
	mf.CatalogNum = md.CatalogNum()
	mf.MbzRecordingID = md.MbzRecordingID()
	mf.MbzReleaseTrackID = md.MbzReleaseTrackID()
	mf.MbzAlbumID = md.MbzAlbumID()
	mf.MbzArtistID = md.MbzArtistID()
	mf.MbzAlbumArtistID = md.MbzAlbumArtistID()
	mf.MbzAlbumType = md.MbzAlbumType()
	mf.MbzAlbumComment = md.MbzAlbumComment()

	// ReplayGain values.
	mf.RGAlbumGain = md.RGAlbumGain()
	mf.RGAlbumPeak = md.RGAlbumPeak()
	mf.RGTrackGain = md.RGTrackGain()
	mf.RGTrackPeak = md.RGTrackPeak()

	mf.Comment = utils.SanitizeText(md.Comment())
	mf.Lyrics = utils.SanitizeText(md.Lyrics())
	mf.Bpm = md.Bpm()
	mf.CreatedAt = md.BirthTime()
	mf.UpdatedAt = md.ModificationTime()
	return mf
}
func sanitizeFieldForSorting(originalValue string) string {
v := strings.TrimSpace(sanitize.Accents(originalValue))
return utils.NoArticle(v)
}
// mapTrackTitle returns the title tag. When the tag is empty, the title is
// the file path with the mapper's root folder prefix and the file extension
// removed.
func (s mediaFileMapper) mapTrackTitle(md metadata.Tags) string {
	if title := md.Title(); title != "" {
		return title
	}
	relPath := strings.TrimPrefix(md.FilePath(), s.rootFolder+string(os.PathSeparator))
	return strings.TrimSuffix(relPath, filepath.Ext(relPath))
}
// mapAlbumArtistName resolves the album artist name, in order of preference:
// the album artist tag, consts.VariousArtists for compilations, the artist
// tag, and finally consts.UnknownArtist.
func (s mediaFileMapper) mapAlbumArtistName(md metadata.Tags) string {
	if albumArtist := md.AlbumArtist(); albumArtist != "" {
		return albumArtist
	}
	if md.Compilation() {
		return consts.VariousArtists
	}
	if artist := md.Artist(); artist != "" {
		return artist
	}
	return consts.UnknownArtist
}
// mapArtistName returns the artist tag, or consts.UnknownArtist when the tag
// is empty.
func (s mediaFileMapper) mapArtistName(md metadata.Tags) string {
	artist := md.Artist()
	if artist == "" {
		return consts.UnknownArtist
	}
	return artist
}
// mapAlbumName returns the album tag, or consts.UnknownAlbum when the tag is
// empty.
func (s mediaFileMapper) mapAlbumName(md metadata.Tags) string {
	if album := md.Album(); album != "" {
		return album
	}
	return consts.UnknownAlbum
}
// trackID returns the hex-encoded MD5 digest of the file path, used as the
// media file ID.
func (s mediaFileMapper) trackID(md metadata.Tags) string {
	digest := md5.Sum([]byte(md.FilePath()))
	return fmt.Sprintf("%x", digest)
}
// albumID returns the hex-encoded MD5 digest of the lower-cased
// "<album artist>\<album>" key. Unless conf.Server.Scanner.GroupAlbumReleases
// is set, a non-empty releaseDate is appended to the key (as-is, after
// another backslash) so that different releases get different IDs.
func (s mediaFileMapper) albumID(md metadata.Tags, releaseDate string) string {
	key := strings.ToLower(s.mapAlbumArtistName(md) + "\\" + s.mapAlbumName(md))
	if releaseDate != "" && !conf.Server.Scanner.GroupAlbumReleases {
		key += "\\" + releaseDate
	}
	digest := md5.Sum([]byte(key))
	return fmt.Sprintf("%x", digest)
}
// artistID returns the hex-encoded MD5 digest of the lower-cased mapped
// artist name.
func (s mediaFileMapper) artistID(md metadata.Tags) string {
	name := strings.ToLower(s.mapArtistName(md))
	digest := md5.Sum([]byte(name))
	return fmt.Sprintf("%x", digest)
}
// albumArtistID returns the hex-encoded MD5 digest of the lower-cased mapped
// album artist name.
func (s mediaFileMapper) albumArtistID(md metadata.Tags) string {
	name := strings.ToLower(s.mapAlbumArtistName(md))
	digest := md5.Sum([]byte(name))
	return fmt.Sprintf("%x", digest)
}
// mapGenres splits every genre tag value on the runes listed in
// conf.Server.Scanner.GenreSeparators, trims each resulting name and returns
// the distinct genres in order of first appearance. Names are compared
// case-insensitively; the first spelling seen is kept.
//
// Segments that are empty after trimming (for example the middle part of
// "Rock; ;Pop") are skipped, so no genre with an empty name is produced.
//
// It returns the name of the first genre together with the full list, or
// ("", nil) when no genre is found.
func (s mediaFileMapper) mapGenres(genres []string) (string, model.Genres) {
	isSeparator := func(r rune) bool {
		return strings.ContainsRune(conf.Server.Scanner.GenreSeparators, r)
	}

	var result model.Genres
	seen := map[string]struct{}{}
	for _, raw := range genres {
		for _, field := range strings.FieldsFunc(raw, isSeparator) {
			name := strings.TrimSpace(field)
			if name == "" {
				// FieldsFunc drops empty fields, but not whitespace-only ones.
				continue
			}
			key := strings.ToLower(name)
			if _, dup := seen[key]; dup {
				continue
			}
			seen[key] = struct{}{}
			result = append(result, model.Genre{Name: name})
		}
	}
	if len(result) == 0 {
		return "", nil
	}
	return result[0].Name, result
}
// mapDates resolves the date, original date and release date (each as a
// year/string pair) from the tags.
//
// Two adjustments are applied to the raw tag values:
//   - Files tagged like MusicBrainz Picard (original date set, release date
//     empty, date not earlier than the original date) carry the release date
//     in the Date tag. For those, the Date value becomes the release date and
//     the original date is also used as the date.
//   - Otherwise, a missing date falls back to the original date and then to
//     the release date.
func (s mediaFileMapper) mapDates(md metadata.Tags) (year int, date string,
	originalYear int, originalDate string,
	releaseYear int, releaseDate string) {
	year, date = md.Date()
	originalYear, originalDate = md.OriginalDate()
	releaseYear, releaseDate = md.ReleaseDate()

	switch {
	case originalYear != 0 && releaseYear == 0 && year >= originalYear:
		// Picard-style tagging: Date actually holds the release date.
		releaseYear, releaseDate = year, date
		year, date = originalYear, originalDate
	case year != 0:
		// A date is present; keep the values as tagged.
	case originalYear > 0:
		year, date = originalYear, originalDate
	default:
		year, date = releaseYear, releaseDate
	}
	return year, date, originalYear, originalDate, releaseYear, releaseDate
}

View File

@ -2,11 +2,19 @@ package scanner2
import (
"context"
"path/filepath"
"github.com/google/go-pipeline/pkg/pipeline"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
"github.com/navidrome/navidrome/scanner/metadata"
"github.com/navidrome/navidrome/utils/slice"
"golang.org/x/exp/maps"
)
const (
// filesBatchSize is the chunk size used when the list of files to import is
// split into batches for metadata extraction.
filesBatchSize = 100
)
func (s *scanner2) processFolder(ctx context.Context) pipeline.StageFn[*folderEntry] {
@ -14,17 +22,18 @@ func (s *scanner2) processFolder(ctx context.Context) pipeline.StageFn[*folderEn
// Load children mediafiles from DB
mfs, err := entry.scanCtx.ds.MediaFile(ctx).GetByFolder(entry.id)
if err != nil {
log.Warn(ctx, "Scanner: Error loading mediafiles from DB. Skipping", "folder", entry.path, err)
return entry, nil
log.Error(ctx, "Scanner: Error loading mediafiles from DB", "folder", entry.path, err)
return entry, err
}
dbTracks := slice.ToMap(mfs, func(mf model.MediaFile) (string, model.MediaFile) { return mf.Path, mf })
// Get list of files to import, leave dbTracks with tracks to be removed
var filesToImport []string
for afPath, af := range entry.audioFiles {
fullPath := filepath.Join(entry.path, afPath)
dbTrack, foundInDB := dbTracks[afPath]
if !foundInDB || entry.scanCtx.fullRescan {
filesToImport = append(filesToImport, afPath)
filesToImport = append(filesToImport, fullPath)
} else {
info, err := af.Info()
if err != nil {
@ -32,18 +41,50 @@ func (s *scanner2) processFolder(ctx context.Context) pipeline.StageFn[*folderEn
return nil, err
}
if info.ModTime().After(dbTrack.UpdatedAt) {
filesToImport = append(filesToImport, afPath)
filesToImport = append(filesToImport, fullPath)
}
}
delete(dbTracks, afPath)
}
//tracksToRemove := dbTracks // Just to name things properly
// Load tags from files to import
// Add new/updated files to DB
// Remove deleted mediafiles from DB
// Update folder info in DB
// Remaining dbTracks are tracks that were not found in the folder, so they should be marked as missing
entry.missingTracks = maps.Values(dbTracks)
entry.tracks, err = loadTagsFromFiles(ctx, entry, filesToImport)
if err != nil {
log.Warn(ctx, "Scanner: Error loading tags from files. Skipping", "folder", entry.path, err)
return entry, nil
}
entry.albums = loadAlbumsFromTags(ctx, entry)
entry.artists = loadArtistsFromTags(ctx, entry)
return entry, nil
}
}
// loadTagsFromFiles extracts the tags of the given files in batches of
// filesBatchSize and maps each result to a model.MediaFile using a mapper
// rooted at the entry's path.
//
// On an extraction error the batch loop stops and the error is returned
// together with the tracks mapped so far.
func loadTagsFromFiles(ctx context.Context, entry *folderEntry, toImport []string) (model.MediaFiles, error) {
	tracks := make(model.MediaFiles, 0, len(toImport))
	mapper := newMediaFileMapper(entry)
	err := slice.RangeByChunks(toImport, filesBatchSize, func(chunk []string) error {
		// Extract only the current chunk: passing the whole list here would
		// re-read every file once per chunk and duplicate the tracks.
		allTags, err := metadata.Extract(chunk...)
		if err != nil {
			log.Warn(ctx, "Scanner: Error extracting tags from files. Skipping", "folder", entry.path, err)
			return err
		}
		for _, tags := range allTags {
			tracks = append(tracks, mapper.toMediaFile(tags))
		}
		return nil
	})
	return tracks, err
}
// loadAlbumsFromTags is a placeholder: it currently ignores its arguments and
// always returns nil.
func loadAlbumsFromTags(ctx context.Context, entry *folderEntry) model.Albums {
return nil // TODO
}
// loadArtistsFromTags is a placeholder: it currently ignores its arguments
// and always returns nil.
func loadArtistsFromTags(ctx context.Context, entry *folderEntry) model.Artists {
return nil // TODO
}

View File

@ -45,6 +45,24 @@ var _ = Describe("Slice Utils", func() {
})
})
Describe("ToMap", func() {
It("returns empty map for an empty input", func() {
transformFunc := func(v int) (int, string) { return v, strconv.Itoa(v) }
result := slice.ToMap([]int{}, transformFunc)
Expect(result).To(BeEmpty())
})
It("returns a map with the result of the transform function", func() {
transformFunc := func(v int) (int, string) { return v * 2, strconv.Itoa(v * 2) }
result := slice.ToMap([]int{1, 2, 3, 4}, transformFunc)
Expect(result).To(HaveLen(4))
Expect(result).To(HaveKeyWithValue(2, "2"))
Expect(result).To(HaveKeyWithValue(4, "4"))
Expect(result).To(HaveKeyWithValue(6, "6"))
Expect(result).To(HaveKeyWithValue(8, "8"))
})
})
Describe("MostFrequent", func() {
It("returns zero value if no arguments are passed", func() {
Expect(slice.MostFrequent([]int{})).To(BeZero())