navidrome/scanner/tag_scanner.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

431 lines
13 KiB
Go
Raw Normal View History

2020-01-16 22:53:48 +01:00
package scanner
import (
"context"
"io/fs"
2020-01-16 22:53:48 +01:00
"os"
"path/filepath"
"sort"
2020-01-16 22:53:48 +01:00
"strings"
"time"
2020-08-03 05:17:13 +02:00
"github.com/navidrome/navidrome/conf"
"github.com/navidrome/navidrome/core"
"github.com/navidrome/navidrome/core/artwork"
"github.com/navidrome/navidrome/core/auth"
2020-01-24 01:44:08 +01:00
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model"
2020-07-27 17:12:39 +02:00
"github.com/navidrome/navidrome/model/request"
"github.com/navidrome/navidrome/scanner/metadata"
_ "github.com/navidrome/navidrome/scanner/metadata/ffmpeg"
_ "github.com/navidrome/navidrome/scanner/metadata/taglib"
"github.com/navidrome/navidrome/utils"
2020-01-16 22:53:48 +01:00
)
type TagScanner struct {
2022-12-23 18:28:22 +01:00
rootFolder string
ds model.DataStore
plsSync *playlistImporter
cnt *counters
mapper *mediaFileMapper
cacheWarmer artwork.CacheWarmer
2020-01-16 22:53:48 +01:00
}
func NewTagScanner(rootFolder string, ds model.DataStore, playlists core.Playlists, cacheWarmer artwork.CacheWarmer) FolderScanner {
2022-12-23 18:28:22 +01:00
s := &TagScanner{
rootFolder: rootFolder,
2022-12-28 19:37:13 +01:00
plsSync: newPlaylistImporter(ds, playlists, cacheWarmer, rootFolder),
2022-12-23 18:28:22 +01:00
ds: ds,
cacheWarmer: cacheWarmer,
2020-01-16 22:53:48 +01:00
}
2022-12-23 18:28:22 +01:00
return s
2020-01-16 22:53:48 +01:00
}
// dirMap indexes dirStats entries by a string key. Scan fills it using each
// folder's Path as the key.
type dirMap map[string]dirStats
// counters tallies what a single scan changed.
type counters struct {
	added, updated, deleted int64 // media file changes
	playlists               int64 // imported playlists
}

// total returns the number of media file changes (added + updated + deleted).
// Imported playlists are not part of the sum.
func (cnt *counters) total() int64 {
	sum := cnt.added
	sum += cnt.updated
	sum += cnt.deleted
	return sum
}
2020-06-11 23:36:09 +02:00
// filesBatchSize is the chunk size used when batching files for metadata
// extraction.
const filesBatchSize = 100
// Scan algorithm overview:
2020-07-27 17:12:39 +02:00
// Load all directories from the DB
2020-10-29 16:31:45 +01:00
// Traverse the music folder, collecting each subfolder's ModTime (self or any non-dir children, whichever is newer)
2020-07-27 17:12:39 +02:00
// For each changed folder: get all files from DB whose path starts with the changed folder (non-recursively), check each file:
2022-09-27 03:28:10 +02:00
// - if file in folder is newer, update the one in DB
// - if file in folder does not exists in DB, add it
// - for each file in the DB that is not found in the folder, delete it from DB
2020-10-29 16:31:45 +01:00
// Compare directories in the fs with the ones in the DB to find deleted folders
// For each deleted folder: delete all files from DB whose path starts with the delete folder path (non-recursively)
// Create new albums/artists, update counters:
2022-09-27 03:28:10 +02:00
// - collect all albumIDs and artistIDs from previous steps
// - refresh the collected albums and artists with the metadata from the mediafiles
2020-07-27 17:12:39 +02:00
// For each changed folder, process playlists:
2022-09-27 03:28:10 +02:00
// - If the playlist is not in the DB, import it, setting sync = true
// - If the playlist is in the DB and sync == true, import it, or else skip it
2020-07-27 17:12:39 +02:00
// Delete all empty albums, delete all empty artists, clean-up playlists
func (s *TagScanner) Scan(ctx context.Context, lastModifiedSince time.Time, progress chan uint32) (int64, error) {
ctx = auth.WithAdminUser(ctx, s.ds)
start := time.Now()
2020-07-12 19:30:03 +02:00
// Special case: if lastModifiedSince is zero, re-import all files
fullScan := lastModifiedSince.IsZero()
// If the media folder is empty (no music and no subfolders), abort to avoid deleting all data from DB
empty, err := isDirEmpty(ctx, s.rootFolder)
if err != nil {
return 0, err
}
if empty && !fullScan {
log.Error(ctx, "Media Folder is empty. Aborting scan.", "folder", s.rootFolder)
return 0, nil
}
2020-07-27 17:12:39 +02:00
allDBDirs, err := s.getDBDirTree(ctx)
2020-01-16 22:53:48 +01:00
if err != nil {
return 0, err
2020-01-16 22:53:48 +01:00
}
2020-10-29 16:31:45 +01:00
allFSDirs := dirMap{}
var changedDirs []string
s.cnt = &counters{}
2021-07-16 17:03:28 +02:00
genres := newCachedGenreRepository(ctx, s.ds.Genre(ctx))
s.mapper = newMediaFileMapper(s.rootFolder, genres)
2022-12-23 18:28:22 +01:00
refresher := newRefresher(s.ds, s.cacheWarmer, allFSDirs)
2020-07-27 17:12:39 +02:00
foldersFound, walkerError := s.getRootFolderWalker(ctx)
2020-10-29 16:31:45 +01:00
for {
folderStats, more := <-foldersFound
if !more {
break
}
2020-11-01 22:37:33 +01:00
progress <- folderStats.AudioFilesCount
2020-10-29 16:31:45 +01:00
allFSDirs[folderStats.Path] = folderStats
2020-01-16 22:53:48 +01:00
if s.folderHasChanged(folderStats, allDBDirs, lastModifiedSince) {
2020-10-29 16:31:45 +01:00
changedDirs = append(changedDirs, folderStats.Path)
log.Debug("Processing changed folder", "dir", folderStats.Path)
2022-12-23 18:28:22 +01:00
err := s.processChangedDir(ctx, refresher, fullScan, folderStats.Path)
2020-10-29 16:31:45 +01:00
if err != nil {
log.Error("Error updating folder in the DB", "dir", folderStats.Path, err)
2020-10-29 16:31:45 +01:00
}
}
}
2020-01-16 22:53:48 +01:00
if err := <-walkerError; err != nil {
log.Error("Scan was interrupted by error. See errors above", err)
return 0, err
}
2020-10-29 16:31:45 +01:00
deletedDirs := s.getDeletedDirs(ctx, allFSDirs, allDBDirs)
if len(deletedDirs)+len(changedDirs) == 0 {
log.Debug(ctx, "No changes found in Music Folder", "folder", s.rootFolder, "elapsed", time.Since(start))
return 0, nil
2020-10-29 16:31:45 +01:00
}
2020-01-16 22:53:48 +01:00
2020-07-27 17:12:39 +02:00
for _, dir := range deletedDirs {
2022-12-23 18:28:22 +01:00
err := s.processDeletedDir(ctx, refresher, dir)
2020-01-16 22:53:48 +01:00
if err != nil {
log.Error("Error removing deleted folder from DB", "dir", dir, err)
2020-01-16 22:53:48 +01:00
}
}
2020-10-29 16:31:45 +01:00
s.cnt.playlists = 0
2020-08-03 05:17:13 +02:00
if conf.Server.AutoImportPlaylists {
2020-10-29 16:31:45 +01:00
// Now that all mediafiles are imported/updated, search for and import/update playlists
2020-08-03 05:17:13 +02:00
u, _ := request.UserFrom(ctx)
for _, dir := range changedDirs {
info := allFSDirs[dir]
2020-10-29 16:31:45 +01:00
if info.HasPlaylist {
2020-08-03 05:17:13 +02:00
if !u.IsAdmin {
log.Warn("Playlists will not be imported, as there are no admin users yet, "+
"Please create an admin user first, and then update the playlists for them to be imported", "dir", dir)
} else {
2020-10-29 16:31:45 +01:00
s.cnt.playlists = s.plsSync.processPlaylists(ctx, dir)
2020-08-03 05:17:13 +02:00
}
2020-07-27 17:12:39 +02:00
}
}
2020-08-03 05:17:13 +02:00
} else {
log.Debug("Playlist auto-import is disabled")
2020-01-16 22:53:48 +01:00
}
err = s.ds.GC(log.NewContext(ctx), s.rootFolder)
2020-07-27 17:12:39 +02:00
log.Info("Finished processing Music Folder", "folder", s.rootFolder, "elapsed", time.Since(start),
2020-10-29 16:31:45 +01:00
"added", s.cnt.added, "updated", s.cnt.updated, "deleted", s.cnt.deleted, "playlistsImported", s.cnt.playlists)
2020-07-27 17:12:39 +02:00
return s.cnt.total(), err
2020-07-27 17:12:39 +02:00
}
// isDirEmpty reports whether dir has neither children (presumably its
// subfolders, as returned by loadDir) nor audio files. An error from loadDir
// is returned as is.
func isDirEmpty(ctx context.Context, dir string) (bool, error) {
	subEntries, dirInfo, err := loadDir(ctx, dir)
	if err != nil {
		return false, err
	}
	if len(subEntries) != 0 {
		return false, nil
	}
	return dirInfo.AudioFilesCount == 0, nil
}
func (s *TagScanner) getRootFolderWalker(ctx context.Context) (walkResults, chan error) {
2020-07-27 17:12:39 +02:00
start := time.Now()
log.Trace(ctx, "Loading directory tree from music folder", "folder", s.rootFolder)
2020-10-29 16:31:45 +01:00
results := make(chan dirStats, 5000)
walkerError := make(chan error)
2020-10-29 16:31:45 +01:00
go func() {
err := walkDirTree(ctx, s.rootFolder, results)
if err != nil {
log.Error("There were errors reading directories from filesystem", err)
2020-10-29 16:31:45 +01:00
}
walkerError <- err
log.Debug("Finished reading directories from filesystem", "elapsed", time.Since(start))
2020-10-29 16:31:45 +01:00
}()
return results, walkerError
2020-07-27 17:12:39 +02:00
}
2020-01-16 22:53:48 +01:00
2020-07-27 17:12:39 +02:00
func (s *TagScanner) getDBDirTree(ctx context.Context) (map[string]struct{}, error) {
start := time.Now()
log.Trace(ctx, "Loading directory tree from database", "folder", s.rootFolder)
2020-07-27 17:12:39 +02:00
repo := s.ds.MediaFile(ctx)
dirs, err := repo.FindPathsRecursively(s.rootFolder)
if err != nil {
return nil, err
}
resp := map[string]struct{}{}
for _, d := range dirs {
resp[filepath.Clean(d)] = struct{}{}
}
2020-01-18 05:28:11 +01:00
2020-07-27 17:12:39 +02:00
log.Debug("Directory tree loaded from DB", "total", len(resp), "elapsed", time.Since(start))
return resp, nil
2020-01-16 22:53:48 +01:00
}
func (s *TagScanner) folderHasChanged(folder dirStats, dbDirs map[string]struct{}, lastModified time.Time) bool {
2020-10-29 16:31:45 +01:00
_, inDB := dbDirs[folder.Path]
// If is a new folder with at least one song OR it was modified after lastModified
return (!inDB && (folder.AudioFilesCount > 0)) || folder.ModTime.After(lastModified)
2020-07-27 17:12:39 +02:00
}
func (s *TagScanner) getDeletedDirs(ctx context.Context, fsDirs dirMap, dbDirs map[string]struct{}) []string {
start := time.Now()
log.Trace(ctx, "Checking for deleted folders")
var deleted []string
for d := range dbDirs {
if _, ok := fsDirs[d]; !ok {
deleted = append(deleted, d)
}
2020-01-16 22:53:48 +01:00
}
2020-07-27 17:12:39 +02:00
sort.Strings(deleted)
log.Debug(ctx, "Finished deleted folders check", "total", len(deleted), "elapsed", time.Since(start))
return deleted
2020-01-16 22:53:48 +01:00
}
2022-12-23 18:28:22 +01:00
func (s *TagScanner) processDeletedDir(ctx context.Context, refresher *refresher, dir string) error {
2020-07-27 17:12:39 +02:00
start := time.Now()
mfs, err := s.ds.MediaFile(ctx).FindAllByPath(dir)
if err != nil {
return err
}
2020-07-27 17:12:39 +02:00
c, err := s.ds.MediaFile(ctx).DeleteByPath(dir)
if err != nil {
return err
2020-01-16 22:53:48 +01:00
}
2020-07-27 17:12:39 +02:00
s.cnt.deleted += c
for _, t := range mfs {
2022-12-23 18:28:22 +01:00
refresher.accumulate(t)
2020-07-27 17:12:39 +02:00
}
2022-12-23 18:28:22 +01:00
err = refresher.flush(ctx)
log.Info(ctx, "Finished processing deleted folder", "dir", dir, "purged", len(mfs), "elapsed", time.Since(start))
2020-07-27 17:12:39 +02:00
return err
2020-01-16 22:53:48 +01:00
}
2022-12-23 18:28:22 +01:00
func (s *TagScanner) processChangedDir(ctx context.Context, refresher *refresher, fullScan bool, dir string) error {
2020-01-16 22:53:48 +01:00
start := time.Now()
// Load folder's current tracks from DB into a map
currentTracks := map[string]model.MediaFile{}
ct, err := s.ds.MediaFile(ctx).FindAllByPath(dir)
2020-01-16 22:53:48 +01:00
if err != nil {
return err
}
for _, t := range ct {
currentTracks[t.Path] = t
2020-01-16 22:53:48 +01:00
}
// Load track list from the folder
2020-07-27 17:12:39 +02:00
files, err := loadAllAudioFiles(dir)
2020-01-16 22:53:48 +01:00
if err != nil {
return err
}
// If no files to process, return
if len(files)+len(currentTracks) == 0 {
return nil
}
2020-07-27 17:12:39 +02:00
orphanTracks := map[string]model.MediaFile{}
for k, v := range currentTracks {
orphanTracks[k] = v
}
// If track from folder is newer than the one in DB, select for update/insert in DB
log.Trace(ctx, "Processing changed folder", "dir", dir, "tracksInDB", len(currentTracks), "tracksInFolder", len(files))
var filesToUpdate []string
for filePath, entry := range files {
c, inDB := currentTracks[filePath]
if !inDB || fullScan {
2020-07-12 18:35:23 +02:00
filesToUpdate = append(filesToUpdate, filePath)
2020-07-27 17:12:39 +02:00
s.cnt.added++
} else {
info, err := entry.Info()
if err != nil {
log.Error("Could not stat file", "filePath", filePath, err)
continue
}
if info.ModTime().After(c.UpdatedAt) {
filesToUpdate = append(filesToUpdate, filePath)
s.cnt.updated++
}
}
2020-07-27 17:12:39 +02:00
// Force a refresh of the album and artist, to cater for cover art files
2022-12-23 18:28:22 +01:00
refresher.accumulate(c)
2020-07-27 17:12:39 +02:00
// Only leaves in orphanTracks the ones not found in the folder. After this loop any remaining orphanTracks
// are considered gone from the music folder and will be deleted from DB
delete(orphanTracks, filePath)
}
2020-01-16 22:53:48 +01:00
numUpdatedTracks := 0
numPurgedTracks := 0
if len(filesToUpdate) > 0 {
2022-12-23 18:28:22 +01:00
numUpdatedTracks, err = s.addOrUpdateTracksInDB(ctx, refresher, dir, currentTracks, filesToUpdate)
if err != nil {
return err
}
}
2020-07-27 17:12:39 +02:00
if len(orphanTracks) > 0 {
2022-12-23 18:28:22 +01:00
numPurgedTracks, err = s.deleteOrphanSongs(ctx, refresher, dir, orphanTracks)
if err != nil {
return err
}
}
2020-07-27 17:12:39 +02:00
2022-12-23 18:28:22 +01:00
err = refresher.flush(ctx)
2020-07-27 17:12:39 +02:00
log.Info(ctx, "Finished processing changed folder", "dir", dir, "updated", numUpdatedTracks,
"deleted", numPurgedTracks, "elapsed", time.Since(start))
2020-07-27 17:12:39 +02:00
return err
}
2022-12-23 18:28:22 +01:00
func (s *TagScanner) deleteOrphanSongs(
ctx context.Context,
refresher *refresher,
dir string,
tracksToDelete map[string]model.MediaFile,
) (int, error) {
2020-07-27 17:12:39 +02:00
numPurgedTracks := 0
2020-01-16 22:53:48 +01:00
2020-07-27 17:12:39 +02:00
log.Debug(ctx, "Deleting orphan tracks from DB", "dir", dir, "numTracks", len(tracksToDelete))
// Remaining tracks from DB that are not in the folder are deleted
for _, ct := range tracksToDelete {
numPurgedTracks++
2022-12-23 18:28:22 +01:00
refresher.accumulate(ct)
2020-07-27 17:12:39 +02:00
if err := s.ds.MediaFile(ctx).Delete(ct.ID); err != nil {
return 0, err
}
2020-07-27 17:12:39 +02:00
s.cnt.deleted++
2020-01-16 22:53:48 +01:00
}
2020-07-27 17:12:39 +02:00
return numPurgedTracks, nil
2020-01-16 22:53:48 +01:00
}
2022-12-23 18:28:22 +01:00
func (s *TagScanner) addOrUpdateTracksInDB(
ctx context.Context,
refresher *refresher,
dir string,
currentTracks map[string]model.MediaFile,
filesToUpdate []string,
) (int, error) {
2020-07-27 17:12:39 +02:00
numUpdatedTracks := 0
log.Trace(ctx, "Updating mediaFiles in DB", "dir", dir, "numFiles", len(filesToUpdate))
// Break the file list in chunks to avoid calling ffmpeg with too many parameters
chunks := utils.BreakUpStringSlice(filesToUpdate, filesBatchSize)
for _, chunk := range chunks {
// Load tracks Metadata from the folder
newTracks, err := s.loadTracks(chunk)
if err != nil {
2020-07-27 17:12:39 +02:00
return 0, err
}
2020-07-27 17:12:39 +02:00
// If track from folder is newer than the one in DB, update/insert in DB
log.Trace(ctx, "Updating mediaFiles in DB", "dir", dir, "files", chunk, "numFiles", len(chunk))
for i := range newTracks {
n := newTracks[i]
// Keep current annotations if the track is in the DB
if t, ok := currentTracks[n.Path]; ok {
n.Annotations = t.Annotations
}
2020-07-27 17:12:39 +02:00
err := s.ds.MediaFile(ctx).Put(&n)
if err != nil {
return 0, err
}
2022-12-23 18:28:22 +01:00
refresher.accumulate(n)
2020-07-27 17:12:39 +02:00
numUpdatedTracks++
}
}
2020-07-27 17:12:39 +02:00
return numUpdatedTracks, nil
}
2020-01-16 22:53:48 +01:00
func (s *TagScanner) loadTracks(filePaths []string) (model.MediaFiles, error) {
mds, err := metadata.Extract(filePaths...)
if err != nil {
return nil, err
}
var mfs model.MediaFiles
for _, md := range mds {
mf := s.mapper.toMediaFile(md)
mfs = append(mfs, mf)
2020-01-16 22:53:48 +01:00
}
return mfs, nil
2020-01-16 22:53:48 +01:00
}
func loadAllAudioFiles(dirPath string) (map[string]fs.DirEntry, error) {
files, err := fs.ReadDir(os.DirFS(dirPath), ".")
if err != nil {
return nil, err
}
fileInfos := make(map[string]fs.DirEntry)
for _, f := range files {
if f.IsDir() {
continue
}
if strings.HasPrefix(f.Name(), ".") {
continue
}
filePath := filepath.Join(dirPath, f.Name())
2022-12-23 17:32:39 +01:00
if !model.IsAudioFile(filePath) {
continue
}
fileInfos[filePath] = f
}
return fileInfos, nil
}