New TagLib extractor (wip)

This commit is contained in:
Deluan 2024-05-15 20:32:49 -04:00
parent 40378348fc
commit 8f5b8085d3
12 changed files with 82 additions and 151 deletions

View File

@ -5,57 +5,69 @@ import (
"os"
"strconv"
"strings"
"time"
"github.com/navidrome/navidrome/log"
"github.com/navidrome/navidrome/model/tag"
"github.com/navidrome/navidrome/scanner/metadata"
)
const ExtractorID = "taglib"
// extractor is the TagLib-backed tag extractor that init registers with the tag package.
type extractor struct{}
// Extractor is the TagLib-backed extractor that init registers with the scanner/metadata package.
type Extractor struct{}
func (e *Extractor) Parse(paths ...string) (map[string]metadata.ParsedTags, error) {
fileTags := map[string]metadata.ParsedTags{}
for _, path := range paths {
tags, err := e.extractMetadata(path)
if !errors.Is(err, os.ErrPermission) {
fileTags[path] = tags
// Parse extracts the tags and audio properties of every given file and returns
// them keyed by file path.
//
// Files whose metadata cannot be read are left out of the result instead of
// aborting the whole batch. The returned error is always nil.
func (e extractor) Parse(files ...string) (map[string]tag.Properties, error) {
	results := make(map[string]tag.Properties, len(files))
	for _, path := range files {
		props, err := e.extractMetadata(path)
		// Files we are not allowed to read are an expected condition: skip them.
		if errors.Is(err, os.ErrPermission) {
			continue
		}
		// Any other failure was already logged by extractMetadata, which returns
		// a nil *tag.Properties in that case; dereferencing it would panic.
		if err != nil || props == nil {
			continue
		}
		results[path] = *props
	}
	return results, nil
}
// CustomMappings returns the extractor-specific tag mapping table. It has four
// entries, keyed by "title", "album", "artist" and "tracknumber".
// NOTE(review): presumably each key lists extra source tags that feed it; confirm
// against the metadata package.
func (e *Extractor) CustomMappings() metadata.ParsedTags {
	aliases := metadata.ParsedTags{}
	aliases["title"] = []string{"titlesort"}
	aliases["album"] = []string{"albumsort"}
	aliases["artist"] = []string{"artistsort"}
	aliases["tracknumber"] = []string{"trck", "_track"}
	return aliases
}
func (e *Extractor) Version() string {
// Version reports the version string returned by the package-level Version function.
func (extractor) Version() string {
	return Version()
}
func (e *Extractor) extractMetadata(filePath string) (metadata.ParsedTags, error) {
// extractMetadata reads the tags of filePath through Read and splits them into
// audio properties (duration, bit rate, channels, sample rate) and the
// remaining tag map. When Read fails, the error is logged and returned
// together with a nil result.
func (e *extractor) extractMetadata(filePath string) (*tag.Properties, error) {
tags, err := Read(filePath)
if err != nil {
log.Warn("TagLib: Error reading metadata from file. Skipping", "filePath", filePath, err)
log.Warn("extractor: Error reading metadata from file. Skipping", "filePath", filePath, err)
return nil, err
}
// Parse audio properties. Each numeric entry is removed from the tag map once
// consumed; values that fail to parse are ignored and leave the field at zero.
ap := tag.AudioProperties{}
if length, ok := tags["lengthinmilliseconds"]; ok && len(length) > 0 {
millis, _ := strconv.Atoi(length[0])
if duration := float64(millis) / 1000.0; duration > 0 {
tags["duration"] = []string{strconv.FormatFloat(duration, 'f', 2, 32)}
if millis > 0 {
ap.Duration = time.Millisecond * time.Duration(millis)
}
delete(tags, "lengthinmilliseconds")
}
if bitrate, ok := tags["bitrate"]; ok && len(bitrate) > 0 {
ap.BitRate, _ = strconv.Atoi(bitrate[0])
delete(tags, "bitrate")
}
if channels, ok := tags["channels"]; ok && len(channels) > 0 {
ap.Channels, _ = strconv.Atoi(channels[0])
delete(tags, "channels")
}
if samplerate, ok := tags["samplerate"]; ok && len(samplerate) > 0 {
ap.SampleRate, _ = strconv.Atoi(samplerate[0])
delete(tags, "samplerate")
}
// Adjust some ID3 tags
parseTIPL(tags)
delete(tags, "tmcl") // TMCL is already parsed by TagLib
delete(tags, "tmcl") // TMCL is already parsed by TagLib
return tags, nil
// HasPicture is true only when the first "has_picture" value is exactly "true".
return &tag.Properties{
Tags: tags,
AudioProperties: ap,
HasPicture: tags["has_picture"] != nil && len(tags["has_picture"]) > 0 && tags["has_picture"][0] == "true",
}, nil
}
// These are the only roles we support, based on Picard's tag map:
@ -68,7 +80,7 @@ var tiplMapping = map[string]string{
"dj-mix": "djmixer",
}
// parseTIPL parses the ID3v2.4 TIPL frame string, which is received from TagLib in the format
// parseTIPL parses the ID3v2.4 TIPL frame string, which TagLib delivers as a single space-separated string in the format
//
// "arranger Andrew Powell engineer Chris Blair engineer Pat Stapley producer Eric Woolfson".
//
@ -103,6 +115,8 @@ func parseTIPL(tags metadata.ParsedTags) {
delete(tags, "tipl")
}
// Compile-time assertion that *extractor implements tag.Extractor.
var _ tag.Extractor = (*extractor)(nil)
// init registers the extractor so that it is available as soon as this package is imported.
func init() {
metadata.RegisterExtractor(ExtractorID, &Extractor{})
tag.RegisterExtractor("taglib", &extractor{})
}

View File

@ -13,5 +13,5 @@ func TestTagLib(t *testing.T) {
tests.Init(t, true)
log.SetLevel(log.LevelFatal)
RegisterFailHandler(Fail)
RunSpecs(t, "TagLib Suite")
RunSpecs(t, "extractor Suite")
}

View File

@ -5,16 +5,17 @@ import (
"os"
"github.com/navidrome/navidrome/scanner/metadata"
"github.com/navidrome/navidrome/scanner/metadata/taglib"
"github.com/navidrome/navidrome/utils"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)
var _ = Describe("Extractor", func() {
var e *Extractor
var e *taglib.Extractor
BeforeEach(func() {
e = &Extractor{}
e = &taglib.Extractor{}
})
Describe("Parse", func() {
@ -240,7 +241,7 @@ var _ = Describe("Extractor", func() {
Context("when the TIPL string is populated", func() {
It("correctly parses roles and names", func() {
tags["tipl"] = []string{"arranger Andrew Powell dj-mix François Kevorkian engineer Chris Blair"}
parseTIPL(tags)
taglib.parseTIPL(tags)
Expect(tags["arranger"]).To(ConsistOf("Andrew Powell"))
Expect(tags["engineer"]).To(ConsistOf("Chris Blair"))
Expect(tags["djmixer"]).To(ConsistOf("François Kevorkian"))
@ -248,14 +249,14 @@ var _ = Describe("Extractor", func() {
It("handles multiple names for a single role", func() {
tags["tipl"] = []string{"engineer Pat Stapley producer Eric Woolfson engineer Chris Blair"}
parseTIPL(tags)
taglib.parseTIPL(tags)
Expect(tags["producer"]).To(ConsistOf("Eric Woolfson"))
Expect(tags["engineer"]).To(ConsistOf("Pat Stapley", "Chris Blair"))
})
It("discards roles without names", func() {
tags["tipl"] = []string{"engineer Pat Stapley producer engineer Chris Blair"}
parseTIPL(tags)
taglib.parseTIPL(tags)
Expect(tags).ToNot(HaveKey("producer"))
Expect(tags["engineer"]).To(ConsistOf("Pat Stapley", "Chris Blair"))
})
@ -264,14 +265,14 @@ var _ = Describe("Extractor", func() {
Context("when the TIPL string is empty", func() {
It("does nothing", func() {
tags["tipl"] = []string{""}
parseTIPL(tags)
taglib.parseTIPL(tags)
Expect(tags).To(BeEmpty())
})
})
Context("when the TIPL is not present", func() {
It("does nothing", func() {
parseTIPL(tags)
taglib.parseTIPL(tags)
Expect(tags).To(BeEmpty())
})
})

View File

@ -50,119 +50,13 @@ int taglib_read(const FILENAME_CHAR_T *filename, unsigned long id) {
// Create a map to collect all the tags
TagLib::PropertyMap tags = f.file()->properties();
// Make sure at least the basic properties are extracted
TagLib::Tag *basic = f.file()->tag();
if (!basic->isEmpty()) {
if (!basic->title().isEmpty()) {
tags.insert("title", basic->title());
}
if (!basic->artist().isEmpty()) {
tags.insert("artist", basic->artist());
}
if (!basic->album().isEmpty()) {
tags.insert("album", basic->album());
}
if (basic->year() > 0) {
tags.insert("date", TagLib::String::number(basic->year()));
}
if (basic->track() > 0) {
tags.insert("_track", TagLib::String::number(basic->track()));
}
}
TagLib::ID3v2::Tag *id3Tags = NULL;
// Get some extended/non-standard ID3-only tags (ex: iTunes extended frames)
TagLib::MPEG::File *mp3File(dynamic_cast<TagLib::MPEG::File *>(f.file()));
if (mp3File != NULL) {
id3Tags = mp3File->ID3v2Tag();
}
if (id3Tags == NULL) {
TagLib::RIFF::WAV::File *wavFile(dynamic_cast<TagLib::RIFF::WAV::File *>(f.file()));
if (wavFile != NULL && wavFile->hasID3v2Tag()) {
id3Tags = wavFile->ID3v2Tag();
}
}
if (id3Tags == NULL) {
TagLib::RIFF::AIFF::File *aiffFile(dynamic_cast<TagLib::RIFF::AIFF::File *>(f.file()));
if (aiffFile && aiffFile->hasID3v2Tag()) {
id3Tags = aiffFile->tag();
}
}
// Yes, it is possible to have ID3v2 tags in FLAC. However, that can cause problems
// with many players, so they will not be parsed
if (id3Tags != NULL) {
const auto &frames = id3Tags->frameListMap();
for (const auto &kv: frames) {
if (kv.first == "USLT") {
for (const auto &tag: kv.second) {
TagLib::ID3v2::UnsynchronizedLyricsFrame *frame = dynamic_cast<TagLib::ID3v2::UnsynchronizedLyricsFrame *>(tag);
if (frame == NULL) continue;
tags.erase("LYRICS");
const auto bv = frame->language();
char language[4] = {'x', 'x', 'x', '\0'};
if (bv.size() == 3) {
strncpy(language, bv.data(), 3);
}
char *val = (char *)frame->text().toCString(true);
go_map_put_lyrics(id, language, val);
}
} else if (kv.first == "SYLT") {
for (const auto &tag: kv.second) {
TagLib::ID3v2::SynchronizedLyricsFrame *frame = dynamic_cast<TagLib::ID3v2::SynchronizedLyricsFrame *>(tag);
if (frame == NULL) continue;
const auto bv = frame->language();
char language[4] = {'x', 'x', 'x', '\0'};
if (bv.size() == 3) {
strncpy(language, bv.data(), 3);
}
const auto format = frame->timestampFormat();
if (format == TagLib::ID3v2::SynchronizedLyricsFrame::AbsoluteMilliseconds) {
for (const auto &line: frame->synchedText()) {
char *text = (char *)line.text.toCString(true);
go_map_put_lyric_line(id, language, text, line.time);
}
} else if (format == TagLib::ID3v2::SynchronizedLyricsFrame::AbsoluteMpegFrames) {
const int sampleRate = props->sampleRate();
if (sampleRate != 0) {
for (const auto &line: frame->synchedText()) {
const int timeInMs = (line.time * 1000) / sampleRate;
char *text = (char *)line.text.toCString(true);
go_map_put_lyric_line(id, language, text, timeInMs);
}
}
}
}
} else {
if (!kv.second.isEmpty()) {
tags.insert(kv.first, kv.second.front()->toString());
}
}
}
}
// M4A may have some iTunes specific tags
TagLib::MP4::File *m4afile(dynamic_cast<TagLib::MP4::File *>(f.file()));
if (m4afile != NULL) {
const auto itemListMap = m4afile->tag()->itemMap();
for (const auto item: itemListMap) {
char *key = (char *)item.first.toCString(true);
for (const auto value: item.second.toStringList()) {
char *val = (char *)value.toCString(true);
go_map_put_m4a_str(id, key, val);
tags.insert(item.first, value);
}
}
}
@ -177,6 +71,26 @@ int taglib_read(const FILENAME_CHAR_T *filename, unsigned long id) {
}
}
// Make sure at least the basic properties are extracted
TagLib::Tag *basic = f.file()->tag();
if (!basic->isEmpty()) {
if (!basic->title().isEmpty()) {
tags.insert("_title", basic->title());
}
if (!basic->artist().isEmpty()) {
tags.insert("_artist", basic->artist());
}
if (!basic->album().isEmpty()) {
tags.insert("_album", basic->album());
}
if (basic->year() > 0) {
tags.insert("_date", TagLib::String::number(basic->year()));
}
if (basic->track() > 0) {
tags.insert("_track", TagLib::String::number(basic->track()));
}
}
// Send all collected tags to the Go map
for (TagLib::PropertyMap::ConstIterator i = tags.begin(); i != tags.end();
++i) {

View File

@ -34,8 +34,8 @@ func Read(filename string) (tags map[string][]string, err error) {
debug.SetPanicOnFault(true)
defer func() {
if r := recover(); r != nil {
log.Error("TagLib: recovered from panic when reading tags", "file", filename, "error", r)
err = fmt.Errorf("TagLib: recovered from panic: %s", r)
log.Error("extractor: recovered from panic when reading tags", "file", filename, "error", r)
err = fmt.Errorf("extractor: recovered from panic: %s", r)
}
}()
@ -44,7 +44,7 @@ func Read(filename string) (tags map[string][]string, err error) {
id, m := newMap()
defer deleteMap(id)
log.Trace("TagLib: reading tags", "filename", filename, "map_id", id)
log.Trace("extractor: reading tags", "filename", filename, "map_id", id)
res := C.taglib_read(fp, C.ulong(id))
switch res {
case C.TAGLIB_ERR_PARSE:
@ -64,9 +64,9 @@ func Read(filename string) (tags map[string][]string, err error) {
}
if log.IsGreaterOrEqualTo(log.LevelDebug) {
j, _ := json.Marshal(m)
log.Trace("TagLib: read tags", "tags", string(j), "filename", filename, "id", id)
log.Trace("extractor: read tags", "tags", string(j), "filename", filename, "id", id)
} else {
log.Trace("TagLib: read tags", "tags", m, "filename", filename, "id", id)
log.Trace("extractor: read tags", "tags", m, "filename", filename, "id", id)
}
return m, nil
@ -126,7 +126,7 @@ func do_put_map(id C.ulong, key string, val *C.char) {
}
/*
As I'm working on the new scanner, I see that the `properties` from TagLib is ill-suited to extract multi-valued ID3 frames. I'll have to change the way we do it for ID3, probably by sending the raw frames to Go and mapping there, instead of relying on the auto-mapped `properties`. I think this would reduce our reliance on C++, while also giving us more flexibility, including parsing the USLT / SYLT frames in Go
As I'm working on the new scanner, I see that the `properties` map returned by TagLib is ill-suited to extracting multi-valued ID3 frames. I'll have to change the way we do it for ID3, probably by sending the raw frames to Go and mapping them there, instead of relying on the auto-mapped `properties`. I think this would reduce our reliance on C++, while also giving us more flexibility, including parsing the USLT / SYLT frames in Go.
*/
//export go_map_put_int

View File

@ -3,6 +3,7 @@ package main
import (
_ "net/http/pprof" //nolint:gosec
_ "github.com/navidrome/navidrome/adapters/taglib"
"github.com/navidrome/navidrome/cmd"
)

View File

@ -1,6 +1,7 @@
package tag
import (
"fmt"
"sync"
)
@ -23,6 +24,7 @@ var (
// RegisterExtractor makes parser available under the given id, replacing any
// extractor previously registered with the same id. It is safe for concurrent
// use: the registry is only modified while lock is held.
//
// It panics if parser is nil, since registering a nil extractor is a
// programming error.
func RegisterExtractor(id string, parser Extractor) {
	if parser == nil {
		panic(fmt.Sprintf("tag: RegisterExtractor called with a nil extractor for id %q", id))
	}
	lock.Lock()
	defer lock.Unlock()
	extractors[id] = parser
}

View File

@ -25,6 +25,7 @@ genre: [ TCON, GENRE, ©gen, WM/Genre, IGNR ]
compilation: [ TCMP, COMPILATION, cpil, WM/IsCompilation ]
track: [ TRACK, TRCK, TRACKNUMBER, trkn, WM/TrackNumber, ITRK ]
tracktotal: [ TRACKTOTAL, TOTALTRACKS ]
subtitle: [ TIT3, SUBTITLE, ----:com.apple.iTunes:SUBTITLE, WM/SubTitle ]
disc: [ TPOS, DISC, DISCNUMBER, disk, WM/PartOfSet ]
disctotal: [ DISCTOTAL, TOTALDISCS ]
discsubtitle: [ TSST, DISCSUBTITLE, ----:com.apple.iTunes:DISCSUBTITLE, WM/SetSubTitle ]
@ -68,6 +69,5 @@ releasecountry: [ TXXX:MusicBrainz Album Release Country, RELEASECOUNTRY, ----:c
releasestatus: [ TXXX:MusicBrainz Album Status, RELEASESTATUS, MUSICBRAINZ_ALBUMSTATUS, ----:com.apple.iTunes:MusicBrainz Album Status, MusicBrainz/Album Status ]
releasetype: [ TXXX:MusicBrainz Album Type, RELEASETYPE, MUSICBRAINZ_ALBUMTYPE, ----:com.apple.iTunes:MusicBrainz Album Type, MusicBrainz/Album Type ]
script: [ TXXX:SCRIPT, SCRIPT, ----:com.apple.iTunes:SCRIPT, WM/Script ]
subtitle: [ TIT3, SUBTITLE, ----:com.apple.iTunes:SUBTITLE, WM/SubTitle ]
website: [ WOAR, WEBSITE, Weblink, WM/AuthorURL ]
work: [ TXXX:WORK, TIT1, WORK, ©wrk, WM/Work ]

View File

@ -18,7 +18,6 @@ import (
"github.com/navidrome/navidrome/model/request"
"github.com/navidrome/navidrome/scanner/metadata"
_ "github.com/navidrome/navidrome/scanner/metadata/ffmpeg"
_ "github.com/navidrome/navidrome/scanner/metadata/taglib"
"github.com/navidrome/navidrome/utils/pl"
"github.com/navidrome/navidrome/utils/slice"
"golang.org/x/sync/errgroup"