New TagLib extractor (wip)
This commit is contained in:
parent
40378348fc
commit
8f5b8085d3
|
@ -5,57 +5,69 @@ import (
|
|||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/navidrome/navidrome/log"
|
||||
"github.com/navidrome/navidrome/model/tag"
|
||||
"github.com/navidrome/navidrome/scanner/metadata"
|
||||
)
|
||||
|
||||
const ExtractorID = "taglib"
|
||||
type extractor struct{}
|
||||
|
||||
type Extractor struct{}
|
||||
|
||||
func (e *Extractor) Parse(paths ...string) (map[string]metadata.ParsedTags, error) {
|
||||
fileTags := map[string]metadata.ParsedTags{}
|
||||
for _, path := range paths {
|
||||
tags, err := e.extractMetadata(path)
|
||||
if !errors.Is(err, os.ErrPermission) {
|
||||
fileTags[path] = tags
|
||||
func (e extractor) Parse(files ...string) (map[string]tag.Properties, error) {
|
||||
results := make(map[string]tag.Properties)
|
||||
for _, path := range files {
|
||||
props, err := e.extractMetadata(path)
|
||||
if errors.Is(err, os.ErrPermission) {
|
||||
continue
|
||||
}
|
||||
results[path] = *props
|
||||
}
|
||||
return fileTags, nil
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (e *Extractor) CustomMappings() metadata.ParsedTags {
|
||||
return metadata.ParsedTags{
|
||||
"title": {"titlesort"},
|
||||
"album": {"albumsort"},
|
||||
"artist": {"artistsort"},
|
||||
"tracknumber": {"trck", "_track"},
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Extractor) Version() string {
|
||||
func (e extractor) Version() string {
|
||||
return Version()
|
||||
}
|
||||
|
||||
func (e *Extractor) extractMetadata(filePath string) (metadata.ParsedTags, error) {
|
||||
func (e *extractor) extractMetadata(filePath string) (*tag.Properties, error) {
|
||||
tags, err := Read(filePath)
|
||||
if err != nil {
|
||||
log.Warn("TagLib: Error reading metadata from file. Skipping", "filePath", filePath, err)
|
||||
log.Warn("extractor: Error reading metadata from file. Skipping", "filePath", filePath, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Parse audio properties
|
||||
ap := tag.AudioProperties{}
|
||||
if length, ok := tags["lengthinmilliseconds"]; ok && len(length) > 0 {
|
||||
millis, _ := strconv.Atoi(length[0])
|
||||
if duration := float64(millis) / 1000.0; duration > 0 {
|
||||
tags["duration"] = []string{strconv.FormatFloat(duration, 'f', 2, 32)}
|
||||
if millis > 0 {
|
||||
ap.Duration = time.Millisecond * time.Duration(millis)
|
||||
}
|
||||
delete(tags, "lengthinmilliseconds")
|
||||
}
|
||||
if bitrate, ok := tags["bitrate"]; ok && len(bitrate) > 0 {
|
||||
ap.BitRate, _ = strconv.Atoi(bitrate[0])
|
||||
delete(tags, "bitrate")
|
||||
}
|
||||
if channels, ok := tags["channels"]; ok && len(channels) > 0 {
|
||||
ap.Channels, _ = strconv.Atoi(channels[0])
|
||||
delete(tags, "channels")
|
||||
}
|
||||
if samplerate, ok := tags["samplerate"]; ok && len(samplerate) > 0 {
|
||||
ap.SampleRate, _ = strconv.Atoi(samplerate[0])
|
||||
delete(tags, "samplerate")
|
||||
}
|
||||
|
||||
// Adjust some ID3 tags
|
||||
parseTIPL(tags)
|
||||
delete(tags, "tmcl") // TMCL is already parsed by TagLib
|
||||
delete(tags, "tmcl") // TMCL is already parsed by extractor
|
||||
|
||||
return tags, nil
|
||||
return &tag.Properties{
|
||||
Tags: tags,
|
||||
AudioProperties: ap,
|
||||
HasPicture: tags["has_picture"] != nil && len(tags["has_picture"]) > 0 && tags["has_picture"][0] == "true",
|
||||
}, nil
|
||||
}
|
||||
|
||||
// These are the only roles we support, based on Picard's tag map:
|
||||
|
@ -68,7 +80,7 @@ var tiplMapping = map[string]string{
|
|||
"dj-mix": "djmixer",
|
||||
}
|
||||
|
||||
// parseTIPL parses the ID3v2.4 TIPL frame string, which is received from TagLib in the format
|
||||
// parseTIPL parses the ID3v2.4 TIPL frame string, which is received from extractor in the format
|
||||
//
|
||||
// "arranger Andrew Powell engineer Chris Blair engineer Pat Stapley producer Eric Woolfson".
|
||||
//
|
||||
|
@ -103,6 +115,8 @@ func parseTIPL(tags metadata.ParsedTags) {
|
|||
delete(tags, "tipl")
|
||||
}
|
||||
|
||||
var _ tag.Extractor = (*extractor)(nil)
|
||||
|
||||
func init() {
|
||||
metadata.RegisterExtractor(ExtractorID, &Extractor{})
|
||||
tag.RegisterExtractor("taglib", &extractor{})
|
||||
}
|
|
@ -13,5 +13,5 @@ func TestTagLib(t *testing.T) {
|
|||
tests.Init(t, true)
|
||||
log.SetLevel(log.LevelFatal)
|
||||
RegisterFailHandler(Fail)
|
||||
RunSpecs(t, "TagLib Suite")
|
||||
RunSpecs(t, "extractor Suite")
|
||||
}
|
|
@ -5,16 +5,17 @@ import (
|
|||
"os"
|
||||
|
||||
"github.com/navidrome/navidrome/scanner/metadata"
|
||||
"github.com/navidrome/navidrome/scanner/metadata/taglib"
|
||||
"github.com/navidrome/navidrome/utils"
|
||||
. "github.com/onsi/ginkgo/v2"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
var _ = Describe("Extractor", func() {
|
||||
var e *Extractor
|
||||
var e *taglib.Extractor
|
||||
|
||||
BeforeEach(func() {
|
||||
e = &Extractor{}
|
||||
e = &taglib.Extractor{}
|
||||
})
|
||||
|
||||
Describe("Parse", func() {
|
||||
|
@ -240,7 +241,7 @@ var _ = Describe("Extractor", func() {
|
|||
Context("when the TIPL string is populated", func() {
|
||||
It("correctly parses roles and names", func() {
|
||||
tags["tipl"] = []string{"arranger Andrew Powell dj-mix François Kevorkian engineer Chris Blair"}
|
||||
parseTIPL(tags)
|
||||
taglib.parseTIPL(tags)
|
||||
Expect(tags["arranger"]).To(ConsistOf("Andrew Powell"))
|
||||
Expect(tags["engineer"]).To(ConsistOf("Chris Blair"))
|
||||
Expect(tags["djmixer"]).To(ConsistOf("François Kevorkian"))
|
||||
|
@ -248,14 +249,14 @@ var _ = Describe("Extractor", func() {
|
|||
|
||||
It("handles multiple names for a single role", func() {
|
||||
tags["tipl"] = []string{"engineer Pat Stapley producer Eric Woolfson engineer Chris Blair"}
|
||||
parseTIPL(tags)
|
||||
taglib.parseTIPL(tags)
|
||||
Expect(tags["producer"]).To(ConsistOf("Eric Woolfson"))
|
||||
Expect(tags["engineer"]).To(ConsistOf("Pat Stapley", "Chris Blair"))
|
||||
})
|
||||
|
||||
It("discards roles without names", func() {
|
||||
tags["tipl"] = []string{"engineer Pat Stapley producer engineer Chris Blair"}
|
||||
parseTIPL(tags)
|
||||
taglib.parseTIPL(tags)
|
||||
Expect(tags).ToNot(HaveKey("producer"))
|
||||
Expect(tags["engineer"]).To(ConsistOf("Pat Stapley", "Chris Blair"))
|
||||
})
|
||||
|
@ -264,14 +265,14 @@ var _ = Describe("Extractor", func() {
|
|||
Context("when the TIPL string is empty", func() {
|
||||
It("does nothing", func() {
|
||||
tags["tipl"] = []string{""}
|
||||
parseTIPL(tags)
|
||||
taglib.parseTIPL(tags)
|
||||
Expect(tags).To(BeEmpty())
|
||||
})
|
||||
})
|
||||
|
||||
Context("when the TIPL is not present", func() {
|
||||
It("does nothing", func() {
|
||||
parseTIPL(tags)
|
||||
taglib.parseTIPL(tags)
|
||||
Expect(tags).To(BeEmpty())
|
||||
})
|
||||
})
|
|
@ -50,119 +50,13 @@ int taglib_read(const FILENAME_CHAR_T *filename, unsigned long id) {
|
|||
// Create a map to collect all the tags
|
||||
TagLib::PropertyMap tags = f.file()->properties();
|
||||
|
||||
// Make sure at least the basic properties are extracted
|
||||
TagLib::Tag *basic = f.file()->tag();
|
||||
if (!basic->isEmpty()) {
|
||||
if (!basic->title().isEmpty()) {
|
||||
tags.insert("title", basic->title());
|
||||
}
|
||||
if (!basic->artist().isEmpty()) {
|
||||
tags.insert("artist", basic->artist());
|
||||
}
|
||||
if (!basic->album().isEmpty()) {
|
||||
tags.insert("album", basic->album());
|
||||
}
|
||||
if (basic->year() > 0) {
|
||||
tags.insert("date", TagLib::String::number(basic->year()));
|
||||
}
|
||||
if (basic->track() > 0) {
|
||||
tags.insert("_track", TagLib::String::number(basic->track()));
|
||||
}
|
||||
}
|
||||
|
||||
TagLib::ID3v2::Tag *id3Tags = NULL;
|
||||
|
||||
// Get some extended/non-standard ID3-only tags (ex: iTunes extended frames)
|
||||
TagLib::MPEG::File *mp3File(dynamic_cast<TagLib::MPEG::File *>(f.file()));
|
||||
if (mp3File != NULL) {
|
||||
id3Tags = mp3File->ID3v2Tag();
|
||||
}
|
||||
|
||||
if (id3Tags == NULL) {
|
||||
TagLib::RIFF::WAV::File *wavFile(dynamic_cast<TagLib::RIFF::WAV::File *>(f.file()));
|
||||
if (wavFile != NULL && wavFile->hasID3v2Tag()) {
|
||||
id3Tags = wavFile->ID3v2Tag();
|
||||
}
|
||||
}
|
||||
|
||||
if (id3Tags == NULL) {
|
||||
TagLib::RIFF::AIFF::File *aiffFile(dynamic_cast<TagLib::RIFF::AIFF::File *>(f.file()));
|
||||
if (aiffFile && aiffFile->hasID3v2Tag()) {
|
||||
id3Tags = aiffFile->tag();
|
||||
}
|
||||
}
|
||||
|
||||
// Yes, it is possible to have ID3v2 tags in FLAC. However, that can cause problems
|
||||
// with many players, so they will not be parsed
|
||||
|
||||
if (id3Tags != NULL) {
|
||||
const auto &frames = id3Tags->frameListMap();
|
||||
|
||||
for (const auto &kv: frames) {
|
||||
if (kv.first == "USLT") {
|
||||
for (const auto &tag: kv.second) {
|
||||
TagLib::ID3v2::UnsynchronizedLyricsFrame *frame = dynamic_cast<TagLib::ID3v2::UnsynchronizedLyricsFrame *>(tag);
|
||||
if (frame == NULL) continue;
|
||||
|
||||
tags.erase("LYRICS");
|
||||
|
||||
const auto bv = frame->language();
|
||||
char language[4] = {'x', 'x', 'x', '\0'};
|
||||
if (bv.size() == 3) {
|
||||
strncpy(language, bv.data(), 3);
|
||||
}
|
||||
|
||||
char *val = (char *)frame->text().toCString(true);
|
||||
|
||||
go_map_put_lyrics(id, language, val);
|
||||
}
|
||||
} else if (kv.first == "SYLT") {
|
||||
for (const auto &tag: kv.second) {
|
||||
TagLib::ID3v2::SynchronizedLyricsFrame *frame = dynamic_cast<TagLib::ID3v2::SynchronizedLyricsFrame *>(tag);
|
||||
if (frame == NULL) continue;
|
||||
|
||||
const auto bv = frame->language();
|
||||
char language[4] = {'x', 'x', 'x', '\0'};
|
||||
if (bv.size() == 3) {
|
||||
strncpy(language, bv.data(), 3);
|
||||
}
|
||||
|
||||
const auto format = frame->timestampFormat();
|
||||
if (format == TagLib::ID3v2::SynchronizedLyricsFrame::AbsoluteMilliseconds) {
|
||||
|
||||
for (const auto &line: frame->synchedText()) {
|
||||
char *text = (char *)line.text.toCString(true);
|
||||
go_map_put_lyric_line(id, language, text, line.time);
|
||||
}
|
||||
} else if (format == TagLib::ID3v2::SynchronizedLyricsFrame::AbsoluteMpegFrames) {
|
||||
const int sampleRate = props->sampleRate();
|
||||
|
||||
if (sampleRate != 0) {
|
||||
for (const auto &line: frame->synchedText()) {
|
||||
const int timeInMs = (line.time * 1000) / sampleRate;
|
||||
char *text = (char *)line.text.toCString(true);
|
||||
go_map_put_lyric_line(id, language, text, timeInMs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!kv.second.isEmpty()) {
|
||||
tags.insert(kv.first, kv.second.front()->toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// M4A may have some iTunes specific tags
|
||||
TagLib::MP4::File *m4afile(dynamic_cast<TagLib::MP4::File *>(f.file()));
|
||||
if (m4afile != NULL) {
|
||||
const auto itemListMap = m4afile->tag()->itemMap();
|
||||
for (const auto item: itemListMap) {
|
||||
char *key = (char *)item.first.toCString(true);
|
||||
for (const auto value: item.second.toStringList()) {
|
||||
char *val = (char *)value.toCString(true);
|
||||
go_map_put_m4a_str(id, key, val);
|
||||
tags.insert(item.first, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -177,6 +71,26 @@ int taglib_read(const FILENAME_CHAR_T *filename, unsigned long id) {
|
|||
}
|
||||
}
|
||||
|
||||
// Make sure at least the basic properties are extracted
|
||||
TagLib::Tag *basic = f.file()->tag();
|
||||
if (!basic->isEmpty()) {
|
||||
if (!basic->title().isEmpty()) {
|
||||
tags.insert("_title", basic->title());
|
||||
}
|
||||
if (!basic->artist().isEmpty()) {
|
||||
tags.insert("_artist", basic->artist());
|
||||
}
|
||||
if (!basic->album().isEmpty()) {
|
||||
tags.insert("_album", basic->album());
|
||||
}
|
||||
if (basic->year() > 0) {
|
||||
tags.insert("_date", TagLib::String::number(basic->year()));
|
||||
}
|
||||
if (basic->track() > 0) {
|
||||
tags.insert("_track", TagLib::String::number(basic->track()));
|
||||
}
|
||||
}
|
||||
|
||||
// Send all collected tags to the Go map
|
||||
for (TagLib::PropertyMap::ConstIterator i = tags.begin(); i != tags.end();
|
||||
++i) {
|
|
@ -34,8 +34,8 @@ func Read(filename string) (tags map[string][]string, err error) {
|
|||
debug.SetPanicOnFault(true)
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Error("TagLib: recovered from panic when reading tags", "file", filename, "error", r)
|
||||
err = fmt.Errorf("TagLib: recovered from panic: %s", r)
|
||||
log.Error("extractor: recovered from panic when reading tags", "file", filename, "error", r)
|
||||
err = fmt.Errorf("extractor: recovered from panic: %s", r)
|
||||
}
|
||||
}()
|
||||
|
||||
|
@ -44,7 +44,7 @@ func Read(filename string) (tags map[string][]string, err error) {
|
|||
id, m := newMap()
|
||||
defer deleteMap(id)
|
||||
|
||||
log.Trace("TagLib: reading tags", "filename", filename, "map_id", id)
|
||||
log.Trace("extractor: reading tags", "filename", filename, "map_id", id)
|
||||
res := C.taglib_read(fp, C.ulong(id))
|
||||
switch res {
|
||||
case C.TAGLIB_ERR_PARSE:
|
||||
|
@ -64,9 +64,9 @@ func Read(filename string) (tags map[string][]string, err error) {
|
|||
}
|
||||
if log.IsGreaterOrEqualTo(log.LevelDebug) {
|
||||
j, _ := json.Marshal(m)
|
||||
log.Trace("TagLib: read tags", "tags", string(j), "filename", filename, "id", id)
|
||||
log.Trace("extractor: read tags", "tags", string(j), "filename", filename, "id", id)
|
||||
} else {
|
||||
log.Trace("TagLib: read tags", "tags", m, "filename", filename, "id", id)
|
||||
log.Trace("extractor: read tags", "tags", m, "filename", filename, "id", id)
|
||||
}
|
||||
|
||||
return m, nil
|
||||
|
@ -126,7 +126,7 @@ func do_put_map(id C.ulong, key string, val *C.char) {
|
|||
}
|
||||
|
||||
/*
|
||||
As I'm working on the new scanner, I see that the `properties` from TagLib is ill-suited to extract multi-valued ID3 frames. I'll have to change the way we do it for ID3, probably by sending the raw frames to Go and mapping there, instead of relying on the auto-mapped `properties`. I think this would reduce our reliance on C++, while also giving us more flexibility, including parsing the USLT / SYLT frames in Go
|
||||
As I'm working on the new scanner, I see that the `properties` from extractor is ill-suited to extract multi-valued ID3 frames. I'll have to change the way we do it for ID3, probably by sending the raw frames to Go and mapping there, instead of relying on the auto-mapped `properties`. I think this would reduce our reliance on C++, while also giving us more flexibility, including parsing the USLT / SYLT frames in Go
|
||||
*/
|
||||
|
||||
//export go_map_put_int
|
1
main.go
1
main.go
|
@ -3,6 +3,7 @@ package main
|
|||
import (
|
||||
_ "net/http/pprof" //nolint:gosec
|
||||
|
||||
_ "github.com/navidrome/navidrome/adapters/taglib"
|
||||
"github.com/navidrome/navidrome/cmd"
|
||||
)
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package tag
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
)
|
||||
|
||||
|
@ -23,6 +24,7 @@ var (
|
|||
func RegisterExtractor(id string, parser Extractor) {
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
fmt.Println("!!!! Registering extractor", id, "version", parser.Version())
|
||||
extractors[id] = parser
|
||||
}
|
||||
|
||||
|
|
|
@ -25,6 +25,7 @@ genre: [ TCON, GENRE, ©gen, WM/Genre, IGNR ]
|
|||
compilation: [ TCMP, COMPILATION, cpil, WM/IsCompilation ]
|
||||
track: [ TRACK, TRCK, TRACKNUMBER, trkn, WM/TrackNumber, ITRK ]
|
||||
tracktotal: [ TRACKTOTAL, TOTALTRACKS ]
|
||||
subtitle: [ TIT3, SUBTITLE, ----:com.apple.iTunes:SUBTITLE, WM/SubTitle ]
|
||||
disc: [ TPOS, DISC, DISCNUMBER, disk, WM/PartOfSet ]
|
||||
disctotal: [ DISCTOTAL, TOTALDISCS ]
|
||||
discsubtitle: [ TSST, DISCSUBTITLE, ----:com.apple.iTunes:DISCSUBTITLE, WM/SetSubTitle ]
|
||||
|
@ -68,6 +69,5 @@ releasecountry: [ TXXX:MusicBrainz Album Release Country, RELEASECOUNTRY, ----:c
|
|||
releasestatus: [ TXXX:MusicBrainz Album Status, RELEASESTATUS, MUSICBRAINZ_ALBUMSTATUS, ----:com.apple.iTunes:MusicBrainz Album Status, MusicBrainz/Album Status ]
|
||||
releasetype: [ TXXX:MusicBrainz Album Type, RELEASETYPE, MUSICBRAINZ_ALBUMTYPE, ----:com.apple.iTunes:MusicBrainz Album Type, MusicBrainz/Album Type ]
|
||||
script: [ TXXX:SCRIPT, SCRIPT, ----:com.apple.iTunes:SCRIPT, WM/Script ]
|
||||
subtitle: [ TIT3, SUBTITLE, ----:com.apple.iTunes:SUBTITLE, WM/SubTitle ]
|
||||
website: [ WOAR, WEBSITE, Weblink, WM/AuthorURL ]
|
||||
work: [ TXXX:WORK, TIT1, WORK, ©wrk, WM/Work ]
|
||||
|
|
|
@ -18,7 +18,6 @@ import (
|
|||
"github.com/navidrome/navidrome/model/request"
|
||||
"github.com/navidrome/navidrome/scanner/metadata"
|
||||
_ "github.com/navidrome/navidrome/scanner/metadata/ffmpeg"
|
||||
_ "github.com/navidrome/navidrome/scanner/metadata/taglib"
|
||||
"github.com/navidrome/navidrome/utils/pl"
|
||||
"github.com/navidrome/navidrome/utils/slice"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
|
Loading…
Reference in New Issue