Sort repeated lyrics that may be out of order (#2989)

In synchronized lyrics, a line of text can be repeated by prefixing it with several timestamps, and there is no guarantee that the resulting lines appear in chronological order (e.g. `[00:00.00][00:10.00] a\n[00:05.00]b`).
This change post-processes lyrics that contain a line with multiple timestamps so that the resulting lines are always sorted by start time.
This commit is contained in:
Kendall Garner 2024-05-02 01:54:46 +00:00 committed by GitHub
parent 8f11b991d2
commit a4c2232041
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 76 additions and 39 deletions

View File

@ -1,7 +1,9 @@
package model
import (
"cmp"
"regexp"
"slices"
"strconv"
"strings"
@ -46,6 +48,7 @@ func ToLyrics(language, text string) (*Lyrics, error) {
synced := syncRegex.MatchString(text)
priorLine := ""
validLine := false
repeated := false
var timestamps []int64
for _, line := range lines {
@ -82,6 +85,10 @@ func ToLyrics(language, text string) (*Lyrics, error) {
}
times := timeRegex.FindAllStringSubmatchIndex(line, -1)
if len(times) > 1 {
repeated = true
}
// The second condition is for when there is a timestamp in the middle of
// a line (after any text)
if times == nil || times[0][0] != 0 {
@ -105,9 +112,6 @@ func ToLyrics(language, text string) (*Lyrics, error) {
// [fullStart, fullEnd, hourStart, hourEnd, minStart, minEnd, secStart, secEnd, msStart, msEnd]
for _, match := range times {
var hours, millis int64
var err error
// for multiple matches, we need to check that later matches are not
// in the middle of the string
if end != 0 {
@ -118,46 +122,11 @@ func ToLyrics(language, text string) (*Lyrics, error) {
}
end = match[1]
hourStart := match[2]
if hourStart != -1 {
// subtract 1 because group has : at the end
hourEnd := match[3] - 1
hours, err = strconv.ParseInt(line[hourStart:hourEnd], 10, 64)
if err != nil {
return nil, err
}
}
minutes, err := strconv.ParseInt(line[match[4]:match[5]], 10, 64)
timeInMillis, err := parseTime(line, match)
if err != nil {
return nil, err
}
sec, err := strconv.ParseInt(line[match[6]:match[7]], 10, 64)
if err != nil {
return nil, err
}
msStart := match[8]
if msStart != -1 {
msEnd := match[9]
// +1 offset since this capture group contains .
millis, err = strconv.ParseInt(line[msStart+1:msEnd], 10, 64)
if err != nil {
return nil, err
}
length := msEnd - msStart
if length == 3 {
millis *= 10
} else if length == 2 {
millis *= 100
}
}
timeInMillis := (((((hours * 60) + minutes) * 60) + sec) * 1000) + millis
timestamps = append(timestamps, timeInMillis)
}
@ -186,6 +155,14 @@ func ToLyrics(language, text string) (*Lyrics, error) {
}
}
// If any line carried more than one timestamp, there is no guarantee that the
// resulting lines are in order. In this case, sort the lyrics by start time.
if repeated {
slices.SortFunc(structuredLines, func(a, b Line) int {
return cmp.Compare(*a.Start, *b.Start)
})
}
lyrics := Lyrics{
DisplayArtist: artist,
DisplayTitle: title,
@ -198,4 +175,50 @@ func ToLyrics(language, text string) (*Lyrics, error) {
return &lyrics, nil
}
// parseTime converts a single timestamp match within line into a duration
// expressed in milliseconds.
//
// match holds the submatch indices for one timestamp, laid out as
// [fullStart, fullEnd, hourStart, hourEnd, minStart, minEnd, secStart, secEnd, msStart, msEnd].
// The hour and fractional groups are optional; an index of -1 marks a group
// that did not participate in the match.
//
// It returns 0 and the strconv error if any numeric component fails to parse.
func parseTime(line string, match []int) (int64, error) {
	var hours, millis int64

	// The hour group is captured together with its trailing ':', so the
	// digits end one byte before the end of the group.
	if start := match[2]; start != -1 {
		h, err := strconv.ParseInt(line[start:match[3]-1], 10, 64)
		if err != nil {
			return 0, err
		}
		hours = h
	}

	minutes, err := strconv.ParseInt(line[match[4]:match[5]], 10, 64)
	if err != nil {
		return 0, err
	}

	seconds, err := strconv.ParseInt(line[match[6]:match[7]], 10, 64)
	if err != nil {
		return 0, err
	}

	// The fractional group is captured together with its leading separator,
	// so the digits start one byte after the start of the group.
	if start := match[8]; start != -1 {
		end := match[9]
		frac, err := strconv.ParseInt(line[start+1:end], 10, 64)
		if err != nil {
			return 0, err
		}

		// The width includes the separator: scale hundredths (two digits)
		// and tenths (one digit) up to milliseconds.
		switch end - start {
		case 3:
			frac *= 10
		case 2:
			frac *= 100
		}
		millis = frac
	}

	totalMinutes := hours*60 + minutes
	totalSeconds := totalMinutes*60 + seconds
	return totalSeconds*1000 + millis, nil
}
type LyricList []Lyrics

View File

@ -101,4 +101,18 @@ var _ = Describe("ToLyrics", func() {
{Start: &c, Value: "c"},
}))
})
// Verifies that lines carrying several timestamps are expanded into one entry
// per timestamp and that the final result is ordered by ascending start time.
It("Properly sorts repeated lyrics out of order", func() {
// Expected start times in milliseconds: 0s, 10s, 40s, 13 minutes and 51 hours.
a, b, c, d, e := int64(0), int64(10000), int64(40000), int64(13*60*1000), int64(1000*60*60*51)
// The first two input lines each carry two timestamps; the second timestamp
// on each is later than the start time of the lines that follow it.
lyrics, err := ToLyrics("xxx", "[00:00.00] [13:00]Repeated\n[00:10.00][51:00:00.00]Test\n[00:40.00]Not repeated")
Expect(err).ToNot(HaveOccurred())
Expect(lyrics.Synced).To(BeTrue())
// Each repeated text appears once per timestamp, in chronological order.
Expect(lyrics.Line).To(Equal([]Line{
{Start: &a, Value: "Repeated"},
{Start: &b, Value: "Test"},
{Start: &c, Value: "Not repeated"},
{Start: &d, Value: "Repeated"},
{Start: &e, Value: "Test"},
}))
})
})