Remove duplicated words and extra spaces from full text searchable fields

This commit is contained in:
Deluan 2020-04-08 23:29:28 -04:00
parent 0fa8290ed3
commit b8d1185f7f
3 changed files with 47 additions and 9 deletions

View File

@ -32,7 +32,7 @@ func TestPersistence(t *testing.T) {
var (
artistKraftwerk = model.Artist{ID: "2", Name: "Kraftwerk", AlbumCount: 1, FullText: "kraftwerk"}
artistBeatles = model.Artist{ID: "3", Name: "The Beatles", AlbumCount: 2, FullText: "the beatles"}
artistBeatles = model.Artist{ID: "3", Name: "The Beatles", AlbumCount: 2, FullText: "beatles the"}
testArtists = model.Artists{
artistKraftwerk,
artistBeatles,
@ -40,9 +40,9 @@ var (
)
var (
albumSgtPeppers = model.Album{ID: "1", Name: "Sgt Peppers", Artist: "The Beatles", AlbumArtistID: "3", Genre: "Rock", CoverArtId: "1", CoverArtPath: P("/beatles/1/sgt/a day.mp3"), SongCount: 1, MaxYear: 1967, FullText: "sgt peppers the beatles"}
albumAbbeyRoad = model.Album{ID: "2", Name: "Abbey Road", Artist: "The Beatles", AlbumArtistID: "3", Genre: "Rock", CoverArtId: "2", CoverArtPath: P("/beatles/1/come together.mp3"), SongCount: 1, MaxYear: 1969, FullText: "abbey road the beatles"}
albumRadioactivity = model.Album{ID: "3", Name: "Radioactivity", Artist: "Kraftwerk", AlbumArtistID: "2", Genre: "Electronic", CoverArtId: "3", CoverArtPath: P("/kraft/radio/radio.mp3"), SongCount: 2, FullText: "radioactivity kraftwerk"}
albumSgtPeppers = model.Album{ID: "1", Name: "Sgt Peppers", Artist: "The Beatles", AlbumArtistID: "3", Genre: "Rock", CoverArtId: "1", CoverArtPath: P("/beatles/1/sgt/a day.mp3"), SongCount: 1, MaxYear: 1967, FullText: "beatles peppers sgt the"}
albumAbbeyRoad = model.Album{ID: "2", Name: "Abbey Road", Artist: "The Beatles", AlbumArtistID: "3", Genre: "Rock", CoverArtId: "2", CoverArtPath: P("/beatles/1/come together.mp3"), SongCount: 1, MaxYear: 1969, FullText: "abbey beatles road the"}
albumRadioactivity = model.Album{ID: "3", Name: "Radioactivity", Artist: "Kraftwerk", AlbumArtistID: "2", Genre: "Electronic", CoverArtId: "3", CoverArtPath: P("/kraft/radio/radio.mp3"), SongCount: 2, FullText: "kraftwerk radioactivity"}
testAlbums = model.Albums{
albumSgtPeppers,
albumAbbeyRoad,
@ -51,10 +51,10 @@ var (
)
var (
songDayInALife = model.MediaFile{ID: "1", Title: "A Day In A Life", ArtistID: "3", Artist: "The Beatles", AlbumID: "1", Album: "Sgt Peppers", Genre: "Rock", Path: P("/beatles/1/sgt/a day.mp3"), FullText: "a day in a life sgt peppers the beatles"}
songComeTogether = model.MediaFile{ID: "2", Title: "Come Together", ArtistID: "3", Artist: "The Beatles", AlbumID: "2", Album: "Abbey Road", Genre: "Rock", Path: P("/beatles/1/come together.mp3"), FullText: "come together abbey road the beatles"}
songRadioactivity = model.MediaFile{ID: "3", Title: "Radioactivity", ArtistID: "2", Artist: "Kraftwerk", AlbumID: "3", Album: "Radioactivity", Genre: "Electronic", Path: P("/kraft/radio/radio.mp3"), FullText: "radioactivity radioactivity kraftwerk"}
songAntenna = model.MediaFile{ID: "4", Title: "Antenna", ArtistID: "2", Artist: "Kraftwerk", AlbumID: "3", Genre: "Electronic", Path: P("/kraft/radio/antenna.mp3"), FullText: "antenna kraftwerk"}
songDayInALife = model.MediaFile{ID: "1", Title: "A Day In A Life", ArtistID: "3", Artist: "The Beatles", AlbumID: "1", Album: "Sgt Peppers", Genre: "Rock", Path: P("/beatles/1/sgt/a day.mp3"), FullText: "a beatles day in life peppers sgt the"}
songComeTogether = model.MediaFile{ID: "2", Title: "Come Together", ArtistID: "3", Artist: "The Beatles", AlbumID: "2", Album: "Abbey Road", Genre: "Rock", Path: P("/beatles/1/come together.mp3"), FullText: "abbey beatles come road the together"}
songRadioactivity = model.MediaFile{ID: "3", Title: "Radioactivity", ArtistID: "2", Artist: "Kraftwerk", AlbumID: "3", Album: "Radioactivity", Genre: "Electronic", Path: P("/kraft/radio/radio.mp3"), FullText: "kraftwerk radioactivity"}
songAntenna = model.MediaFile{ID: "4", Title: "Antenna", ArtistID: "2", Artist: "Kraftwerk", AlbumID: "3", Genre: "Electronic", Path: P("/kraft/radio/antenna.mp3"), FullText: "antenna kraftwerk"}
testSongs = model.MediaFiles{
songDayInALife,
songComeTogether,

View File

@ -1,6 +1,7 @@
package persistence
import (
"sort"
"strings"
. "github.com/Masterminds/squirrel"
@ -12,7 +13,16 @@ func (r sqlRepository) getFullText(text ...string) string {
for _, txt := range text {
sanitizedText.WriteString(strings.TrimSpace(sanitize.Accents(strings.ToLower(txt))) + " ")
}
return strings.TrimSpace(sanitizedText.String())
words := make(map[string]struct{})
for _, w := range strings.Fields(sanitizedText.String()) {
words[w] = struct{}{}
}
var fullText []string
for w := range words {
fullText = append(fullText, w)
}
sort.Strings(fullText)
return strings.Join(fullText, " ")
}
func (r sqlRepository) doSearch(q string, offset, size int, results interface{}, orderBys ...string) error {

View File

@ -0,0 +1,28 @@
package persistence
import (
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
var _ = Describe("sqlRepository", func() {
var sqlRepository = &sqlRepository{}
Describe("getFullText", func() {
It("returns all lowercase chars", func() {
Expect(sqlRepository.getFullText("Some Text")).To(Equal("some text"))
})
It("removes accents", func() {
Expect(sqlRepository.getFullText("Quintão")).To(Equal("quintao"))
})
It("remove extra spaces", func() {
Expect(sqlRepository.getFullText(" some text ")).To(Equal("some text"))
})
It("remove duplicated words", func() {
Expect(sqlRepository.getFullText("legião urbana urbana legiÃo")).To(Equal("legiao urbana"))
})
})
})