mirror of https://github.com/miniflux/v2.git
Calculate reading time during feed processing
The goal is to speed up the user interface. Detecting the language based on the content is pretty slow.
This commit is contained in:
parent
b1c9977711
commit
de7a613098
|
@ -129,20 +129,21 @@ type Feeds []*Feed
|
|||
|
||||
// Entry represents a subscription item in the system.
|
||||
type Entry struct {
|
||||
ID int64 `json:"id"`
|
||||
UserID int64 `json:"user_id"`
|
||||
FeedID int64 `json:"feed_id"`
|
||||
Status string `json:"status"`
|
||||
Hash string `json:"hash"`
|
||||
Title string `json:"title"`
|
||||
URL string `json:"url"`
|
||||
Date time.Time `json:"published_at"`
|
||||
Content string `json:"content"`
|
||||
Author string `json:"author"`
|
||||
ShareCode string `json:"share_code"`
|
||||
Starred bool `json:"starred"`
|
||||
Enclosures Enclosures `json:"enclosures,omitempty"`
|
||||
Feed *Feed `json:"feed,omitempty"`
|
||||
ID int64 `json:"id"`
|
||||
UserID int64 `json:"user_id"`
|
||||
FeedID int64 `json:"feed_id"`
|
||||
Status string `json:"status"`
|
||||
Hash string `json:"hash"`
|
||||
Title string `json:"title"`
|
||||
URL string `json:"url"`
|
||||
Date time.Time `json:"published_at"`
|
||||
Content string `json:"content"`
|
||||
Author string `json:"author"`
|
||||
ShareCode string `json:"share_code"`
|
||||
Starred bool `json:"starred"`
|
||||
ReadingTime int `json:"reading_time"`
|
||||
Enclosures Enclosures `json:"enclosures,omitempty"`
|
||||
Feed *Feed `json:"feed,omitempty"`
|
||||
}
|
||||
|
||||
// Entries represents a list of entries.
|
||||
|
|
|
@ -12,7 +12,7 @@ import (
|
|||
"miniflux.app/logger"
|
||||
)
|
||||
|
||||
const schemaVersion = 40
|
||||
const schemaVersion = 41
|
||||
|
||||
// Migrate executes database migrations.
|
||||
func Migrate(db *sql.DB) {
|
||||
|
|
|
@ -203,6 +203,7 @@ alter table users add column entry_direction entry_sorting_direction default 'as
|
|||
add column keeplist_rules text not null default ''
|
||||
;
|
||||
`,
|
||||
"schema_version_41": `alter table entries add column reading_time int not null default 0;`,
|
||||
"schema_version_5": `create table integrations (
|
||||
user_id int not null,
|
||||
pinboard_enabled bool default 'f',
|
||||
|
@ -264,6 +265,7 @@ var SqlMapChecksums = map[string]string{
|
|||
"schema_version_39": "b0f90b97502921d4681a07c64d180a91a0b4ccac7d3c1dbe30519ad6f1bf1737",
|
||||
"schema_version_4": "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
|
||||
"schema_version_40": "6a8fec92399f853ed6817aff4cfa43255dce4c19afad796e41519d09de62105e",
|
||||
"schema_version_41": "128e118ce61267ea1f6ae03b63a6d4734eae87e520b00e309ad083f1f6afdfe5",
|
||||
"schema_version_5": "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
|
||||
"schema_version_6": "9d05b4fb223f0e60efc716add5048b0ca9c37511cf2041721e20505d6d798ce4",
|
||||
"schema_version_7": "33f298c9aa30d6de3ca28e1270df51c2884d7596f1283a75716e2aeb634cd05c",
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
alter table entries add column reading_time int not null default 0;
|
|
@ -33,6 +33,7 @@ type Entry struct {
|
|||
Author string `json:"author"`
|
||||
ShareCode string `json:"share_code"`
|
||||
Starred bool `json:"starred"`
|
||||
ReadingTime int `json:"reading_time"`
|
||||
Enclosures EnclosureList `json:"enclosures,omitempty"`
|
||||
Feed *Feed `json:"feed,omitempty"`
|
||||
}
|
||||
|
|
|
@ -5,8 +5,11 @@
|
|||
package processor
|
||||
|
||||
import (
|
||||
"math"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/config"
|
||||
"miniflux.app/logger"
|
||||
|
@ -16,6 +19,8 @@ import (
|
|||
"miniflux.app/reader/sanitizer"
|
||||
"miniflux.app/reader/scraper"
|
||||
"miniflux.app/storage"
|
||||
|
||||
"github.com/rylans/getlang"
|
||||
)
|
||||
|
||||
// ProcessFeedEntries downloads original web page for entries and apply filters.
|
||||
|
@ -58,6 +63,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed) {
|
|||
// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
|
||||
entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
|
||||
|
||||
entry.ReadingTime = calculateReadingTime(entry.Content)
|
||||
filteredEntries = append(filteredEntries, entry)
|
||||
}
|
||||
|
||||
|
@ -108,7 +114,23 @@ func ProcessEntryWebPage(entry *model.Entry) error {
|
|||
|
||||
if content != "" {
|
||||
entry.Content = content
|
||||
entry.ReadingTime = calculateReadingTime(content)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func calculateReadingTime(content string) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
languageInfo := getlang.FromString(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
||||
|
|
|
@ -75,11 +75,11 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
|
|||
UPDATE
|
||||
entries
|
||||
SET
|
||||
content=$1
|
||||
content=$1, reading_time=$2
|
||||
WHERE
|
||||
id=$2 AND user_id=$3
|
||||
id=$3 AND user_id=$4
|
||||
`
|
||||
_, err = tx.Exec(query, entry.Content, entry.ID, entry.UserID)
|
||||
_, err = tx.Exec(query, entry.Content, entry.ReadingTime, entry.ID, entry.UserID)
|
||||
if err != nil {
|
||||
tx.Rollback()
|
||||
return fmt.Errorf(`store: unable to update content of entry #%d: %v`, entry.ID, err)
|
||||
|
@ -106,9 +106,35 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error {
|
|||
func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
||||
query := `
|
||||
INSERT INTO entries
|
||||
(title, hash, url, comments_url, published_at, content, author, user_id, feed_id, changed_at, document_vectors)
|
||||
(
|
||||
title,
|
||||
hash,
|
||||
url,
|
||||
comments_url,
|
||||
published_at,
|
||||
content,
|
||||
author,
|
||||
user_id,
|
||||
feed_id,
|
||||
reading_time,
|
||||
changed_at,
|
||||
document_vectors
|
||||
)
|
||||
VALUES
|
||||
($1, $2, $3, $4, $5, $6, $7, $8, $9, now(), setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B'))
|
||||
(
|
||||
$1,
|
||||
$2,
|
||||
$3,
|
||||
$4,
|
||||
$5,
|
||||
$6,
|
||||
$7,
|
||||
$8,
|
||||
$9,
|
||||
$10,
|
||||
now(),
|
||||
setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B')
|
||||
)
|
||||
RETURNING
|
||||
id, status
|
||||
`
|
||||
|
@ -123,6 +149,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
entry.Author,
|
||||
entry.UserID,
|
||||
entry.FeedID,
|
||||
entry.ReadingTime,
|
||||
).Scan(&entry.ID, &entry.Status)
|
||||
|
||||
if err != nil {
|
||||
|
@ -154,9 +181,10 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
comments_url=$3,
|
||||
content=$4,
|
||||
author=$5,
|
||||
reading_time=$6,
|
||||
document_vectors = setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($4, '') for 1000000)), 'B')
|
||||
WHERE
|
||||
user_id=$6 AND feed_id=$7 AND hash=$8
|
||||
user_id=$7 AND feed_id=$8 AND hash=$9
|
||||
RETURNING
|
||||
id
|
||||
`
|
||||
|
@ -167,6 +195,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
entry.CommentsURL,
|
||||
entry.Content,
|
||||
entry.Author,
|
||||
entry.ReadingTime,
|
||||
entry.UserID,
|
||||
entry.FeedID,
|
||||
entry.Hash,
|
||||
|
|
|
@ -226,6 +226,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
|
|||
e.content,
|
||||
e.status,
|
||||
e.starred,
|
||||
e.reading_time,
|
||||
f.title as feed_title,
|
||||
f.feed_url,
|
||||
f.site_url,
|
||||
|
@ -284,6 +285,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
|
|||
&entry.Content,
|
||||
&entry.Status,
|
||||
&entry.Starred,
|
||||
&entry.ReadingTime,
|
||||
&entry.Feed.Title,
|
||||
&entry.Feed.FeedURL,
|
||||
&entry.Feed.SiteURL,
|
||||
|
|
|
@ -242,10 +242,10 @@ SOFTWARE.
|
|||
<li>
|
||||
<time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
|
||||
</li>
|
||||
{{ if .user.ShowReadingTime }}
|
||||
{{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
|
||||
<li>
|
||||
<span>
|
||||
{{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
|
||||
{{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
|
||||
</span>
|
||||
</li>
|
||||
{{ end }}
|
||||
|
@ -523,7 +523,7 @@ var templateCommonMapChecksums = map[string]string{
|
|||
"feed_list": "931e43d328a116318c510de5658c688cd940b934c86b6ec82a472e1f81e020ae",
|
||||
"feed_menu": "318d8662dda5ca9dfc75b909c8461e79c86fb5082df1428f67aaf856f19f4b50",
|
||||
"icons": "9a41753778072f286216085d8712495e2ccca20c7a24f5c982775436a3d38579",
|
||||
"item_meta": "eb72c6e2a924759af20b8ef41f2ce7495aedc053181c2e5ca1b063f9410c58b0",
|
||||
"item_meta": "56ab09d7dd46eeb2e2ee11ddcec0c157a5832c896dbd2887d9e2b013680b2af6",
|
||||
"layout": "65767e7dbebe1f7ed42895ecd5a737b0693e4a2ec35e84e3e391f462beb11977",
|
||||
"pagination": "7b61288e86283c4cf0dc83bcbf8bf1c00c7cb29e60201c8c0b633b2450d2911f",
|
||||
"settings_menu": "e2b777630c0efdbc529800303c01d6744ed3af80ec505ac5a5b3f99c9b989156",
|
||||
|
|
|
@ -65,9 +65,6 @@ func (e *Engine) Render(name, language string, data interface{}) []byte {
|
|||
"plural": func(key string, n int, args ...interface{}) string {
|
||||
return printer.Plural(key, n, args...)
|
||||
},
|
||||
"timeToRead": func(content string) int {
|
||||
return timeToRead(content)
|
||||
},
|
||||
})
|
||||
|
||||
var b bytes.Buffer
|
||||
|
|
|
@ -11,19 +11,16 @@ import (
|
|||
"net/mail"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/config"
|
||||
"miniflux.app/http/route"
|
||||
"miniflux.app/locale"
|
||||
"miniflux.app/model"
|
||||
"miniflux.app/proxy"
|
||||
"miniflux.app/reader/sanitizer"
|
||||
"miniflux.app/timezone"
|
||||
"miniflux.app/url"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/rylans/getlang"
|
||||
)
|
||||
|
||||
type funcMap struct {
|
||||
|
@ -94,9 +91,6 @@ func (f *funcMap) Map() template.FuncMap {
|
|||
"plural": func(key string, n int, args ...interface{}) string {
|
||||
return ""
|
||||
},
|
||||
"timeToRead": func(content string) int {
|
||||
return 0
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -195,18 +189,3 @@ func formatFileSize(b int64) string {
|
|||
return fmt.Sprintf("%.1f %ciB",
|
||||
float64(b)/float64(div), "KMGTPE"[exp])
|
||||
}
|
||||
|
||||
func timeToRead(content string) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
languageInfo := getlang.FromString(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / 500))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / 265))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
||||
|
|
|
@ -7,10 +7,10 @@
|
|||
<li>
|
||||
<time datetime="{{ isodate .entry.Date }}" title="{{ isodate .entry.Date }}">{{ elapsed .user.Timezone .entry.Date }}</time>
|
||||
</li>
|
||||
{{ if .user.ShowReadingTime }}
|
||||
{{ if and .user.ShowReadingTime (gt .entry.ReadingTime 0) }}
|
||||
<li>
|
||||
<span>
|
||||
{{ plural "entry.estimated_reading_time" (timeToRead .entry.Content) (timeToRead .entry.Content) }}
|
||||
{{ plural "entry.estimated_reading_time" .entry.ReadingTime .entry.ReadingTime }}
|
||||
</span>
|
||||
</li>
|
||||
{{ end }}
|
||||
|
|
Loading…
Reference in New Issue