Refactor RSS Parser to use an adapter

2024-03-13 21:06:28 -07:00 · 2024-03-13 21:06:28 -07:00 · 648b9a8f6f
parent 66b8483791
commit 648b9a8f6f
11 changed files with 497 additions and 364 deletions
--- a/internal/reader/atom/atom_10.go
+++ b/internal/reader/atom/atom_10.go
@ -91,7 +91,7 @@ type atom10Entry struct {
 	Content    atom10Text       `xml:"http://www.w3.org/2005/Atom content"`
 	Authors    atomAuthors      `xml:"author"`
 	Categories []atom10Category `xml:"category"`
-	media.Element
+	media.MediaItemElement
 }

 func (a *atom10Entry) Transform() *model.Entry {
--- a/internal/reader/googleplay/googleplay.go
+++ b/internal/reader/googleplay/googleplay.go
@ -6,7 +6,7 @@ package googleplay // import "miniflux.app/v2/internal/reader/googleplay"
 // Specs:
 // https://support.google.com/googleplay/podcasts/answer/6260341
 // https://www.google.com/schemas/play-podcasts/1.0/play-podcasts.xsd
-type GooglePlayFeedElement struct {
+type GooglePlayChannelElement struct {
 	GooglePlayAuthor      string                    `xml:"http://www.google.com/schemas/play-podcasts/1.0 author"`
 	GooglePlayEmail       string                    `xml:"http://www.google.com/schemas/play-podcasts/1.0 email"`
 	GooglePlayImage       GooglePlayImageElement    `xml:"http://www.google.com/schemas/play-podcasts/1.0 image"`
--- a/internal/reader/itunes/itunes.go
+++ b/internal/reader/itunes/itunes.go
@ -6,7 +6,7 @@ package itunes // import "miniflux.app/v2/internal/reader/itunes"
 import "strings"

 // Specs: https://help.apple.com/itc/podcasts_connect/#/itcb54353390
-type ItunesFeedElement struct {
+type ItunesChannelElement struct {
 	ItunesAuthor     string                  `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"`
 	ItunesBlock      string                  `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd block"`
 	ItunesCategories []ItunesCategoryElement `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd category"`
@ -22,7 +22,7 @@ type ItunesFeedElement struct {
 	ItunesType       string                  `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"`
 }

-func (i *ItunesFeedElement) GetItunesCategories() []string {
+func (i *ItunesChannelElement) GetItunesCategories() []string {
 	var categories []string
 	for _, category := range i.ItunesCategories {
 		categories = append(categories, category.Text)
--- a/internal/reader/media/media.go
+++ b/internal/reader/media/media.go
@ -11,9 +11,8 @@ import (

 var textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)

-// Element represents XML media elements.
 // Specs: https://www.rssboard.org/media-rss
-type Element struct {
+type MediaItemElement struct {
 	MediaGroups       []Group         `xml:"http://search.yahoo.com/mrss/ group"`
 	MediaContents     []Content       `xml:"http://search.yahoo.com/mrss/ content"`
 	MediaThumbnails   []Thumbnail     `xml:"http://search.yahoo.com/mrss/ thumbnail"`
@ -22,7 +21,7 @@ type Element struct {
 }

 // AllMediaThumbnails returns all thumbnail elements merged together.
-func (e *Element) AllMediaThumbnails() []Thumbnail {
+func (e *MediaItemElement) AllMediaThumbnails() []Thumbnail {
 	var items []Thumbnail
 	items = append(items, e.MediaThumbnails...)
 	for _, mediaGroup := range e.MediaGroups {
@ -32,7 +31,7 @@ func (e *Element) AllMediaThumbnails() []Thumbnail {
 }

 // AllMediaContents returns all content elements merged together.
-func (e *Element) AllMediaContents() []Content {
+func (e *MediaItemElement) AllMediaContents() []Content {
 	var items []Content
 	items = append(items, e.MediaContents...)
 	for _, mediaGroup := range e.MediaGroups {
@ -42,7 +41,7 @@ func (e *Element) AllMediaContents() []Content {
 }

 // AllMediaPeerLinks returns all peer link elements merged together.
-func (e *Element) AllMediaPeerLinks() []PeerLink {
+func (e *MediaItemElement) AllMediaPeerLinks() []PeerLink {
 	var items []PeerLink
 	items = append(items, e.MediaPeerLinks...)
 	for _, mediaGroup := range e.MediaGroups {
@ -52,7 +51,7 @@ func (e *Element) AllMediaPeerLinks() []PeerLink {
 }

 // FirstMediaDescription returns the first description element.
-func (e *Element) FirstMediaDescription() string {
+func (e *MediaItemElement) FirstMediaDescription() string {
 	description := e.MediaDescriptions.First()
 	if description != "" {
 		return description
--- a/internal/reader/rdf/adapter.go
+++ b/internal/reader/rdf/adapter.go
@ -28,15 +28,14 @@ func (r *RDFAdapter) BuildFeed(feedURL string) *model.Feed {
 	feed := &model.Feed{
 		Title:   stripTags(r.rdf.Channel.Title),
 		FeedURL: feedURL,
+		SiteURL: r.rdf.Channel.Link,
 	}

 	if feed.Title == "" {
 		feed.Title = feedURL
 	}

-	if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err != nil {
-		feed.SiteURL = r.rdf.Channel.Link
-	} else {
+	if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err == nil {
 		feed.SiteURL = siteURL
 	}

--- a/internal/reader/rss/adapter.go
+++ b/internal/reader/rss/adapter.go
@ -0,0 +1,310 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package rss // import "miniflux.app/v2/internal/reader/rss"
+
+import (
+	"html"
+	"log/slog"
+	"path"
+	"strconv"
+	"strings"
+	"time"
+
+	"miniflux.app/v2/internal/crypto"
+	"miniflux.app/v2/internal/model"
+	"miniflux.app/v2/internal/reader/date"
+	"miniflux.app/v2/internal/reader/sanitizer"
+	"miniflux.app/v2/internal/urllib"
+)
+
+type RSSAdapter struct {
+	rss *RSS
+}
+
+func NewRSSAdapter(rss *RSS) *RSSAdapter {
+	return &RSSAdapter{rss}
+}
+
+func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
+	feed := &model.Feed{
+		Title:   html.UnescapeString(strings.TrimSpace(r.rss.Channel.Title)),
+		FeedURL: feedURL,
+		SiteURL: strings.TrimSpace(r.rss.Channel.Link),
+	}
+	// Resolve the trimmed channel link against the feed URL; the trimmed raw value is kept on failure.
+	if siteURL, err := urllib.AbsoluteURL(feedURL, feed.SiteURL); err == nil {
+		feed.SiteURL = siteURL
+	}
+
+	// Try to find the feed URL from the Atom links.
+	for _, atomLink := range r.rss.Channel.AtomLinks.Links {
+		atomLinkHref := strings.TrimSpace(atomLink.URL)
+		if atomLinkHref != "" && atomLink.Rel == "self" {
+			if absoluteFeedURL, err := urllib.AbsoluteURL(feedURL, atomLinkHref); err == nil {
+				feed.FeedURL = absoluteFeedURL
+				break
+			}
+		}
+	}
+
+	// Fall back to the site URL if the title is empty.
+	if feed.Title == "" {
+		feed.Title = feed.SiteURL
+	}
+
+	// Get TTL if defined.
+	if r.rss.Channel.TTL != "" {
+		if ttl, err := strconv.Atoi(r.rss.Channel.TTL); err == nil {
+			feed.TTL = ttl
+		}
+	}
+
+	// Get the feed icon URL if defined (surrounding whitespace in <url> is ignored).
+	if r.rss.Channel.Image != nil {
+		if absoluteIconURL, err := urllib.AbsoluteURL(feed.SiteURL, strings.TrimSpace(r.rss.Channel.Image.URL)); err == nil {
+			feed.IconURL = absoluteIconURL
+		}
+	}
+
+	for _, item := range r.rss.Channel.Items {
+		entry := model.NewEntry()
+		entry.Author = findEntryAuthor(&item)
+		entry.Date = findEntryDate(&item)
+		entry.Content = findEntryContent(&item)
+		entry.Enclosures = findEntryEnclosures(&item)
+
+		// Populate the entry URL.
+		entryURL := findEntryURL(&item)
+		if entryURL == "" {
+			entry.URL = feed.SiteURL
+		} else {
+			if absoluteEntryURL, err := urllib.AbsoluteURL(feed.SiteURL, entryURL); err == nil {
+				entry.URL = absoluteEntryURL
+			} else {
+				entry.URL = entryURL
+			}
+		}
+
+		// Populate the entry title.
+		entry.Title = findEntryTitle(&item)
+		if entry.Title == "" {
+			entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
+		}
+
+		if entry.Title == "" {
+			entry.Title = entry.URL
+		}
+
+		if entry.Author == "" {
+			entry.Author = findFeedAuthor(&r.rss.Channel)
+		}
+
+		// Generate the entry hash.
+		for _, value := range []string{item.GUID.Data, entryURL} {
+			if value != "" {
+				entry.Hash = crypto.Hash(value)
+				break
+			}
+		}
+
+		// Find CommentsURL if defined.
+		if absoluteCommentsURL := strings.TrimSpace(item.CommentsURL); absoluteCommentsURL != "" && urllib.IsAbsoluteURL(absoluteCommentsURL) {
+			entry.CommentsURL = absoluteCommentsURL
+		}
+
+		// Set podcast listening time.
+		if item.ItunesDuration != "" {
+			if duration, err := getDurationInMinutes(item.ItunesDuration); err == nil {
+				entry.ReadingTime = duration
+			}
+		}
+
+		// Populate entry categories.
+		entry.Tags = append(entry.Tags, item.Categories...)
+		entry.Tags = append(entry.Tags, r.rss.Channel.Categories...)
+		entry.Tags = append(entry.Tags, r.rss.Channel.GetItunesCategories()...)
+
+		if r.rss.Channel.GooglePlayCategory.Text != "" {
+			entry.Tags = append(entry.Tags, r.rss.Channel.GooglePlayCategory.Text)
+		}
+
+		feed.Entries = append(feed.Entries, entry)
+	}
+
+	return feed
+}
+
+func findFeedAuthor(rssChannel *RSSChannel) string {
+	var author string
+	switch {
+	case rssChannel.ItunesAuthor != "":
+		author = rssChannel.ItunesAuthor
+	case rssChannel.GooglePlayAuthor != "":
+		author = rssChannel.GooglePlayAuthor
+	case rssChannel.ItunesOwner.String() != "":
+		author = rssChannel.ItunesOwner.String()
+	case rssChannel.ManagingEditor != "":
+		author = rssChannel.ManagingEditor
+	case rssChannel.Webmaster != "":
+		author = rssChannel.Webmaster
+	}
+	return sanitizer.StripTags(strings.TrimSpace(author))
+}
+
+func findEntryTitle(rssItem *RSSItem) string {
+	title := rssItem.Title
+
+	if rssItem.DublinCoreTitle != "" {
+		title = rssItem.DublinCoreTitle
+	}
+
+	return html.UnescapeString(strings.TrimSpace(title))
+}
+
+func findEntryURL(rssItem *RSSItem) string {
+	for _, link := range []string{rssItem.FeedBurnerLink, rssItem.Link} {
+		if link = strings.TrimSpace(link); link != "" { // whitespace-only values count as missing
+			return link
+		}
+	}
+	// Next choice: an Atom link whose rel is "alternate" (any case) or absent.
+	for _, atomLink := range rssItem.AtomLinks.Links {
+		if href := strings.TrimSpace(atomLink.URL); href != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
+			return href
+		}
+	}
+
+	// Specs: https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
+	// isPermaLink is optional, its default value is true.
+	// If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
+	if rssItem.GUID.IsPermaLink == "true" || rssItem.GUID.IsPermaLink == "" {
+		return strings.TrimSpace(rssItem.GUID.Data)
+	}
+
+	return ""
+}
+
+func findEntryContent(rssItem *RSSItem) string {
+	for _, value := range []string{
+		rssItem.DublinCoreContent,
+		rssItem.Description,
+		rssItem.GooglePlayDescription,
+		rssItem.ItunesSummary,
+		rssItem.ItunesSubtitle,
+	} {
+		if value != "" {
+			return value
+		}
+	}
+	return ""
+}
+
+func findEntryDate(rssItem *RSSItem) time.Time {
+	value := rssItem.PubDate
+	if rssItem.DublinCoreDate != "" {
+		value = rssItem.DublinCoreDate
+	}
+
+	if value != "" {
+		result, err := date.Parse(value)
+		if err != nil {
+			slog.Debug("Unable to parse date from RSS feed",
+				slog.String("date", value),
+				slog.String("guid", rssItem.GUID.Data),
+				slog.Any("error", err),
+			)
+			return time.Now()
+		}
+
+		return result
+	}
+
+	return time.Now()
+}
+
+func findEntryAuthor(rssItem *RSSItem) string {
+	var author string
+
+	switch {
+	case rssItem.GooglePlayAuthor != "":
+		author = rssItem.GooglePlayAuthor
+	case rssItem.ItunesAuthor != "":
+		author = rssItem.ItunesAuthor
+	case rssItem.DublinCoreCreator != "":
+		author = rssItem.DublinCoreCreator
+	case rssItem.AtomAuthor.String() != "":
+		author = rssItem.AtomAuthor.String()
+	case strings.Contains(rssItem.Author.Inner, "<![CDATA["):
+		author = rssItem.Author.Data
+	default:
+		author = rssItem.Author.Inner
+	}
+
+	return strings.TrimSpace(sanitizer.StripTags(author))
+}
+
+func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList {
+	enclosures := make(model.EnclosureList, 0)
+	duplicates := make(map[string]bool)
+
+	for _, mediaThumbnail := range rssItem.AllMediaThumbnails() {
+		if _, found := duplicates[mediaThumbnail.URL]; !found {
+			duplicates[mediaThumbnail.URL] = true
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      mediaThumbnail.URL,
+				MimeType: mediaThumbnail.MimeType(),
+				Size:     mediaThumbnail.Size(),
+			})
+		}
+	}
+
+	for _, enclosure := range rssItem.Enclosures {
+		enclosureURL := enclosure.URL
+
+		if rssItem.FeedBurnerEnclosureLink != "" {
+			filename := path.Base(rssItem.FeedBurnerEnclosureLink)
+			if strings.Contains(enclosureURL, filename) {
+				enclosureURL = rssItem.FeedBurnerEnclosureLink
+			}
+		}
+
+		if enclosureURL == "" {
+			continue
+		}
+
+		if _, found := duplicates[enclosureURL]; !found {
+			duplicates[enclosureURL] = true
+
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      enclosureURL,
+				MimeType: enclosure.Type,
+				Size:     enclosure.Size(),
+			})
+		}
+	}
+
+	for _, mediaContent := range rssItem.AllMediaContents() {
+		if _, found := duplicates[mediaContent.URL]; !found {
+			duplicates[mediaContent.URL] = true
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      mediaContent.URL,
+				MimeType: mediaContent.MimeType(),
+				Size:     mediaContent.Size(),
+			})
+		}
+	}
+
+	for _, mediaPeerLink := range rssItem.AllMediaPeerLinks() {
+		if _, found := duplicates[mediaPeerLink.URL]; !found {
+			duplicates[mediaPeerLink.URL] = true
+			enclosures = append(enclosures, &model.Enclosure{
+				URL:      mediaPeerLink.URL,
+				MimeType: mediaPeerLink.MimeType(),
+				Size:     mediaPeerLink.Size(),
+			})
+		}
+	}
+
+	return enclosures
+}
--- a/internal/reader/rss/feedburner.go
+++ b/internal/reader/rss/feedburner.go
@ -3,8 +3,8 @@

 package rss // import "miniflux.app/v2/internal/reader/rss"

-// FeedBurnerElement represents FeedBurner XML elements.
-type FeedBurnerElement struct {
+// FeedBurnerItemElement represents FeedBurner XML elements.
+type FeedBurnerItemElement struct {
 	FeedBurnerLink          string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
 	FeedBurnerEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
 }
--- a/internal/reader/rss/parser.go
+++ b/internal/reader/rss/parser.go
@ -13,11 +13,11 @@ import (

 // Parse returns a normalized feed struct from a RSS feed.
 func Parse(baseURL string, data io.ReadSeeker) (*model.Feed, error) {
-	feed := new(rssFeed)
+	rssFeed := new(RSS)
 	decoder := xml.NewXMLDecoder(data)
 	decoder.DefaultSpace = "rss"
-	if err := decoder.Decode(feed); err != nil {
+	if err := decoder.Decode(rssFeed); err != nil {
 		return nil, fmt.Errorf("rss: unable to parse feed: %w", err)
 	}
-	return feed.Transform(baseURL), nil
+	return NewRSSAdapter(rssFeed).BuildFeed(baseURL), nil
 }
--- a/internal/reader/rss/parser_test.go
+++ b/internal/reader/rss/parser_test.go
@ -846,6 +846,59 @@ func TestParseEntryWithEnclosures(t *testing.T) {
 	}
 }

+func TestParseEntryWithIncorrectEnclosureLength(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0">
+		<channel>
+		<title>My Podcast Feed</title>
+		<link>http://example.org</link>
+		<author>some.email@example.org</author>
+		<item>
+			<title>Podcasting with RSS</title>
+			<link>http://www.example.org/entries/1</link>
+			<description>An overview of RSS podcasting</description>
+			<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
+			<guid isPermaLink="true">http://www.example.org/entries/1</guid>
+			<enclosure url="http://www.example.org/myaudiofile.mp3" length="invalid" type="audio/mpeg" />
+			<enclosure url="http://www.example.org/myaudiofile.wav" length=" " type="audio" />
+		</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(feed.Entries) != 1 { // fatal: the checks below index Entries[0]
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
+		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
+	}
+
+	if len(feed.Entries[0].Enclosures) != 2 { // fatal: the checks below index Enclosures[0] and [1]
+		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
+	}
+
+	if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
+		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
+	}
+
+	if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
+		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
+	}
+
+	if feed.Entries[0].Enclosures[0].Size != 0 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
+	}
+
+	if feed.Entries[0].Enclosures[1].Size != 0 {
+		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[1].Size)
+	}
+}
+
 func TestParseEntryWithEmptyEnclosureURL(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0">
@ -1306,6 +1359,60 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) {
 	}
 }

+func TestParseItunesDuration(t *testing.T) {
+	data := `<?xml version="1.0" encoding="UTF-8"?>
+		<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
+		<channel>
+			<title>Podcast Example</title>
+			<link>http://www.example.com/index.html</link>
+			<item>
+				<title>Podcast Episode</title>
+				<guid>http://example.com/episode.m4a</guid>
+				<pubDate>Tue, 08 Mar 2016 12:00:00 GMT</pubDate>
+				<itunes:duration>1:23:45</itunes:duration>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expected := 83
+	result := feed.Entries[0].ReadingTime
+	if expected != result {
+		t.Errorf(`Unexpected podcast duration, got %d instead of %d`, result, expected)
+	}
+}
+
+func TestParseIncorrectItunesDuration(t *testing.T) {
+	data := `<?xml version="1.0" encoding="UTF-8"?>
+		<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
+		<channel>
+			<title>Podcast Example</title>
+			<link>http://www.example.com/index.html</link>
+			<item>
+				<title>Podcast Episode</title>
+				<guid>http://example.com/episode.m4a</guid>
+				<pubDate>Tue, 08 Mar 2016 12:00:00 GMT</pubDate>
+				<itunes:duration>invalid</itunes:duration>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expected := 0
+	result := feed.Entries[0].ReadingTime
+	if expected != result {
+		t.Errorf(`Unexpected podcast duration, got %d instead of %d`, result, expected)
+	}
+}
+
 func TestEntryDescriptionFromItunesSummary(t *testing.T) {
 	data := `<?xml version="1.0" encoding="UTF-8"?>
 	<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
--- a/internal/reader/rss/podcast.go
+++ b/internal/reader/rss/podcast.go
@ -12,8 +12,7 @@ import (

 var ErrInvalidDurationFormat = errors.New("rss: invalid duration format")

-// normalizeDuration returns the duration tag value as a number of minutes
-func normalizeDuration(rawDuration string) (int, error) {
+func getDurationInMinutes(rawDuration string) (int, error) {
 	var sumSeconds int

 	durationParts := strings.Split(rawDuration, ":")
--- a/internal/reader/rss/rss.go
+++ b/internal/reader/rss/rss.go
@ -5,391 +5,110 @@ package rss // import "miniflux.app/v2/internal/reader/rss"

 import (
 	"encoding/xml"
-	"html"
-	"log/slog"
-	"path"
 	"strconv"
 	"strings"
-	"time"

-	"miniflux.app/v2/internal/crypto"
-	"miniflux.app/v2/internal/model"
-	"miniflux.app/v2/internal/reader/date"
 	"miniflux.app/v2/internal/reader/dublincore"
 	"miniflux.app/v2/internal/reader/googleplay"
 	"miniflux.app/v2/internal/reader/itunes"
 	"miniflux.app/v2/internal/reader/media"
-	"miniflux.app/v2/internal/reader/sanitizer"
-	"miniflux.app/v2/internal/urllib"
 )

 // Specs: https://www.rssboard.org/rss-specification
-type rssFeed struct {
-	XMLName xml.Name   `xml:"rss"`
+type RSS struct {
 	Version string     `xml:"rss version,attr"`
-	Channel rssChannel `xml:"rss channel"`
+	Channel RSSChannel `xml:"rss channel"`
 }

-type rssChannel struct {
-	Categories     []string  `xml:"rss category"`
+type RSSChannel struct {
 	Title          string    `xml:"rss title"`
 	Link           string    `xml:"rss link"`
-	ImageURL       string    `xml:"rss image>url"`
-	Language       string    `xml:"rss language"`
 	Description    string    `xml:"rss description"`
-	PubDate        string    `xml:"rss pubDate"`
+	Language       string    `xml:"rss language"`
+	Copyright      string    `xml:"rss copyright"` // RSS 2.0 names this element <copyright>; encoding/xml matches element names case-sensitively.
 	ManagingEditor string    `xml:"rss managingEditor"`
 	Webmaster      string    `xml:"rss webMaster"`
-	TimeToLive     rssTTL    `xml:"rss ttl"`
-	Items          []rssItem `xml:"rss item"`
+	PubDate        string    `xml:"rss pubDate"`
+	LastBuildDate  string    `xml:"rss lastBuildDate"`
+	Categories     []string  `xml:"rss category"`
+	Generator      string    `xml:"rss generator"`
+	Docs           string    `xml:"rss docs"`
+	Cloud          *RSSCloud `xml:"rss cloud"`
+	Image          *RSSImage `xml:"rss image"`
+	TTL            string    `xml:"rss ttl"`
+	SkipHours      []string  `xml:"rss skipHours>hour"`
+	SkipDays       []string  `xml:"rss skipDays>day"`
+	Items          []RSSItem `xml:"rss item"`
 	AtomLinks
-	itunes.ItunesFeedElement
-	googleplay.GooglePlayFeedElement
+	itunes.ItunesChannelElement
+	googleplay.GooglePlayChannelElement
 }

-type rssTTL struct {
-	Data string `xml:",chardata"`
+type RSSCloud struct {
+	Domain            string `xml:"domain,attr"`
+	Port              string `xml:"port,attr"`
+	Path              string `xml:"path,attr"`
+	RegisterProcedure string `xml:"registerProcedure,attr"`
+	Protocol          string `xml:"protocol,attr"`
 }

-func (r *rssTTL) Value() int {
-	if r.Data == "" {
-		return 0
-	}
+type RSSImage struct {
+	// URL is the URL of a GIF, JPEG or PNG image that represents the channel.
+	URL string `xml:"url"`

-	value, err := strconv.Atoi(r.Data)
-	if err != nil {
-		return 0
-	}
+	// Title describes the image, it's used in the ALT attribute of the HTML <img> tag when the channel is rendered in HTML.
+	Title string `xml:"title"`

-	return value
+	// Link is the URL of the site, when the channel is rendered, the image is a link to the site.
+	Link string `xml:"link"`
 }

-func (r *rssFeed) Transform(baseURL string) *model.Feed {
-	var err error
-
-	feed := new(model.Feed)
-
-	siteURL := r.siteURL()
-	feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL)
-	if err != nil {
-		feed.SiteURL = siteURL
-	}
-
-	feedURL := r.feedURL()
-	feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL)
-	if err != nil {
-		feed.FeedURL = feedURL
-	}
-
-	feed.Title = html.UnescapeString(strings.TrimSpace(r.Channel.Title))
-	if feed.Title == "" {
-		feed.Title = feed.SiteURL
-	}
-
-	feed.IconURL = strings.TrimSpace(r.Channel.ImageURL)
-	feed.TTL = r.Channel.TimeToLive.Value()
-
-	for _, item := range r.Channel.Items {
-		entry := item.Transform()
-		if entry.Author == "" {
-			entry.Author = r.feedAuthor()
-		}
-
-		if entry.URL == "" {
-			entry.URL = feed.SiteURL
-		} else {
-			entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL)
-			if err == nil {
-				entry.URL = entryURL
-			}
-		}
-
-		if entry.Title == "" {
-			entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
-		}
-
-		if entry.Title == "" {
-			entry.Title = entry.URL
-		}
-
-		entry.Tags = append(entry.Tags, r.Channel.Categories...)
-		entry.Tags = append(entry.Tags, r.Channel.GetItunesCategories()...)
-
-		if r.Channel.GooglePlayCategory.Text != "" {
-			entry.Tags = append(entry.Tags, r.Channel.GooglePlayCategory.Text)
-		}
-
-		feed.Entries = append(feed.Entries, entry)
-	}
-
-	return feed
-}
-
-func (r *rssFeed) siteURL() string {
-	return strings.TrimSpace(r.Channel.Link)
-}
-
-func (r *rssFeed) feedURL() string {
-	for _, atomLink := range r.Channel.AtomLinks.Links {
-		if atomLink.Rel == "self" {
-			return strings.TrimSpace(atomLink.URL)
-		}
-	}
-	return ""
-}
-
-func (r rssFeed) feedAuthor() string {
-	var author string
-	switch {
-	case r.Channel.ItunesAuthor != "":
-		author = r.Channel.ItunesAuthor
-	case r.Channel.GooglePlayAuthor != "":
-		author = r.Channel.GooglePlayAuthor
-	case r.Channel.ItunesOwner.String() != "":
-		author = r.Channel.ItunesOwner.String()
-	case r.Channel.ManagingEditor != "":
-		author = r.Channel.ManagingEditor
-	case r.Channel.Webmaster != "":
-		author = r.Channel.Webmaster
-	}
-	return sanitizer.StripTags(strings.TrimSpace(author))
-}
-
-type rssGUID struct {
-	XMLName     xml.Name
-	Data        string `xml:",chardata"`
-	IsPermaLink string `xml:"isPermaLink,attr"`
-}
-
-type rssAuthor struct {
-	XMLName xml.Name
-	Data    string `xml:",chardata"`
-	Inner   string `xml:",innerxml"`
-}
-
-type rssEnclosure struct {
-	URL    string `xml:"url,attr"`
-	Type   string `xml:"type,attr"`
-	Length string `xml:"length,attr"`
-}
-
-func (enclosure *rssEnclosure) Size() int64 {
-	if enclosure.Length == "" {
-		return 0
-	}
-	size, _ := strconv.ParseInt(enclosure.Length, 10, 0)
-	return size
-}
-
-type rssItem struct {
-	GUID           rssGUID        `xml:"rss guid"`
-	Title          string         `xml:"rss title"`
-	Link           string         `xml:"rss link"`
-	Description    string         `xml:"rss description"`
-	PubDate        string         `xml:"rss pubDate"`
-	Author         rssAuthor      `xml:"rss author"`
-	Comments       string         `xml:"rss comments"`
-	EnclosureLinks []rssEnclosure `xml:"rss enclosure"`
-	Categories     []string       `xml:"rss category"`
+type RSSItem struct {
+	Title       string         `xml:"rss title"`
+	Link        string         `xml:"rss link"`
+	Description string         `xml:"rss description"`
+	Author      RSSAuthor      `xml:"rss author"`
+	Categories  []string       `xml:"rss category"`
+	CommentsURL string         `xml:"rss comments"`
+	Enclosures  []RSSEnclosure `xml:"rss enclosure"`
+	GUID        RSSGUID        `xml:"rss guid"`
+	PubDate     string         `xml:"rss pubDate"`
+	Source      RSSSource      `xml:"rss source"`
 	dublincore.DublinCoreItemElement
-	FeedBurnerElement
-	media.Element
+	FeedBurnerItemElement
+	media.MediaItemElement
 	AtomAuthor
 	AtomLinks
 	itunes.ItunesItemElement
 	googleplay.GooglePlayItemElement
 }

-func (r *rssItem) Transform() *model.Entry {
-	entry := model.NewEntry()
-	entry.URL = r.entryURL()
-	entry.CommentsURL = r.entryCommentsURL()
-	entry.Date = r.entryDate()
-	entry.Author = r.entryAuthor()
-	entry.Hash = r.entryHash()
-	entry.Content = r.entryContent()
-	entry.Title = r.entryTitle()
-	entry.Enclosures = r.entryEnclosures()
-	entry.Tags = r.Categories
-	if duration, err := normalizeDuration(r.ItunesDuration); err == nil {
-		entry.ReadingTime = duration
-	}
-
-	return entry
+type RSSAuthor struct {
+	XMLName xml.Name
+	Data    string `xml:",chardata"`
+	Inner   string `xml:",innerxml"`
 }

-func (r *rssItem) entryDate() time.Time {
-	value := r.PubDate
-	if r.DublinCoreDate != "" {
-		value = r.DublinCoreDate
-	}
-
-	if value != "" {
-		result, err := date.Parse(value)
-		if err != nil {
-			slog.Debug("Unable to parse date from RSS feed",
-				slog.String("date", value),
-				slog.String("guid", r.GUID.Data),
-				slog.Any("error", err),
-			)
-			return time.Now()
-		}
-
-		return result
-	}
-
-	return time.Now()
+type RSSEnclosure struct {
+	URL    string `xml:"url,attr"`
+	Type   string `xml:"type,attr"`
+	Length string `xml:"length,attr"`
 }

-func (r *rssItem) entryAuthor() string {
-	var author string
-
-	switch {
-	case r.GooglePlayAuthor != "":
-		author = r.GooglePlayAuthor
-	case r.ItunesAuthor != "":
-		author = r.ItunesAuthor
-	case r.DublinCoreCreator != "":
-		author = r.DublinCoreCreator
-	case r.AtomAuthor.String() != "":
-		author = r.AtomAuthor.String()
-	case strings.Contains(r.Author.Inner, "<![CDATA["):
-		author = r.Author.Data
-	default:
-		author = r.Author.Inner
+func (enclosure *RSSEnclosure) Size() int64 {
+	if strings.TrimSpace(enclosure.Length) == "" {
+		return 0
 	}
-
-	return strings.TrimSpace(sanitizer.StripTags(author))
+	size, _ := strconv.ParseInt(strings.TrimSpace(enclosure.Length), 10, 64) // non-numeric lengths parse to 0; padding is ignored
+	return size
 }

-func (r *rssItem) entryHash() string {
-	for _, value := range []string{r.GUID.Data, r.entryURL()} {
-		if value != "" {
-			return crypto.Hash(value)
-		}
-	}
-
-	return ""
+type RSSGUID struct {
+	Data        string `xml:",chardata"`
+	IsPermaLink string `xml:"isPermaLink,attr"`
 }

-func (r *rssItem) entryTitle() string {
-	title := r.Title
-
-	if r.DublinCoreTitle != "" {
-		title = r.DublinCoreTitle
-	}
-
-	return html.UnescapeString(strings.TrimSpace(title))
-}
-
-func (r *rssItem) entryContent() string {
-	for _, value := range []string{
-		r.DublinCoreContent,
-		r.Description,
-		r.GooglePlayDescription,
-		r.ItunesSummary,
-		r.ItunesSubtitle,
-	} {
-		if value != "" {
-			return value
-		}
-	}
-	return ""
-}
-
-func (r *rssItem) entryURL() string {
-	for _, link := range []string{r.FeedBurnerLink, r.Link} {
-		if link != "" {
-			return strings.TrimSpace(link)
-		}
-	}
-
-	for _, atomLink := range r.AtomLinks.Links {
-		if atomLink.URL != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
-			return strings.TrimSpace(atomLink.URL)
-		}
-	}
-
-	// Specs: https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
-	// isPermaLink is optional, its default value is true.
-	// If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
-	if r.GUID.IsPermaLink == "true" || r.GUID.IsPermaLink == "" {
-		return strings.TrimSpace(r.GUID.Data)
-	}
-
-	return ""
-}
-
-func (r *rssItem) entryEnclosures() model.EnclosureList {
-	enclosures := make(model.EnclosureList, 0)
-	duplicates := make(map[string]bool)
-
-	for _, mediaThumbnail := range r.AllMediaThumbnails() {
-		if _, found := duplicates[mediaThumbnail.URL]; !found {
-			duplicates[mediaThumbnail.URL] = true
-			enclosures = append(enclosures, &model.Enclosure{
-				URL:      mediaThumbnail.URL,
-				MimeType: mediaThumbnail.MimeType(),
-				Size:     mediaThumbnail.Size(),
-			})
-		}
-	}
-
-	for _, enclosure := range r.EnclosureLinks {
-		enclosureURL := enclosure.URL
-
-		if r.FeedBurnerEnclosureLink != "" {
-			filename := path.Base(r.FeedBurnerEnclosureLink)
-			if strings.Contains(enclosureURL, filename) {
-				enclosureURL = r.FeedBurnerEnclosureLink
-			}
-		}
-
-		if enclosureURL == "" {
-			continue
-		}
-
-		if _, found := duplicates[enclosureURL]; !found {
-			duplicates[enclosureURL] = true
-
-			enclosures = append(enclosures, &model.Enclosure{
-				URL:      enclosureURL,
-				MimeType: enclosure.Type,
-				Size:     enclosure.Size(),
-			})
-		}
-	}
-
-	for _, mediaContent := range r.AllMediaContents() {
-		if _, found := duplicates[mediaContent.URL]; !found {
-			duplicates[mediaContent.URL] = true
-			enclosures = append(enclosures, &model.Enclosure{
-				URL:      mediaContent.URL,
-				MimeType: mediaContent.MimeType(),
-				Size:     mediaContent.Size(),
-			})
-		}
-	}
-
-	for _, mediaPeerLink := range r.AllMediaPeerLinks() {
-		if _, found := duplicates[mediaPeerLink.URL]; !found {
-			duplicates[mediaPeerLink.URL] = true
-			enclosures = append(enclosures, &model.Enclosure{
-				URL:      mediaPeerLink.URL,
-				MimeType: mediaPeerLink.MimeType(),
-				Size:     mediaPeerLink.Size(),
-			})
-		}
-	}
-
-	return enclosures
-}
-
-func (r *rssItem) entryCommentsURL() string {
-	commentsURL := strings.TrimSpace(r.Comments)
-	if commentsURL != "" && urllib.IsAbsoluteURL(commentsURL) {
-		return commentsURL
-	}
-
-	return ""
+type RSSSource struct {
+	URL  string `xml:"url,attr"`
+	Name string `xml:",chardata"`
 }