diff --git a/reader/scraper/scraper.go b/reader/scraper/scraper.go index 9044a1cb..81ebd8c8 100644 --- a/reader/scraper/scraper.go +++ b/reader/scraper/scraper.go @@ -21,14 +21,6 @@ import ( // Fetch downloads a web page and returns relevant contents. func Fetch(websiteURL, rules, userAgent string, cookie string, allowSelfSignedCertificates, useProxy bool) (string, error) { - content, err := fetchURL(websiteURL, rules, userAgent, cookie, allowSelfSignedCertificates, useProxy) - if err != nil { - return "", err - } - return followTheOnlyLink(websiteURL, content, rules, userAgent, cookie, allowSelfSignedCertificates, useProxy) -} - -func fetchURL(websiteURL, rules, userAgent string, cookie string, allowSelfSignedCertificates, useProxy bool) (string, error) { clt := client.NewClientWithConfig(websiteURL, config.Opts) clt.WithUserAgent(userAgent) clt.WithCookie(cookie) @@ -54,8 +46,8 @@ func fetchURL(websiteURL, rules, userAgent string, cookie string, allowSelfSigne return "", err } - sameSite := url.Domain(websiteURL) == url.Domain(response.EffectiveURL) // The entry URL could redirect somewhere else. + sameSite := url.Domain(websiteURL) == url.Domain(response.EffectiveURL) websiteURL = response.EffectiveURL if rules == "" { @@ -112,45 +104,3 @@ func isAllowedContentType(contentType string) bool { return strings.HasPrefix(contentType, "text/html") || strings.HasPrefix(contentType, "application/xhtml+xml") } - -func followTheOnlyLink(websiteURL, content string, rules, userAgent string, cookie string, allowSelfSignedCertificates, useProxy bool) (string, error) { - document, err := goquery.NewDocumentFromReader(strings.NewReader(content)) - if err != nil { - return "", err - } - - if len(document.Find("body").Nodes) == 0 { - return content, nil - } - - body := document.Find("body").Nodes[0] - if body.FirstChild.NextSibling != nil || - body.FirstChild.Data != "a" { - return content, nil - } - - // the body has only one child of - var href string - for _, attr := range body.FirstChild.Attr { - if attr.Key == "href" { - href = attr.Val - break - } - } - - if href == "" { - return content, nil - } - - href, err = url.AbsoluteURL(websiteURL, href) - if err != nil { - return "", err - } - - sameSite := url.Domain(websiteURL) == url.Domain(href) - if sameSite { - return fetchURL(href, rules, userAgent, cookie, allowSelfSignedCertificates, useProxy) - } - - return fetchURL(href, rules, userAgent, "", false, false) -}