aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author René 'Necoro' Neumann <necoro@necoro.eu> 2021-12-21 23:12:02 +0100
committer René 'Necoro' Neumann <necoro@necoro.eu> 2021-12-21 23:12:02 +0100
commit 549fa5aaf56a23a090e27f273f15ee09dd0a37cf (patch)
tree a0783309f9324d864c450767712ec62af822e37c
parent ccbbff4774c57c77f66ea64351f7f46bd0db2d9f (diff)
download feed2imap-go-549fa5aaf56a23a090e27f273f15ee09dd0a37cf.tar.gz
feed2imap-go-549fa5aaf56a23a090e27f273f15ee09dd0a37cf.tar.bz2
feed2imap-go-549fa5aaf56a23a090e27f273f15ee09dd0a37cf.zip
Resolve relative links, so that external clients can use absolute links.
Found in https://go.dev/blog/feed.atom
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- internal/feed/mail.go 89
2 files changed, 62 insertions, 29 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b77cf17..3a4297b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
+### Added
+- Make links absolute: relative links inside a feed cannot be resolved outside a web browser
## [1.3.0] - 2021-11-01
### Added
diff --git a/internal/feed/mail.go b/internal/feed/mail.go
index 2865800..6914b76 100644
--- a/internal/feed/mail.go
+++ b/internal/feed/mail.go
@@ -270,21 +270,31 @@ func getBody(content, description string, bodyCfg config.Body) string {
}
}
-func (item *Item) downloadImage(src string) string {
+func (item *Item) resolveUrl(otherUrlStr string) string {
feed := item.feed
+ feedUrl := feed.url()
+
+ if feedUrl == nil {
+ // no url, just return the original
+ return otherUrlStr
+ }
- imgUrl, err := url.Parse(src)
+ otherUrl, err := url.Parse(otherUrlStr)
if err != nil {
log.Errorf("Feed %s: Item %s: Error parsing URL '%s' embedded in item: %s",
- feed.Name, item.Link, src, err)
+ feed.Name, item.Link, otherUrlStr, err)
return ""
}
- if feedUrl := feed.url(); feedUrl != nil {
- imgUrl = feedUrl.ResolveReference(imgUrl)
- }
+ return feedUrl.ResolveReference(otherUrl).String()
+}
+
+func (item *Item) downloadImage(src string) string {
+ feed := item.feed
- img, mime, err := getImage(imgUrl.String(), feed.Global.Timeout, feed.NoTLS)
+ imgUrl := item.resolveUrl(src)
+
+ img, mime, err := getImage(imgUrl, feed.Global.Timeout, feed.NoTLS)
if err != nil {
log.Errorf("Feed %s: Item %s: Error fetching image: %s",
feed.Name, item.Link, err)
@@ -310,31 +320,63 @@ func (item *Item) downloadImage(src string) string {
func (item *Item) buildBody() {
feed := item.feed
- body := getBody(item.Content, item.Description, feed.Body)
- bodyNode, err := html.Parse(strings.NewReader(body))
+ item.Body = getBody(item.Content, item.Description, feed.Body)
+ bodyNode, err := html.Parse(strings.NewReader(item.Body))
if err != nil {
log.Errorf("Feed %s: Item %s: Error while parsing html: %s", feed.Name, item.Link, err)
- item.Body = body
- item.TextBody = body
+ item.TextBody = item.Body
return
}
+ doc := goquery.NewDocumentFromNode(bodyNode)
+ doneAnything := false
+
+ updateBody := func() {
+ if doneAnything {
+ html, err := goquery.OuterHtml(doc.Selection)
+ if err != nil {
+ item.clearImages()
+ log.Errorf("Feed %s: Item %s: Error during rendering HTML: %s",
+ feed.Name, item.Link, err)
+ } else {
+ item.Body = html
+ }
+ }
+ }
+
+ // make relative links absolute
+ doc.Find("a").Each(func(i int, selection *goquery.Selection) {
+ const attr = "href"
+
+ src, ok := selection.Attr(attr)
+ if !ok {
+ return
+ }
+
+ if src != "" && src[0] == '/' {
+ absUrl := item.resolveUrl(src)
+ selection.SetAttr(attr, absUrl)
+ doneAnything = true
+ }
+ })
+
if feed.Global.WithPartText() {
if item.TextBody, err = html2text.FromHTMLNode(bodyNode, html2text.Options{CitationStyleLinks: true}); err != nil {
log.Errorf("Feed %s: Item %s: Error while converting html to text: %s", feed.Name, item.Link, err)
}
}
- if !feed.InclImages || !feed.Global.WithPartHtml() || err != nil {
- item.Body = body
+ if !feed.Global.WithPartHtml() || err != nil {
return
}
- doc := goquery.NewDocumentFromNode(bodyNode)
+ if !feed.InclImages {
+ updateBody()
+ return
+ }
- doneAnything := true
- nodes := doc.Find("img")
- nodes.Each(func(i int, selection *goquery.Selection) {
+ // download images
+ doc.Find("img").Each(func(i int, selection *goquery.Selection) {
const attr = "src"
src, ok := selection.Attr(attr)
@@ -355,16 +397,5 @@ func (item *Item) buildBody() {
doneAnything = true
})
- if doneAnything {
- html, err := doc.Find("body").Html()
- if err != nil {
- item.clearImages()
- log.Errorf("Feed %s: Item %s: Error during rendering HTML: %s",
- feed.Name, item.Link, err)
- } else {
- body = html
- }
- }
-
- item.Body = body
+ updateBody()
}