about summary refs log tree commit diff
diff options
context:
space:
mode:
author René 'Necoro' Neumann <necoro@necoro.eu> 2020-05-07 22:04:27 +0200
committer René 'Necoro' Neumann <necoro@necoro.eu> 2020-05-07 22:04:27 +0200
commit e02e1c6afda26554d2cd5bcb386a0cf582f973bf (patch)
tree 07f629789cc037f2fdb37de83785e5c672d77d0e
parent deb47998900a3b9caea4f1d36eaecfe3aba31e3d (diff)
download feed2imap-go-e02e1c6afda26554d2cd5bcb386a0cf582f973bf.tar.gz
feed2imap-go-e02e1c6afda26554d2cd5bcb386a0cf582f973bf.tar.bz2
feed2imap-go-e02e1c6afda26554d2cd5bcb386a0cf582f973bf.zip
Better detection if a text starts with html or not
-rw-r--r-- go.mod 2
-rw-r--r-- internal/feed/mail.go 15
2 files changed, 13 insertions, 4 deletions
diff --git a/go.mod b/go.mod
index 0ff1224..1d1041d 100644
--- a/go.mod
+++ b/go.mod
@@ -11,6 +11,6 @@ require (
github.com/google/go-cmp v0.4.0
github.com/google/uuid v1.1.1
github.com/mmcdole/gofeed v1.0.0-beta2.0.20200331235650-4298e4366be3
- golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5 // indirect
+ golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c
)
diff --git a/internal/feed/mail.go b/internal/feed/mail.go
index 0a8bd10..f77987e 100644
--- a/internal/feed/mail.go
+++ b/internal/feed/mail.go
@@ -16,6 +16,7 @@ import (
"github.com/emersion/go-message"
"github.com/emersion/go-message/mail"
"github.com/gabriel-vasile/mimetype"
+ "golang.org/x/net/html"
"github.com/Necoro/feed2imap-go/internal/feed/template"
"github.com/Necoro/feed2imap-go/internal/http"
@@ -240,6 +241,13 @@ func getBody(content, description string, bodyCfg config.Body) string {
}
}
+func startsWithText(str string) bool {
+ reader := strings.NewReader(str)
+ tokenizer := html.NewTokenizerFragment(reader, "")
+
+ return tokenizer.Next() == html.TextToken
+}
+
func (item *item) buildBody() {
feed := item.feed
feedUrl, err := url.Parse(feed.Url)
@@ -248,6 +256,9 @@ func (item *item) buildBody() {
}
body := getBody(item.Content, item.Description, feed.Body)
+ if body != "" && startsWithText(body) {
+ body = "<br />" + body
+ }
if !feed.InclImages {
item.Body = body
@@ -257,9 +268,7 @@ func (item *item) buildBody() {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
if err != nil {
log.Errorf("Feed %s: Item %s: Error while parsing html content: %s", feed.Name, item.Link, err)
- if body != "" {
- item.Body = "<br />" + body
- }
+ item.Body = body
return
}