From e02e1c6afda26554d2cd5bcb386a0cf582f973bf Mon Sep 17 00:00:00 2001
From: René 'Necoro' Neumann
Date: Thu, 7 May 2020 22:04:27 +0200
Subject: Better detection if a text starts with html or not

---
 internal/feed/mail.go | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'internal/feed/mail.go')

diff --git a/internal/feed/mail.go b/internal/feed/mail.go
index 0a8bd10..f77987e 100644
--- a/internal/feed/mail.go
+++ b/internal/feed/mail.go
@@ -16,6 +16,7 @@ import (
 	"github.com/emersion/go-message"
 	"github.com/emersion/go-message/mail"
 	"github.com/gabriel-vasile/mimetype"
+	"golang.org/x/net/html"
 
 	"github.com/Necoro/feed2imap-go/internal/feed/template"
 	"github.com/Necoro/feed2imap-go/internal/http"
@@ -240,6 +241,13 @@ func getBody(content, description string, bodyCfg config.Body) string {
 	}
 }
 
+func startsWithText(str string) bool {
+	reader := strings.NewReader(str)
+	tokenizer := html.NewTokenizerFragment(reader, "")
+
+	return tokenizer.Next() == html.TextToken
+}
+
 func (item *item) buildBody() {
 	feed := item.feed
 	feedUrl, err := url.Parse(feed.Url)
@@ -248,6 +256,9 @@ func (item *item) buildBody() {
 	}
 
 	body := getBody(item.Content, item.Description, feed.Body)
+	if body != "" && startsWithText(body) {
+		body = "<br />" + body
+	}
 
 	if !feed.InclImages {
 		item.Body = body
@@ -257,9 +268,7 @@ func (item *item) buildBody() {
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
 	if err != nil {
 		log.Errorf("Feed %s: Item %s: Error while parsing html content: %s", feed.Name, item.Link, err)
-		if body != "" {
-			item.Body = "<br />" + body
-		}
+		item.Body = body
 		return
 	}
 
-- 
cgit v1.2.3-54-g00ecf