diff options
-rw-r--r-- | README.md | 29 | ||||
-rw-r--r-- | go.mod | 1 | ||||
-rw-r--r-- | internal/feed/feed.go | 17 | ||||
-rw-r--r-- | internal/feed/mail.go | 184 | ||||
-rw-r--r-- | internal/feed/template/feed.tpl.go | 10 | ||||
-rw-r--r-- | pkg/config/config.go | 26 |
6 files changed, 220 insertions, 47 deletions
@@ -2,15 +2,20 @@ # feed2imap-go -A software to convert rss feeds into mails. feed2imap-go acts an an RSS/Atom feed aggregator. After downloading feeds (over HTTP or HTTPS), it uploads them to a specified folder of an IMAP mail server. The user can then access the feeds using their preferred client (Mutt, Evolution, Mozilla Thunderbird, webmail,...). +A software to convert rss feeds into mails. feed2imap-go acts as an RSS/Atom feed aggregator. After downloading feeds +(over HTTP or HTTPS), it uploads them to a specified folder of an IMAP mail server. The user can then access the feeds +using their preferred client (Mutt, Evolution, Mozilla Thunderbird, webmail,...). -It is a rewrite in Go of the wonderful, but unfortunately now unmaintained, [feed2imap](https://github.com/feed2imap/feed2imap). It also includes the features that up to now only lived on [my own branch][nec]. +It is a rewrite in Go of the wonderful, but unfortunately now unmaintained, [feed2imap](https://github.com/feed2imap/feed2imap). +It also includes the features that up to now only lived on [my own branch][nec]. -It aims to be compatible in functionality and configuration, and should mostly work as a drop-in replacement (but see [Changes](#changes)). +It aims to be compatible in functionality and configuration, and should mostly work as a drop-in replacement +(but see [Changes](#changes)). ## Features -* Support for most feed formats. See [gofeed documentation](https://github.com/mmcdole/gofeed/blob/master/README.md#features) for details. +* Support for most feed formats. See [gofeed documentation](https://github.com/mmcdole/gofeed/blob/master/README.md#features) +for details. * Connection to any IMAP server, using IMAP, IMAP+STARTTLS, or IMAPS. * Detection of duplicates: Heuristics what feed items have already been uploaded. * Update mechanism: When a feed item is updated, so is the mail. 
(_TODO_: [issue #9][i9]) @@ -22,13 +27,21 @@ It aims to be compatible in functionality and configuration, and should mostly w * groups (_details TBD_) * heavier use of parallel processing (it's Go after all ;)) -* Global `target` and each feed only specifies the folder relative to that target. (feature contained also in [fork of the original][nec]) +* Global `target` and each feed only specifies the folder relative to that target. +(feature contained also in [fork of the original][nec]) +* Fix `include-images` option: It now includes images as mime-parts. An additional `embed-images` option serves the images +as inline base64-encoded data (the old default behavior of feed2imap). +* Use HTML-Parser instead of regular expressions for modifying the HTML content. ### Subtle differences -* **Feed rendering**: Unfortunately, semantics of RSS and Atom tags are very broad. As we use a different feed parser library than the original, the interpretation (e.g., what tag is "the author") can differ. -* **Caching**: We do not implement the caching algorithm of feed2imap point by point. In general we opted for less heuristics and more optimism (belief that GUID is filled correctly; belief that the difference between publishing and update date is adhered to). If this results in a problem, file a bug and include the `X-Feed2Imap-Reason` header of the mail. -* **Configuration**: We took the liberty to restructure the configuration options. Old configs are supported, but a warning is issued when an option should now be in another place or is no longer supported (i.e., the option is without function). +* **Feed rendering**: Unfortunately, semantics of RSS and Atom tags are very broad. As we use a different feed parser +library than the original, the interpretation (e.g., what tag is "the author") can differ. +* **Caching**: We do not implement the caching algorithm of feed2imap point by point. 
In general we opted for less +heuristics and more optimism (belief that GUID is filled correctly; belief that the difference between publishing and +update date is adhered to). If this results in a problem, file a bug and include the `X-Feed2Imap-Reason` header of the mail. +* **Configuration**: We took the liberty to restructure the configuration options. Old configs are supported, but a +warning is issued when an option should now be in another place or is no longer supported (i.e., the option is without function). ### Unsupported features of feed2imap @@ -3,6 +3,7 @@ module github.com/Necoro/feed2imap-go go 1.14 require ( + github.com/PuerkitoBio/goquery v1.5.0 github.com/emersion/go-imap v1.0.4 github.com/emersion/go-message v0.11.3-0.20200422153910-8c6ac6b57e3d github.com/google/go-cmp v0.4.0 diff --git a/internal/feed/feed.go b/internal/feed/feed.go index ddc91ed..1a6ef97 100644 --- a/internal/feed/feed.go +++ b/internal/feed/feed.go @@ -23,11 +23,18 @@ type feedDescriptor struct { Url string } +type feedImage struct { + image []byte + mime string +} + type feeditem struct { *gofeed.Feed *gofeed.Item + Body string updateOnly bool reasons []string + images []feedImage } // Creator returns the name of the creating author. 
@@ -45,6 +52,16 @@ func (item *feeditem) addReason(reason string) { } } +func (item *feeditem) addImage(img []byte, mime string) int { + i := feedImage{img, mime} + item.images = append(item.images, i) + return len(item.images) +} + +func (item *feeditem) clearImages() { + item.images = []feedImage{} +} + func (feed *Feed) descriptor() feedDescriptor { return feedDescriptor{ Name: feed.Name, diff --git a/internal/feed/mail.go b/internal/feed/mail.go index 620b444..5f543e4 100644 --- a/internal/feed/mail.go +++ b/internal/feed/mail.go @@ -2,15 +2,22 @@ package feed import ( "bytes" + "encoding/base64" "fmt" "io" + "io/ioutil" + "mime" + "path" "strings" "time" + "github.com/PuerkitoBio/goquery" + "github.com/emersion/go-message" "github.com/emersion/go-message/mail" "github.com/Necoro/feed2imap-go/internal/feed/template" "github.com/Necoro/feed2imap-go/pkg/config" + "github.com/Necoro/feed2imap-go/pkg/log" ) func address(name, address string) []*mail.Address { @@ -36,8 +43,9 @@ func writeHtml(writer io.Writer, item feeditem) error { return template.Feed.Execute(writer, item) } -func writeToBuffer(b *bytes.Buffer, feed *Feed, item feeditem, cfg *config.Config) error { +func buildHeader(feed *Feed, item feeditem, cfg *config.Config) message.Header { var h mail.Header + h.SetContentType("multipart/alternative", nil) h.SetAddressList("From", fromAdress(feed, item, cfg)) h.SetAddressList("To", address(feed.Name, cfg.DefaultEmail)) h.Add("X-Feed2Imap-Version", config.Version()) @@ -62,39 +70,84 @@ func writeToBuffer(b *bytes.Buffer, feed *Feed, item feeditem, cfg *config.Confi h.SetSubject(subject) } - tw, err := mail.CreateInlineWriter(b, h) + return h.Header +} + +func writeHtmlPart(w *message.Writer, item feeditem) error { + var ih message.Header + ih.SetContentType("text/html", map[string]string{"charset": "utf-8"}) + ih.SetContentDisposition("inline", nil) + ih.Set("Content-Transfer-Encoding", "8bit") + + partW, err := w.CreatePart(ih) if err != nil { return err } - 
defer tw.Close() + defer partW.Close() - - if false /* cfg.WithPartText() */ { - var th mail.InlineHeader - th.SetContentType("text/plain", map[string]string{"charset": "utf-8", "format": "flowed"}) + if err = writeHtml(partW, item); err != nil { + return fmt.Errorf("writing html part: %w", err) + } - w, err := tw.CreatePart(th) - if err != nil { - return err - } - defer w.Close() + return nil +} + +func writeImagePart(w *message.Writer, img feedImage, cid string) error { + var ih message.Header + ih.SetContentType(img.mime, nil) + ih.SetContentDisposition("inline", nil) + ih.Set("Content-Transfer-Encoding", "base64") + ih.SetText("Content-ID", fmt.Sprintf("<%s>", cid)) + + imgW, err := w.CreatePart(ih) + if err != nil { + return err + } + defer imgW.Close() - _, _ = io.WriteString(w, "Who are you?") + if _, err = imgW.Write(img.image); err != nil { + return err } + return nil +} + +func writeToBuffer(b *bytes.Buffer, feed *Feed, item feeditem, cfg *config.Config) error { + h := buildHeader(feed, item, cfg) + + writer, err := message.CreateWriter(b, h) + if err != nil { + return err + } + defer writer.Close() + if cfg.WithPartHtml() { - var th mail.InlineHeader - th.SetContentType("text/html", map[string]string{"charset": "utf-8"}) + feed.buildBody(&item) - w, err := tw.CreatePart(th) - if err != nil { + var relWriter *message.Writer + if len(item.images) > 0 { + var rh message.Header + rh.SetContentType("multipart/related", map[string]string{"type": "text/html"}) + if relWriter, err = writer.CreatePart(rh); err != nil { + return err + } + defer relWriter.Close() + } else { + relWriter = writer + } + + if err = writeHtmlPart(relWriter, item); err != nil { return err } - if err = writeHtml(w, item); err != nil { - return fmt.Errorf("writing html part: %w", err) + for idx, img := range item.images { + cid := cidNr(idx + 1) + if err = writeImagePart(relWriter, img, cid); err != nil { + return err + } } - w.Close() + item.clearImages() // save memory } return nil @@ -122,3 
+175,96 @@ func (feed *Feed) ToMails(cfg *config.Config) ([]string, error) { } return mails, nil } + +func getImage(src string) ([]byte, string) { + resp, err := stdHTTPClient.Get(src) + if err != nil { + log.Errorf("Error fetching from '%s': %s", src, err) + return nil, "" + } + defer resp.Body.Close() + + img, err := ioutil.ReadAll(resp.Body) + if err != nil { + log.Errorf("Error reading body from '%s': %s", src, err) + return nil, "" + } + + ext := path.Ext(src) + if ext == "" { + log.Warnf("Cannot determine extension from '%s', skipping.", src) + return nil, "" + } + + mime := mime.TypeByExtension(ext) + return img, mime +} + +func cidNr(idx int) string { + return fmt.Sprintf("cid_%d", idx) +} + +func (feed *Feed) buildBody(item *feeditem) { + var body string + var comment string + + if item.Item.Content != "" { + comment = "<!-- Content -->\n" + body = item.Item.Content + } else if item.Item.Description != "" { + comment = "<!-- Description -->\n" + body = item.Item.Description + } + + if !feed.InclImages { + item.Body = comment + body + return + } + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) + if err != nil { + log.Debugf("Feed %s: Error while parsing html content: %s", feed.Name, err) + if body != "" { + item.Body = "<br />" + comment + body + } + return + } + + doneAnything := false + nodes := doc.Find("img") + nodes.Each(func(i int, selection *goquery.Selection) { + const attr = "src" + + src, ok := selection.Attr(attr) + if !ok { + return + } + + img, mime := getImage(src) + if img == nil { + return + } + + if feed.EmbedImages { + imgStr := "data:" + mime + ";base64," + base64.StdEncoding.EncodeToString(img) + selection.SetAttr(attr, imgStr) + } else { + idx := item.addImage(img, mime) + cid := "cid:" + cidNr(idx) + selection.SetAttr(attr, cid) + } + doneAnything = true + }) + + if doneAnything { + html, err := doc.Find("body").Html() + if err != nil { + item.clearImages() + log.Errorf("Error during rendering HTML, skipping.") + } 
else { + body = html + } + } + + item.Body = comment + body +} diff --git a/internal/feed/template/feed.tpl.go b/internal/feed/template/feed.tpl.go index 8aab9d7..0e09180 100644 --- a/internal/feed/template/feed.tpl.go +++ b/internal/feed/template/feed.tpl.go @@ -40,14 +40,8 @@ const feedTpl = `{{- /*gotype:github.com/Necoro/feed2imap-go/internal/feed.feedi </td> </tr> </table> -{{with .Item.Content}} - <br /> <!-- originally: only if content and 'content !~ /\A\s*</m' --> +{{with .Body}} {{html .}} -{{else}} -{{with .Item.Description}} - <br /> <!-- originally: only if content and 'content !~ /\A\s*</m' --> - {{html .}} -{{end}} {{end}} {{with .Item.Enclosures}} <table border="1" width="100%" cellpadding="0" cellspacing="0" style="border-spacing: 0; "> @@ -59,7 +53,7 @@ const feedTpl = `{{- /*gotype:github.com/Necoro/feed2imap-go/internal/feed.feedi <tr> <td> - <a href={{.URL}}>{{.URL | lastUrlPart}}</a> ({{.Length | byteCount}}, {{.Type}}) + <a href={{.URL}}>{{.URL | lastUrlPart}}</a> ({{with .Length}}{{. 
| byteCount}}, {{end}}{{.Type}}) </td> </tr> {{end}} diff --git a/pkg/config/config.go b/pkg/config/config.go index ffb53b7..d1cd4c9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -37,22 +37,24 @@ var DefaultGlobalOptions = GlobalOptions{ // Per feed options // NB: Always specify a yaml name, as it is later used in processing type Options struct { - MinFreq int `yaml:"min-frequency"` - InclImages bool `yaml:"include-images"` - Disable bool `yaml:"disable"` - IgnHash bool `yaml:"ignore-hash"` - AlwaysNew bool `yaml:"always-new"` - NoTLS bool `yaml:"tls-no-verify"` + MinFreq int `yaml:"min-frequency"` + InclImages bool `yaml:"include-images"` + EmbedImages bool `yaml:"embed-images"` + Disable bool `yaml:"disable"` + IgnHash bool `yaml:"ignore-hash"` + AlwaysNew bool `yaml:"always-new"` + NoTLS bool `yaml:"tls-no-verify"` } // Default feed options var DefaultFeedOptions = Options{ - MinFreq: 1, - InclImages: false, - IgnHash: false, - AlwaysNew: false, - Disable: false, - NoTLS: false, + MinFreq: 1, + InclImages: true, + EmbedImages: false, + IgnHash: false, + AlwaysNew: false, + Disable: false, + NoTLS: false, } // Config holds the global configuration options and the configured feeds |