aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRené 'Necoro' Neumann <necoro@necoro.eu>2022-01-08 21:00:01 +0100
committerRené 'Necoro' Neumann <necoro@necoro.eu>2022-01-08 21:00:01 +0100
commitea115e8bb1145e20ec25d8e1954427244c250a10 (patch)
treee5db38d87b6c33edc255dfe88f8bd32a01ba41f4
parenteaa361b983dd3cea4a5eb879fb542c550411fc62 (diff)
downloadfeed2imap-go-ea115e8bb1145e20ec25d8e1954427244c250a10.tar.gz
feed2imap-go-ea115e8bb1145e20ec25d8e1954427244c250a10.tar.bz2
feed2imap-go-ea115e8bb1145e20ec25d8e1954427244c250a10.zip
#67: Readability support
-rw-r--r--go.mod3
-rw-r--r--go.sum10
-rw-r--r--internal/feed/mail.go53
-rw-r--r--pkg/config/body.go2
4 files changed, 58 insertions, 10 deletions
diff --git a/go.mod b/go.mod
index 4e6ee59..20f6f94 100644
--- a/go.mod
+++ b/go.mod
@@ -3,6 +3,7 @@ module github.com/Necoro/feed2imap-go
go 1.17
require (
+ github.com/Necoro/go-readability v0.0.0-20220107222023-364d914a66d0
github.com/Necoro/gofeed v1.1.4-0.20211029114605-b1a032d3e32f
github.com/Necoro/html2text v0.0.0-20211029113451-0e111ca632ef
github.com/PuerkitoBio/goquery v1.8.0
@@ -22,6 +23,8 @@ require (
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/emersion/go-sasl v0.0.0-20200509203442-7bfe0ed36a21 // indirect
github.com/emersion/go-textwrapper v0.0.0-20200911093747-65d896831594 // indirect
+ github.com/go-shiori/dom v0.0.0-20210627111528-4e4722cd0d65 // indirect
+ github.com/gogs/chardet v0.0.0-20191104214054-4b6791f73a28 // indirect
github.com/json-iterator/go v1.1.10 // indirect
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf // indirect
diff --git a/go.sum b/go.sum
index 9d72ed9..35c21da 100644
--- a/go.sum
+++ b/go.sum
@@ -1,10 +1,13 @@
github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
+github.com/Necoro/go-readability v0.0.0-20220107222023-364d914a66d0 h1:4tPedWfYpstnlPsXmuMeOJq11a+Ws2tuD+l2RyrSrqA=
+github.com/Necoro/go-readability v0.0.0-20220107222023-364d914a66d0/go.mod h1:nk2AaJ7eR19njLHFOnOdTEyrRQJrQLpMjOzcO8LY+bc=
github.com/Necoro/gofeed v1.1.4-0.20211029114605-b1a032d3e32f h1:4U61yP/+eEhN03KS4vs9nZb1BIof32BTexlDhumniF8=
github.com/Necoro/gofeed v1.1.4-0.20211029114605-b1a032d3e32f/go.mod h1:O4tTFVp3PQj8ZXBtP8BEdN3+FAi0OTFBkNhJ01jHxVU=
github.com/Necoro/html2text v0.0.0-20211029113451-0e111ca632ef h1:ug4tZhYWJ2kio0apzEUzuCdFGJw2eIjNm/+Je2DvlE4=
github.com/Necoro/html2text v0.0.0-20211029113451-0e111ca632ef/go.mod h1:ZuQPJl0H5qfZKIQp5L5Kun61DZa/XwJR0WQnsaaR1NQ=
github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
+github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/antonmedv/expr v1.9.0 h1:j4HI3NHEdgDnN9p6oI6Ndr0G5QryMY0FNxT4ONrFDGU=
@@ -27,6 +30,10 @@ github.com/gabriel-vasile/mimetype v1.4.0 h1:Cn9dkdYsMIu56tGho+fqzh7XmvY2YyGU0Fn
github.com/gabriel-vasile/mimetype v1.4.0/go.mod h1:fA8fi6KUiG7MgQQ+mEWotXoEOvmxRtOJlERCzSmRvr8=
github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg=
github.com/gdamore/tcell v1.3.0/go.mod h1:Hjvr+Ofd+gLglo7RYKxxnzCBmev3BzsS67MebKS4zMM=
+github.com/go-shiori/dom v0.0.0-20210627111528-4e4722cd0d65 h1:zx4B0AiwqKDQq+AgqxWeHwbbLJQeidq20hgfP+aMNWI=
+github.com/go-shiori/dom v0.0.0-20210627111528-4e4722cd0d65/go.mod h1:NPO1+buE6TYOWhUI98/hXLHHJhunIpXRuvDN4xjkCoE=
+github.com/gogs/chardet v0.0.0-20191104214054-4b6791f73a28 h1:gBeyun7mySAKWg7Fb0GOcv0upX9bdaZScs8QcRo8mEY=
+github.com/gogs/chardet v0.0.0-20191104214054-4b6791f73a28/go.mod h1:Pcatq5tYkCW2Q6yrR2VRHlbHpZ/R4/7qyL1TCF7vl14=
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
@@ -56,6 +63,7 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/rivo/tview v0.0.0-20200219210816-cd38d7432498/go.mod h1:6lkG1x+13OShEf0EaOCaTQYyB7d5nSbb181KtjlS+84=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/sanity-io/litter v1.2.0/go.mod h1:JF6pZUFgu2Q0sBZ+HSV35P8TVPI1TTzEwyu9FXAw2W4=
+github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo=
github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -63,7 +71,9 @@ github.com/stretchr/testify v0.0.0-20161117074351-18a02ba4a312/go.mod h1:a8OnRci
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
+golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20210505024714-0287a6fb4125/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20210505214959-0714010a04ed/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20190626150813-e07cf5db2756/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
diff --git a/internal/feed/mail.go b/internal/feed/mail.go
index 444f76a..ad669ae 100644
--- a/internal/feed/mail.go
+++ b/internal/feed/mail.go
@@ -11,6 +11,7 @@ import (
"strings"
"time"
+ "github.com/Necoro/go-readability"
"github.com/Necoro/gofeed"
"github.com/Necoro/html2text"
"github.com/PuerkitoBio/goquery"
@@ -18,6 +19,7 @@ import (
"github.com/emersion/go-message/mail"
"github.com/gabriel-vasile/mimetype"
"golang.org/x/net/html"
+ "golang.org/x/net/html/charset"
"github.com/Necoro/feed2imap-go/internal/feed/template"
"github.com/Necoro/feed2imap-go/internal/http"
@@ -248,25 +250,53 @@ func getImage(src string, ctx http.Context) ([]byte, string, error) {
return img, mimeStr, nil
}
+func getFullArticle(src string, ctx http.Context) (string, error) {
+ log.Debugf("Fetching article from '%s'", src)
+ resp, cancel, err := http.Get(src, ctx)
+ if err != nil {
+ return "", fmt.Errorf("fetching from '%s': %w", src, err)
+ }
+ defer cancel()
+
+ reader, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
+ if err != nil {
+ return "", fmt.Errorf("detecting charset from '%s': %w", src, err)
+ }
+
+ doc, err := html.Parse(reader)
+ if err != nil {
+ return "", fmt.Errorf("parsing body from '%s': %w", src, err)
+ }
+
+ article, err := readability.FromDocument(doc, resp.Request.URL)
+ if err != nil {
+ return "", fmt.Errorf("parsing body from '%s': %w", src, err)
+ }
+
+ return article.Content, nil
+}
+
func cidNr(idx int) string {
return fmt.Sprintf("cid_%d", idx)
}
-func getBody(content, description string, bodyCfg config.Body) string {
+func (item *Item) getBody(bodyCfg config.Body) (string, error) {
switch bodyCfg {
case "default":
- if content != "" {
- return content
+ if item.Content != "" {
+ return item.Content, nil
}
- return description
+ return item.Description, nil
case "description":
- return description
+ return item.Description, nil
case "content":
- return content
+ return item.Content, nil
case "both":
- return description + content
+ return item.Description + item.Content, nil
+ case "fetch":
+ return getFullArticle(item.Link, item.feed.Context())
default:
- panic(fmt.Sprintf("Unknown value for Body: %v", bodyCfg))
+ return "", fmt.Errorf("Unknown value for Body: %v", bodyCfg)
}
}
@@ -318,9 +348,14 @@ func (item *Item) downloadImage(src string) string {
}
func (item *Item) buildBody() {
+ var err error
feed := item.feed
- item.Body = getBody(item.Content, item.Description, feed.Body)
+ if item.Body, err = item.getBody(feed.Body); err != nil {
+ log.Errorf("Feed %s: Item %s: Error while fetching body: %s", feed.Name, item.Link, err)
+ return
+ }
+
bodyNode, err := html.Parse(strings.NewReader(item.Body))
if err != nil {
log.Errorf("Feed %s: Item %s: Error while parsing html: %s", feed.Name, item.Link, err)
diff --git a/pkg/config/body.go b/pkg/config/body.go
index d9957d5..3b2f676 100644
--- a/pkg/config/body.go
+++ b/pkg/config/body.go
@@ -10,7 +10,7 @@ import (
type Body string
-var validBody = []string{"default", "both", "content", "description"}
+var validBody = []string{"default", "both", "content", "description", "fetch"}
func (b *Body) UnmarshalYAML(node *yaml.Node) error {
var val string