From 3cbf95d38b6f8bd17b4312371ed07e6847ff0f5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20=27Necoro=27=20Neumann?= Date: Thu, 21 May 2020 01:05:02 +0200 Subject: New option 'item-filter' --- CHANGELOG.md | 1 + go.mod | 1 + go.sum | 22 ++++++++++++++++ internal/feed/feed.go | 2 ++ internal/feed/filter/filter.go | 27 +++++++++++++++++++ internal/feed/state.go | 59 ++++++++++++++++++++++++++++++++++++++---- main.go | 5 +++- pkg/config/config.go | 20 +++++++------- pkg/config/deprecated.go | 2 +- 9 files changed, 123 insertions(+), 16 deletions(-) create mode 100644 internal/feed/filter/filter.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a4f280..e93cfb5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - Verbose variant of 'target' in config: Do not hassle with urlencoded passwords anymore! +- New feed option 'item-filter' for filtering out specific items from feed. ## [0.3.1] - 2020-05-12 - Docker Setup diff --git a/go.mod b/go.mod index 1e12684..3eced8b 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.14 require ( github.com/Necoro/html2text v0.0.0-20200510210108-d7611f0be99f github.com/PuerkitoBio/goquery v1.5.1 + github.com/antonmedv/expr v1.8.8 github.com/emersion/go-imap v1.0.4 github.com/emersion/go-imap-uidplus v0.0.0-20200503180755-e75854c361e9 github.com/emersion/go-message v0.11.3-0.20200422153910-8c6ac6b57e3d diff --git a/go.sum b/go.sum index 27cb5c0..86b1875 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,4 @@ +github.com/DATA-DOG/go-sqlmock v1.3.3/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= github.com/Necoro/html2text v0.0.0-20200510210108-d7611f0be99f h1:8p10S/q2hJbHlRu4vE4Y5geDc7g6CC2hkHfQvc6x/dA= github.com/Necoro/html2text v0.0.0-20200510210108-d7611f0be99f/go.mod h1:9AnGXZMY/GCLWg9ozOQp8a+KcsHkudIrDDWRrjuJtv4= github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= @@ -8,7 +9,10 @@ github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRy github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/antonmedv/expr v1.8.8 h1:uVwIkIBNO2yn4vY2u2DQUqXTmv9jEEMCEcHa19G5weY= +github.com/antonmedv/expr v1.8.8/go.mod h1:5qsM3oLGDND7sDmQGDXHkYfkjYMUX14qsgqmHhwGEk8= github.com/codegangsta/cli v1.20.0/go.mod h1:/qJNoX69yVSKu5o4jLyXAENLRyk1uhi7zkbQ3slBdOA= +github.com/davecgh/go-spew v0.0.0-20161028175848-04cdfd42973b/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -25,29 +29,43 @@ github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe h1:40SWqY0 github.com/emersion/go-textwrapper v0.0.0-20160606182133-d0e65e56babe/go.mod h1:aqO8z8wPrjkscevZJFVE1wXJrLpC5LtJG7fqLOsPb2U= github.com/gabriel-vasile/mimetype v1.1.0 h1:+ahX+MvQPFve4kO9Qjjxf3j49i0ACdV236kJlOCRAnU= github.com/gabriel-vasile/mimetype v1.1.0/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To= +github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg= +github.com/gdamore/tcell v1.3.0/go.mod h1:Hjvr+Ofd+gLglo7RYKxxnzCBmev3BzsS67MebKS4zMM= github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/lucasb-eyer/go-colorful v1.0.2/go.mod h1:0MS4r+7BZKSJ5mw4/S5MPN+qHFF1fYclkSPilDOKW0s= +github.com/lucasb-eyer/go-colorful v1.0.3/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/martinlindhe/base36 v1.0.0 h1:eYsumTah144C0A8P1T/AVSUk5ZoLnhfYFM3OGQxB52A= github.com/martinlindhe/base36 v1.0.0/go.mod h1:+AtEs8xrBpCeYgSLoY/aJ6Wf37jtBuR0s35750M27+8= +github.com/mattn/go-runewidth v0.0.4/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-runewidth v0.0.7 h1:Ei8KR0497xHyKJPAv59M1dkC+rOZCMBJ+t3fZ+twI54= github.com/mattn/go-runewidth v0.0.7/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/mattn/go-runewidth v0.0.8 h1:3tS41NlGYSmhhe/8fhGRzc+z3AYCw1Fe1WAyLuujKs0= +github.com/mattn/go-runewidth v0.0.8/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mmcdole/gofeed v1.0.0 h1:PHqwr8fsEm8xarj9s53XeEAFYhRM3E9Ib7Ie766/LTE= github.com/mmcdole/gofeed v1.0.0/go.mod h1:tkVcyzS3qVMlQrQxJoEH1hkTiuo9a8emDzkMi7TZBu0= github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI= github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8= github.com/olekukonko/tablewriter v0.0.4 h1:vHD/YYe1Wolo78koG299f7V/VAS08c6IpCLn+Ejf/w8= github.com/olekukonko/tablewriter v0.0.4/go.mod h1:zq6QwlOf5SlnkVbMSr5EoBv3636FWnp+qbPhuoO21uA= +github.com/pmezard/go-difflib v0.0.0-20151028094244-d8ed2627bdf0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/tview v0.0.0-20200219210816-cd38d7432498/go.mod h1:6lkG1x+13OShEf0EaOCaTQYyB7d5nSbb181KtjlS+84= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/sanity-io/litter v1.2.0/go.mod h1:JF6pZUFgu2Q0sBZ+HSV35P8TVPI1TTzEwyu9FXAw2W4= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf h1:pvbZ0lM0XWPBqUKqFU8cmavspvIl9nulOYwdy6IFRRo= github.com/ssor/bom v0.0.0-20170718123548-6386211fdfcf/go.mod h1:RJID2RhlZKId02nZ62WenDCkgHFerpIOmW0iT7GKmXM= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v0.0.0-20161117074351-18a02ba4a312/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -57,6 +75,8 @@ golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200506145744-7e3656a0809f h1:QBjCr1Fz5kw158VqdE9JfI9cJnl/ymnJWAdMuinqL7Y= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190626150813-e07cf5db2756/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= @@ -66,5 +86,7 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IV golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200506231410-2ff61e1afc86 h1:OfFoIUYv/me30yv7XlMy4F9RJw8DEm8WQ6QG1Ph4bH0= gopkg.in/yaml.v3 v3.0.0-20200506231410-2ff61e1afc86/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/feed/feed.go b/internal/feed/feed.go index 4a0e724..ef51251 100644 --- a/internal/feed/feed.go +++ b/internal/feed/feed.go @@ -5,6 +5,7 @@ import ( "github.com/mmcdole/gofeed" + "github.com/Necoro/feed2imap-go/internal/feed/filter" "github.com/Necoro/feed2imap-go/pkg/config" "github.com/Necoro/feed2imap-go/pkg/log" ) @@ -12,6 +13,7 @@ import ( type Feed struct { *config.Feed feed *gofeed.Feed + filter *filter.Filter items []item cached CachedFeed Global config.GlobalOptions diff --git a/internal/feed/filter/filter.go b/internal/feed/filter/filter.go new file mode 100644 index 0000000..8ff8a97 --- /dev/null +++ b/internal/feed/filter/filter.go @@ -0,0 +1,27 @@ +package filter + +import ( + "github.com/antonmedv/expr" + "github.com/antonmedv/expr/vm" + "github.com/mmcdole/gofeed" +) + +type Filter struct { + prog *vm.Program +} + +func (f *Filter) Run(item *gofeed.Item) (bool, error) { + if res, err := expr.Run(f.prog, item); err != nil { + return false, err + } else { + return res.(bool), nil + } +} + +func New(s string) (*Filter, error) { + prog, err := expr.Compile(s, expr.AsBool(), expr.Env(gofeed.Item{})) + if err != nil { + return nil, err + } + return &Filter{prog}, nil +} diff --git a/internal/feed/state.go b/internal/feed/state.go index cc9dd94..3828e37 100644 --- a/internal/feed/state.go +++ b/internal/feed/state.go @@ -1,8 +1,13 @@ package feed import ( + "encoding/json" + "fmt" "sync" + "github.com/mmcdole/gofeed" + + "github.com/Necoro/feed2imap-go/internal/feed/filter" "github.com/Necoro/feed2imap-go/pkg/config" "github.com/Necoro/feed2imap-go/pkg/log" ) @@ -77,15 +82,52 @@ func (state *State) Fetch() int { return ctr } +func printItem(item *gofeed.Item) string { + // analogous to gofeed.Feed.String + json, _ := json.MarshalIndent(item, "", " ") + return string(json) +} + +func (feed *Feed) filterItems() []item { + if feed.filter == nil { + return feed.items + } + + items := make([]item, 0, len(feed.items)) + + for _, item := range feed.items { + res, err := feed.filter.Run(item.Item) + if err != nil { + log.Errorf("Feed %s: Item %s: Error applying item filter: %s", feed.Name, printItem(item.Item), err) + res = true // include + } + + if res { + items = append(items, item) + } else if log.IsDebug() { // printItem is not for free + log.Debugf("Filter '%s' matches for item %s, removing.", feed.ItemFilter, printItem(item.Item)) + } + } + return items +} + func filterFeed(feed *Feed) { if len(feed.items) > 0 { origLen := len(feed.items) log.Debugf("Filtering %s. Starting with %d items", feed.Name, origLen) - items := feed.cached.filterItems(feed.items, feed.IgnHash, feed.AlwaysNew) + + items := feed.filterItems() + newLen := len(items) + if newLen < origLen { + log.Printf("Item filter on %s: Reduced from %d to %d items.", feed.Name, origLen, newLen) + origLen = newLen + } + + items = feed.cached.filterItems(items, feed.IgnHash, feed.AlwaysNew) feed.items = items - newLen := len(feed.items) + newLen = len(feed.items) if newLen < origLen { log.Printf("Filtered %s. Reduced from %d to %d items.", feed.Name, origLen, newLen) } else { @@ -106,7 +148,7 @@ func (state *State) Filter() { } } -func NewState(cfg *config.Config) *State { +func NewState(cfg *config.Config) (*State, error) { state := State{ feeds: map[string]*Feed{}, cache: nil, // loaded later on @@ -114,10 +156,17 @@ func NewState(cfg *config.Config) *State { } for name, parsedFeed := range cfg.Feeds { - state.feeds[name] = &Feed{Feed: parsedFeed, Global: cfg.GlobalOptions} + var itemFilter *filter.Filter + var err error + if parsedFeed.ItemFilter != "" { + if itemFilter, err = filter.New(parsedFeed.ItemFilter); err != nil { + return nil, fmt.Errorf("Feed %s: Parsing item-filter: %w", parsedFeed.Name, err) + } + } + state.feeds[name] = &Feed{Feed: parsedFeed, Global: cfg.GlobalOptions, filter: itemFilter} } - return &state + return &state, nil } func (state *State) RemoveUndue() { diff --git a/main.go b/main.go index f2a2447..8d3aa15 100644 --- a/main.go +++ b/main.go @@ -85,7 +85,10 @@ func run() error { return fmt.Errorf("Configuration invalid: %w", err) } - state := feed.NewState(cfg) + state, err := feed.NewState(cfg) + if err != nil { + return err + } err = state.LoadCache(cacheFile, buildCache) if err != nil { diff --git a/pkg/config/config.go b/pkg/config/config.go index 377365f..bd3927c 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -36,15 +36,16 @@ var DefaultGlobalOptions = GlobalOptions{ // Per feed options // NB: Always specify a yaml name, as it is later used in processing type Options struct { - MinFreq int `yaml:"min-frequency"` - InclImages bool `yaml:"include-images"` - EmbedImages bool `yaml:"embed-images"` - Disable bool `yaml:"disable"` - IgnHash bool `yaml:"ignore-hash"` - AlwaysNew bool `yaml:"always-new"` - Reupload bool `yaml:"reupload-if-updated"` - NoTLS bool `yaml:"tls-no-verify"` - Body Body `yaml:"body"` + MinFreq int `yaml:"min-frequency"` + InclImages bool `yaml:"include-images"` + EmbedImages bool `yaml:"embed-images"` + Disable bool `yaml:"disable"` + IgnHash bool `yaml:"ignore-hash"` + AlwaysNew bool `yaml:"always-new"` + Reupload bool `yaml:"reupload-if-updated"` + NoTLS bool `yaml:"tls-no-verify"` + ItemFilter string `yaml:"item-filter"` + Body Body `yaml:"body"` } // Default feed options @@ -57,6 +58,7 @@ var DefaultFeedOptions = Options{ AlwaysNew: false, Disable: false, NoTLS: false, + ItemFilter: "", } // Config holds the global configuration options and the configured feeds diff --git a/pkg/config/deprecated.go b/pkg/config/deprecated.go index c002085..9fb8b6e 100644 --- a/pkg/config/deprecated.go +++ b/pkg/config/deprecated.go @@ -20,7 +20,7 @@ var deprecatedOpts = map[string]deprecated{ "dumpdir": unsupported, "debug-updated": {"Use '-d' as option instead.", nil}, "execurl": unsupported, - "filter": unsupported, + "filter": {"Use 'item-filter' instead.", nil}, "disable-ssl-verification": {"Interpreted as 'tls-no-verify'.", func(i interface{}, global *GlobalOptions, opts *Options) { val, ok := i.(bool) if ok { -- cgit v1.2.3