aboutsummaryrefslogtreecommitdiff
path: root/internal/feed
diff options
context:
space:
mode:
authorRené 'Necoro' Neumann <necoro@necoro.eu>2020-04-25 11:27:34 +0200
committerRené 'Necoro' Neumann <necoro@necoro.eu>2020-04-25 11:27:34 +0200
commitd21881150c09986571a563eaf30bc1687787e63f (patch)
treea5da8a3fdb91a3dcf806b704e20b16616a934801 /internal/feed
parentc08aff21cd67cc27926a4cb1ca72ffe67e015ebf (diff)
downloadfeed2imap-go-d21881150c09986571a563eaf30bc1687787e63f.tar.gz
feed2imap-go-d21881150c09986571a563eaf30bc1687787e63f.tar.bz2
feed2imap-go-d21881150c09986571a563eaf30bc1687787e63f.zip
Improved caching
Diffstat (limited to 'internal/feed')
-rw-r--r--internal/feed/cache.go230
-rw-r--r--internal/feed/feed.go64
-rw-r--r--internal/feed/parse.go24
3 files changed, 297 insertions, 21 deletions
diff --git a/internal/feed/cache.go b/internal/feed/cache.go
new file mode 100644
index 0000000..4f27144
--- /dev/null
+++ b/internal/feed/cache.go
@@ -0,0 +1,230 @@
+package feed
+
+import (
+ "bufio"
+ "crypto/sha256"
+ "encoding/gob"
+ "errors"
+ "fmt"
+ "os"
+ "time"
+
+ "github.com/Necoro/feed2imap-go/internal/log"
+)
+
+// Constants describing the cache format and its initial state.
+const (
+ // currentVersion is the version byte written at the start of a stored cache file.
+ currentVersion byte = 1
+ // startFeedId is the value NextId starts with in a freshly created cache.
+ startFeedId uint64 = 1
+)
+
+// Cache is a versioned store of per-feed state that can be written to and
+// read from a file.
+type Cache interface {
+ // findItem returns the cached state belonging to the given feed.
+ findItem(*Feed) CachedFeed
+ // Version returns the format version of this cache.
+ Version() byte
+ // transformToCurrent converts this cache into the current format version.
+ transformToCurrent() (Cache, error)
+}
+
+// feedId is the numeric key under which the cached state of a feed is stored.
+type feedId uint64
+
+// feedDescriptor identifies a feed by name and URL; the cache uses it as the
+// key for looking up the feed's id.
+type feedDescriptor struct {
+ Name string
+ Url string
+}
+
+// CachedFeed is the cached state of a single feed.
+type CachedFeed interface {
+ // Checked records a check of the feed; withFailure tells whether it failed.
+ Checked(withFailure bool)
+ // Failures returns the failure count recorded for the feed.
+ Failures() uint
+}
+
+// cachedFeed is the CachedFeed implementation stored inside a v1Cache.
+type cachedFeed struct {
+ // LastCheck is set to the current time by every call to Checked.
+ LastCheck time.Time
+ // NumFailures counts failed checks since the last successful one.
+ NumFailures uint // can't be named `Failures` b/c it'll collide with the interface
+ // Items holds cached items; NOTE(review): nothing in this file reads or writes it yet.
+ Items []cachedItem
+}
+
+// Checked records that the feed has just been checked: LastCheck becomes the
+// current time, and the failure counter is incremented when withFailure is
+// set or reset to zero otherwise.
+func (cf *cachedFeed) Checked(withFailure bool) {
+	cf.LastCheck = time.Now()
+
+	if !withFailure {
+		cf.NumFailures = 0
+		return
+	}
+	cf.NumFailures++
+}
+
+// Failures reports the current value of the NumFailures counter.
+func (cf *cachedFeed) Failures() uint {
+	failures := cf.NumFailures
+	return failures
+}
+
+// itemHash is a byte array of SHA-256 digest size.
+// NOTE(review): presumably a digest of an item's content — confirm where it is computed.
+type itemHash [sha256.Size]byte
+
+// cachedItem is the per-item record kept in cachedFeed.Items.
+// NOTE(review): the fields look like a snapshot of a feed item's metadata plus
+// a hash; nothing in this file populates them yet — confirm against the caller.
+type cachedItem struct {
+ Uid string
+ Title string
+ Link string
+ Date time.Time
+ Updated time.Time
+ Creator string
+ Hash itemHash
+}
+
+// v1Cache is the version 1 implementation of Cache.
+type v1Cache struct {
+ // version is unexported and therefore not part of the gob encoding; it is
+ // set when the cache is created via New.
+ version byte
+ // Ids maps a feed's descriptor (name and URL) to its id.
+ Ids map[feedDescriptor]feedId
+ // NextId is the id handed out to the next feed that is not yet known.
+ NextId uint64
+ // Feeds holds the cached state of each feed, keyed by id.
+ Feeds map[feedId]*cachedFeed
+}
+
+// Version returns the format version stored in this cache instance.
+func (cache *v1Cache) Version() byte {
+	v := cache.version
+	return v
+}
+
+// New creates an empty cache of the current format version: no feeds are
+// known yet and ids are handed out beginning with startFeedId.
+func New() Cache {
+	return &v1Cache{
+		version: currentVersion,
+		Ids:     make(map[feedDescriptor]feedId),
+		NextId:  startFeedId,
+		Feeds:   make(map[feedId]*cachedFeed),
+	}
+}
+
+// cacheForVersion returns an empty cache to decode a stored cache of the given
+// version into. Unknown versions yield an error.
+func cacheForVersion(version byte) (Cache, error) {
+	if version == 1 {
+		return New(), nil
+	}
+	return nil, fmt.Errorf("unknown cache version '%d'", version)
+}
+
+func (cache *v1Cache) transformToCurrent() (Cache, error) {
+ return cache, nil
+}
+
+// getItem returns the cached state stored under id. If there is none yet, an
+// empty entry is created, stored under id and returned.
+func (cache *v1Cache) getItem(id feedId) CachedFeed {
+	if existing, found := cache.Feeds[id]; found {
+		return existing
+	}
+
+	fresh := &cachedFeed{}
+	cache.Feeds[id] = fresh
+	return fresh
+}
+
+// findItem returns the cached state for feed and remembers it in feed.cached.
+//
+// If feed.cached is already set, it is returned directly. Otherwise the feed is
+// looked up by its name/URL pair. When that exact pair is unknown, the known
+// descriptors are scanned for one sharing the name or the URL; a match is
+// treated as a changed feed (a warning is logged) whose id is moved to the new
+// descriptor. Without a match a fresh id is taken from NextId.
+func (cache *v1Cache) findItem(feed *Feed) CachedFeed {
+	if feed.cached != nil {
+		return feed.cached.(*cachedFeed)
+	}
+
+	key := feedDescriptor{Name: feed.Name, Url: feed.Url}
+
+	id, known := cache.Ids[key]
+	if !known {
+		var (
+			stale   feedDescriptor
+			changed bool
+		)
+		for desc, descId := range cache.Ids {
+			if desc.Name == key.Name {
+				log.Warnf("Feed %s seems to have changed URLs: New '%s', old '%s'. Updating.",
+					key.Name, key.Url, desc.Url)
+			} else if desc.Url == key.Url {
+				log.Warnf("Feed with URL '%s' seems to have changed its name: New '%s', old '%s'. Updating",
+					key.Url, key.Name, desc.Name)
+			} else {
+				continue
+			}
+			// First partial match wins; its id is taken over below.
+			stale, id, changed = desc, descId, true
+			break
+		}
+
+		if changed {
+			delete(cache.Ids, stale)
+		} else {
+			id = feedId(cache.NextId)
+			cache.NextId++
+		}
+		cache.Ids[key] = id
+	}
+
+	entry := cache.getItem(id)
+	feed.cached = entry
+	return entry
+}
+
+// StoreCache writes the cache of feeds to the file fileName, creating or
+// truncating it. The file consists of a single version byte followed by the
+// gob-encoded cache.
+//
+// An error is returned if there is no cache, if its version is not the current
+// one, or if creating, writing, flushing or closing the file fails.
+func (feeds *Feeds) StoreCache(fileName string) error {
+	cache := feeds.cache
+	if cache == nil {
+		return fmt.Errorf("trying to store nil cache")
+	}
+	if cache.Version() != currentVersion {
+		return fmt.Errorf("trying to store cache with unsupported version '%d' (current: '%d')", cache.Version(), currentVersion)
+	}
+
+	f, err := os.Create(fileName)
+	if err != nil {
+		return fmt.Errorf("trying to store cache to '%s': %w", fileName, err)
+	}
+	// Safety net for the early returns below. After the explicit Close at the
+	// end, this second Close merely reports "already closed", which is ignored.
+	defer f.Close()
+
+	writer := bufio.NewWriter(f)
+	if err = writer.WriteByte(currentVersion); err != nil {
+		return fmt.Errorf("writing to '%s': %w", fileName, err)
+	}
+
+	encoder := gob.NewEncoder(writer)
+	if err = encoder.Encode(cache); err != nil {
+		return fmt.Errorf("encoding cache: %w", err)
+	}
+
+	// Buffered data only reaches the file on Flush, and some write errors only
+	// surface on Close, so both must succeed before reporting success.
+	if err = writer.Flush(); err != nil {
+		return fmt.Errorf("writing to '%s': %w", fileName, err)
+	}
+	if err = f.Close(); err != nil {
+		return fmt.Errorf("closing '%s': %w", fileName, err)
+	}
+
+	log.Printf("Stored cache to '%s'.", fileName)
+
+	return nil
+}
+
+// LoadCache reads the cache from fileName, attaches it to feeds and links
+// every feed with its cached state. If loading fails the error is returned
+// and feeds is left untouched.
+func (feeds *Feeds) LoadCache(fileName string) error {
+	loaded, err := loadCache(fileName)
+	if err != nil {
+		return err
+	}
+
+	feeds.cache = loaded
+	for _, current := range feeds.feeds {
+		current.cached = loaded.findItem(current)
+	}
+	return nil
+}
+
+// loadCache reads a cache from fileName and returns it in the current format.
+//
+// A missing file is not an error: a new, empty cache is returned instead.
+// Otherwise the first byte of the file selects the cache version, the rest is
+// gob-decoded into a cache of that version, and the result is transformed to
+// the current version.
+func loadCache(fileName string) (Cache, error) {
+	file, err := os.Open(fileName)
+	if errors.Is(err, os.ErrNotExist) {
+		// no cache there yet -- make new
+		return New(), nil
+	}
+	if err != nil {
+		return nil, fmt.Errorf("opening cache at '%s': %w", fileName, err)
+	}
+	defer file.Close()
+
+	log.Printf("Loading cache from '%s'", fileName)
+
+	input := bufio.NewReader(file)
+	version, err := input.ReadByte()
+	if err != nil {
+		return nil, fmt.Errorf("reading from '%s': %w", fileName, err)
+	}
+
+	stored, err := cacheForVersion(version)
+	if err != nil {
+		return nil, err
+	}
+
+	if err = gob.NewDecoder(input).Decode(stored); err != nil {
+		return nil, fmt.Errorf("decoding for version '%d' from '%s': %w", version, fileName, err)
+	}
+
+	current, err := stored.transformToCurrent()
+	if err != nil {
+		return nil, fmt.Errorf("cannot transform from version %d to %d: %w", version, currentVersion, err)
+	}
+
+	log.Printf("Loaded cache (version %d), transformed to version %d.", version, currentVersion)
+
+	return current, nil
+}
diff --git a/internal/feed/feed.go b/internal/feed/feed.go
index cd906a2..5af4188 100644
--- a/internal/feed/feed.go
+++ b/internal/feed/feed.go
@@ -3,10 +3,13 @@ package feed
import (
"fmt"
"strings"
+ "sync"
+ "time"
"github.com/mmcdole/gofeed"
"github.com/Necoro/feed2imap-go/internal/config"
+ "github.com/Necoro/feed2imap-go/internal/log"
)
type Feed struct {
@@ -14,8 +17,9 @@ type Feed struct {
Target []string
Url string
config.Options
- feed *gofeed.Feed
- items []feeditem
+ feed *gofeed.Feed
+ items []feeditem
+ cached CachedFeed
}
type feeditem struct {
@@ -23,9 +27,18 @@ type feeditem struct {
*gofeed.Item
}
-type Feeds map[string]*Feed
+// Feeds bundles the feeds, keyed by name, with the cache holding their state.
+type Feeds struct {
+ feeds map[string]*Feed
+ cache Cache
+}
+
+// NewFeeds returns an empty feed collection. No cache is attached to it.
+func NewFeeds() *Feeds {
+	fs := new(Feeds)
+	fs.feeds = make(map[string]*Feed)
+	return fs
+}
-func (f Feeds) String() string {
+func (feeds *Feeds) String() string {
var b strings.Builder
app := func(a ...interface{}) {
_, _ = fmt.Fprint(&b, a...)
@@ -33,7 +46,7 @@ func (f Feeds) String() string {
app("Feeds [")
first := true
- for k, v := range f {
+ for k, v := range feeds.feeds {
if !first {
app(", ")
}
@@ -49,3 +62,44 @@ func (f Feeds) String() string {
return b.String()
}
+
+// Len returns the number of feeds in the collection.
+func (feeds *Feeds) Len() int {
+	count := len(feeds.feeds)
+	return count
+}
+
+// Contains reports whether a feed is registered under name.
+func (feeds *Feeds) Contains(name string) bool {
+	if _, found := feeds.feeds[name]; found {
+		return true
+	}
+	return false
+}
+
+// Set registers feed under name, replacing any feed previously stored there.
+func (feeds *Feeds) Set(name string, feed *Feed) {
+	registry := feeds.feeds
+	registry[name] = feed
+}
+
+// Foreach calls f once for every feed, one after another, in map iteration
+// order (i.e. unspecified).
+func (feeds *Feeds) Foreach(f func(*Feed)) {
+	for _, current := range feeds.feeds {
+		f(current)
+	}
+}
+
+func (feeds *Feeds) ForeachGo(goFunc func(*Feed, *sync.WaitGroup)) {
+ var wg sync.WaitGroup
+ wg.Add(feeds.Len())
+
+ for _, feed := range feeds.feeds {
+ go goFunc(feed, &wg)
+ }
+ wg.Wait()
+}
+
+// NeedsUpdate reports whether the feed is due for another update, given the
+// time updateTime of its previous one.
+//
+// A zero updateTime always yields true. Otherwise the feed is skipped (false is
+// returned and a message is logged) as long as fewer than MinFreq full hours
+// have passed since updateTime.
+func (feed *Feed) NeedsUpdate(updateTime time.Time) bool {
+	if updateTime.IsZero() {
+		return true
+	}
+
+	// Skip only while the last update is still recent. The comparison has to be
+	// `<`: with `>=` a feed would be skipped forever once MinFreq hours had
+	// passed (and always for MinFreq == 0).
+	// NOTE(review): assumes MinFreq is a minimum interval in hours — confirm against config.Options.
+	if int(time.Since(updateTime).Hours()) < feed.MinFreq {
+		log.Printf("Feed '%s' does not need updating, skipping.", feed.Name)
+		return false
+	}
+	return true
+}
+
+// Success reports whether the feed holds parsed feed data, i.e. whether its
+// feed field is non-nil.
+func (feed *Feed) Success() bool {
+	if feed.feed == nil {
+		return false
+	}
+	return true
+}
diff --git a/internal/feed/parse.go b/internal/feed/parse.go
index 35a7596..6deebb2 100644
--- a/internal/feed/parse.go
+++ b/internal/feed/parse.go
@@ -32,7 +32,7 @@ func parseFeed(feed *Feed) error {
return nil
}
-func handleFeed(feed *Feed, group *sync.WaitGroup, success chan<- bool) {
+func handleFeed(feed *Feed, group *sync.WaitGroup) {
defer group.Done()
log.Printf("Fetching %s from %s", feed.Name, feed.Url)
@@ -40,25 +40,17 @@ func handleFeed(feed *Feed, group *sync.WaitGroup, success chan<- bool) {
if err != nil {
log.Error(err)
}
- success <- err == nil
}
-func Parse(feeds Feeds) int {
- var wg sync.WaitGroup
- wg.Add(len(feeds))
-
- success := make(chan bool, len(feeds))
-
- for _, feed := range feeds {
- go handleFeed(feed, &wg, success)
- }
-
- wg.Wait()
- close(success)
+func (feeds Feeds) Parse() int {
+ feeds.ForeachGo(handleFeed)
ctr := 0
- for s := range success {
- if s {
+ for _, feed := range feeds.feeds {
+ success := feed.Success()
+ feed.cached.Checked(!success)
+
+ if success {
ctr++
}
}