diff options
author | René 'Necoro' Neumann <necoro@necoro.eu> | 2020-04-25 11:27:34 +0200 |
---|---|---|
committer | René 'Necoro' Neumann <necoro@necoro.eu> | 2020-04-25 11:27:34 +0200 |
commit | d21881150c09986571a563eaf30bc1687787e63f (patch) | |
tree | a5da8a3fdb91a3dcf806b704e20b16616a934801 /internal/feed | |
parent | c08aff21cd67cc27926a4cb1ca72ffe67e015ebf (diff) | |
download | feed2imap-go-d21881150c09986571a563eaf30bc1687787e63f.tar.gz feed2imap-go-d21881150c09986571a563eaf30bc1687787e63f.tar.bz2 feed2imap-go-d21881150c09986571a563eaf30bc1687787e63f.zip |
Improved caching
Diffstat (limited to 'internal/feed')
-rw-r--r-- | internal/feed/cache.go | 230 | ||||
-rw-r--r-- | internal/feed/feed.go | 64 | ||||
-rw-r--r-- | internal/feed/parse.go | 24 |
3 files changed, 297 insertions, 21 deletions
diff --git a/internal/feed/cache.go b/internal/feed/cache.go
new file mode 100644
index 0000000..4f27144
--- /dev/null
+++ b/internal/feed/cache.go
@@ -0,0 +1,230 @@
+package feed
+
+import (
+	"bufio"
+	"crypto/sha256"
+	"encoding/gob"
+	"errors"
+	"fmt"
+	"os"
+	"time"
+
+	"github.com/Necoro/feed2imap-go/internal/log"
+)
+
+const (
+	currentVersion byte   = 1
+	startFeedId    uint64 = 1
+)
+
+type Cache interface {
+	findItem(*Feed) CachedFeed
+	Version() byte
+	transformToCurrent() (Cache, error)
+}
+
+type feedId uint64
+
+type feedDescriptor struct {
+	Name string
+	Url  string
+}
+
+type CachedFeed interface {
+	Checked(withFailure bool)
+	Failures() uint
+}
+
+type cachedFeed struct {
+	LastCheck   time.Time
+	NumFailures uint // can't be named `Failures` b/c it'll collide with the interface
+	Items       []cachedItem
+}
+
+func (cf *cachedFeed) Checked(withFailure bool) {
+	cf.LastCheck = time.Now()
+	if withFailure {
+		cf.NumFailures++
+	} else {
+		cf.NumFailures = 0
+	}
+}
+
+func (cf *cachedFeed) Failures() uint {
+	return cf.NumFailures
+}
+
+type itemHash [sha256.Size]byte
+
+type cachedItem struct {
+	Uid     string
+	Title   string
+	Link    string
+	Date    time.Time
+	Updated time.Time
+	Creator string
+	Hash    itemHash
+}
+
+type v1Cache struct {
+	version byte // unexported, hence skipped by gob; set by New and stored as the file's leading byte
+	Ids     map[feedDescriptor]feedId
+	NextId  uint64
+	Feeds   map[feedId]*cachedFeed
+}
+
+func (cache *v1Cache) Version() byte {
+	return cache.version
+}
+
+func New() Cache {
+	cache := v1Cache{
+		Ids:    map[feedDescriptor]feedId{},
+		Feeds:  map[feedId]*cachedFeed{},
+		NextId: startFeedId,
+	}
+	cache.version = currentVersion
+	return &cache
+}
+
+func cacheForVersion(version byte) (Cache, error) {
+	switch version {
+	case 1:
+		return New(), nil
+	default:
+		return nil, fmt.Errorf("unknown cache version '%d'", version)
+	}
+}
+
+func (cache *v1Cache) transformToCurrent() (Cache, error) {
+	return cache, nil
+}
+
+func (cache *v1Cache) getItem(id feedId) CachedFeed {
+	feed, ok := cache.Feeds[id]
+	if !ok {
+		feed = &cachedFeed{}
+		cache.Feeds[id] = feed
+	}
+	return feed
+}
+
+func (cache *v1Cache) findItem(feed *Feed) CachedFeed {
+	if feed.cached != nil {
+		return feed.cached.(*cachedFeed)
+	}
+
+	fId := feedDescriptor{Name: feed.Name, Url: feed.Url}
+	id, ok := cache.Ids[fId]
+	if !ok {
+		var otherId feedDescriptor
+		changed := false
+		for otherId, id = range cache.Ids {
+			if otherId.Name == fId.Name {
+				log.Warnf("Feed %s seems to have changed URLs: New '%s', old '%s'. Updating.",
+					fId.Name, fId.Url, otherId.Url)
+				changed = true
+				break
+			} else if otherId.Url == fId.Url {
+				log.Warnf("Feed with URL '%s' seems to have changed its name: New '%s', old '%s'. Updating",
+					fId.Url, fId.Name, otherId.Name)
+				changed = true
+				break
+			}
+		}
+		if changed {
+			delete(cache.Ids, otherId)
+		} else {
+			id = feedId(cache.NextId)
+			cache.NextId++
+		}
+
+		cache.Ids[fId] = id
+	}
+
+	item := cache.getItem(id)
+	feed.cached = item
+	return item
+}
+
+func (feeds *Feeds) StoreCache(fileName string) error {
+	cache := feeds.cache
+	if cache == nil {
+		return fmt.Errorf("trying to store nil cache")
+	}
+	if cache.Version() != currentVersion {
+		return fmt.Errorf("trying to store cache with unsupported version '%d' (current: '%d')", cache.Version(), currentVersion)
+	}
+
+	f, err := os.Create(fileName)
+	if err != nil {
+		return fmt.Errorf("trying to store cache to '%s': %w", fileName, err)
+	}
+	defer f.Close()
+
+	writer := bufio.NewWriter(f)
+	if err = writer.WriteByte(currentVersion); err != nil {
+		return fmt.Errorf("writing to '%s': %w", fileName, err)
+	}
+
+	encoder := gob.NewEncoder(writer)
+	if err = encoder.Encode(cache); err != nil {
+		return fmt.Errorf("encoding cache: %w", err)
+	}
+	if err = writer.Flush(); err != nil { // buffered data only reaches the file here, so a failure must not be ignored
+		return fmt.Errorf("writing to '%s': %w", fileName, err)
+	}
+	log.Printf("Stored cache to '%s'.", fileName)
+	return nil
+}
+
+func (feeds *Feeds) LoadCache(fileName string) error {
+	cache, err := loadCache(fileName)
+	if err != nil {
+		return err
+	}
+	feeds.cache = cache
+
+	for _, feed := range feeds.feeds {
+		feed.cached = cache.findItem(feed)
+	}
+	return nil
+}
+
+func loadCache(fileName string) (Cache, error) {
+	f, err := os.Open(fileName)
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			// no cache there yet -- make new
+			return New(), nil
+		}
+		return nil, fmt.Errorf("opening cache at '%s': %w", fileName, err)
+	}
+	defer f.Close()
+
+	log.Printf("Loading cache from '%s'", fileName)
+
+	reader := bufio.NewReader(f)
+	version, err := reader.ReadByte()
+	if err != nil {
+		return nil, fmt.Errorf("reading from '%s': %w", fileName, err)
+	}
+
+	cache, err := cacheForVersion(version)
+	if err != nil {
+		return nil, err
+	}
+
+	decoder := gob.NewDecoder(reader)
+	if err = decoder.Decode(cache); err != nil {
+		return nil, fmt.Errorf("decoding for version '%d' from '%s': %w", version, fileName, err)
+	}
+
+	if cache, err = cache.transformToCurrent(); err != nil {
+		return nil, fmt.Errorf("cannot transform from version %d to %d: %w", version, currentVersion, err)
+	}
+
+	log.Printf("Loaded cache (version %d), transformed to version %d.", version, currentVersion)
+
+	return cache, nil
+}
diff --git a/internal/feed/feed.go b/internal/feed/feed.go
index cd906a2..5af4188 100644
--- a/internal/feed/feed.go
+++ b/internal/feed/feed.go
@@ -3,10 +3,13 @@ package feed
 import (
 	"fmt"
 	"strings"
+	"sync"
+	"time"
 
 	"github.com/mmcdole/gofeed"
 
 	"github.com/Necoro/feed2imap-go/internal/config"
+	"github.com/Necoro/feed2imap-go/internal/log"
 )
 
 type Feed struct {
@@ -14,8 +17,9 @@ type Feed struct {
 	Target []string
 	Url    string
 	config.Options
-	feed  *gofeed.Feed
-	items []feeditem
+	feed   *gofeed.Feed
+	items  []feeditem
+	cached CachedFeed
 }
 
 type feeditem struct {
@@ -23,9 +27,18 @@ type feeditem struct {
 	*gofeed.Item
 }
 
-type Feeds map[string]*Feed
+type Feeds struct {
+	feeds map[string]*Feed
+	cache Cache
+}
+
+func NewFeeds() *Feeds {
+	return &Feeds{
+		feeds: map[string]*Feed{},
+	}
+}
 
-func (f Feeds) String() string {
+func (feeds *Feeds) String() string {
 	var b strings.Builder
 	app := func(a ...interface{}) {
 		_, _ = fmt.Fprint(&b, a...)
@@ -33,7 +46,7 @@ func (f Feeds) String() string {
 	app("Feeds [")
 
 	first := true
-	for k, v := range f {
+	for k, v := range feeds.feeds {
 		if !first {
 			app(", ")
 		}
@@ -49,3 +62,44 @@ func (f Feeds) String() string {
 
 	return b.String()
 }
+
+func (feeds *Feeds) Len() int {
+	return len(feeds.feeds)
+}
+
+func (feeds *Feeds) Contains(name string) bool {
+	_, ok := feeds.feeds[name]
+	return ok
+}
+
+func (feeds *Feeds) Set(name string, feed *Feed) {
+	feeds.feeds[name] = feed
+}
+
+func (feeds *Feeds) Foreach(f func(*Feed)) {
+	for _, feed := range feeds.feeds {
+		f(feed)
+	}
+}
+
+func (feeds *Feeds) ForeachGo(goFunc func(*Feed, *sync.WaitGroup)) {
+	var wg sync.WaitGroup
+	wg.Add(feeds.Len())
+
+	for _, feed := range feeds.feeds {
+		go goFunc(feed, &wg) // goFunc is expected to call wg.Done itself (handleFeed defers it)
+	}
+	wg.Wait()
+}
+
+func (feed *Feed) NeedsUpdate(updateTime time.Time) bool {
+	if !updateTime.IsZero() && int(time.Since(updateTime).Hours()) < feed.MinFreq { // fewer than MinFreq hours since updateTime: skip
+		log.Printf("Feed '%s' does not need updating, skipping.", feed.Name)
+		return false
+	}
+	return true
+}
+
+func (feed *Feed) Success() bool {
+	return feed.feed != nil
+}
diff --git a/internal/feed/parse.go b/internal/feed/parse.go
index 35a7596..6deebb2 100644
--- a/internal/feed/parse.go
+++ b/internal/feed/parse.go
@@ -32,7 +32,7 @@ func parseFeed(feed *Feed) error {
 	return nil
 }
 
-func handleFeed(feed *Feed, group *sync.WaitGroup, success chan<- bool) {
+func handleFeed(feed *Feed, group *sync.WaitGroup) {
 	defer group.Done()
 
 	log.Printf("Fetching %s from %s", feed.Name, feed.Url)
@@ -40,25 +40,17 @@ func handleFeed(feed *Feed, group *sync.WaitGroup, success chan<- bool) {
 	if err != nil {
 		log.Error(err)
 	}
-	success <- err == nil
 }
 
-func Parse(feeds Feeds) int {
-	var wg sync.WaitGroup
-	wg.Add(len(feeds))
-
-	success := make(chan bool, len(feeds))
-
-	for _, feed := range feeds {
-		go handleFeed(feed, &wg, success)
-	}
-
-	wg.Wait()
-	close(success)
+func (feeds *Feeds) Parse() int {
+	feeds.ForeachGo(handleFeed)
 
 	ctr := 0
-	for s := range success {
-		if s {
+	for _, feed := range feeds.feeds {
+		success := feed.Success()
+		feed.cached.Checked(!success)
+
+		if success {
 			ctr++
 		}
 	}