aboutsummaryrefslogtreecommitdiff
path: root/internal/feed/cache_v1.go
diff options
context:
space:
mode:
authorRené 'Necoro' Neumann <necoro@necoro.eu>2021-02-22 22:54:43 +0100
committerRené 'Necoro' Neumann <necoro@necoro.eu>2021-02-22 22:54:43 +0100
commit7106d5a6e7585dce5fdd552cca30063dd352dc23 (patch)
tree88a9cb7150f86fcadb8f87e3a6d0892bf68c2251 /internal/feed/cache_v1.go
parentfb2aa9b1f04d509c8215c1fa6505a144482c343d (diff)
downloadfeed2imap-go-7106d5a6e7585dce5fdd552cca30063dd352dc23.tar.gz
feed2imap-go-7106d5a6e7585dce5fdd552cca30063dd352dc23.tar.bz2
feed2imap-go-7106d5a6e7585dce5fdd552cca30063dd352dc23.zip
Split cache and feed packages
Diffstat (limited to 'internal/feed/cache_v1.go')
-rw-r--r--internal/feed/cache_v1.go324
1 files changed, 0 insertions, 324 deletions
diff --git a/internal/feed/cache_v1.go b/internal/feed/cache_v1.go
deleted file mode 100644
index a80e81c..0000000
--- a/internal/feed/cache_v1.go
+++ /dev/null
@@ -1,324 +0,0 @@
-package feed
-
-import (
- "crypto/sha256"
- "encoding/base64"
- "encoding/hex"
- "fmt"
- "strconv"
- "strings"
- "time"
-
- "github.com/google/uuid"
-
- "github.com/Necoro/feed2imap-go/pkg/log"
- "github.com/Necoro/feed2imap-go/pkg/util"
-)
-
-const (
- v1Version Version = 1
- startFeedId uint64 = 1
-)
-
-type feedId uint64
-
-func (id feedId) String() string {
- return strconv.FormatUint(uint64(id), 16)
-}
-
-func idFromString(s string) feedId {
- id, _ := strconv.ParseUint(s, 16, 64)
- return feedId(id)
-}
-
-type v1Cache struct {
- Ids map[feedDescriptor]feedId
- NextId uint64
- Feeds map[feedId]*cachedFeed
-}
-
-type cachedFeed struct {
- id feedId // not saved, has to be set on loading
- LastCheck time.Time
- currentCheck time.Time
- NumFailures int // can't be named `Failures` b/c it'll collide with the interface
- Items []cachedItem
- newItems []cachedItem
-}
-
-type itemHash [sha256.Size]byte
-
-func (h itemHash) String() string {
- return hex.EncodeToString(h[:])
-}
-
-type cachedItem struct {
- Guid string
- Title string
- Link string
- Date time.Time
- UpdatedCache time.Time
- Hash itemHash
- ID uuid.UUID
-}
-
-func (item cachedItem) String() string {
- return fmt.Sprintf(`{
- ID: %s
- Title: %q
- Guid: %q
- Link: %q
- Date: %s
- Hash: %s
-}`,
- base64.RawURLEncoding.EncodeToString(item.ID[:]),
- item.Title, item.Guid, item.Link, util.TimeFormat(item.Date), item.Hash)
-}
-
-func (cf *cachedFeed) Checked(withFailure bool) {
- cf.currentCheck = time.Now()
- if withFailure {
- cf.NumFailures++
- } else {
- cf.NumFailures = 0
- }
-}
-
-func (cf *cachedFeed) Commit() {
- if cf.newItems != nil {
- cf.Items = cf.newItems
- cf.newItems = nil
- }
- cf.LastCheck = cf.currentCheck
-}
-
-func (cf *cachedFeed) Failures() int {
- return cf.NumFailures
-}
-
-func (cf *cachedFeed) Last() time.Time {
- return cf.LastCheck
-}
-
-func (cf *cachedFeed) ID() string {
- return cf.id.String()
-}
-
-func (cache *v1Cache) Version() Version {
- return v1Version
-}
-
-func (cache *v1Cache) Info() string {
- b := strings.Builder{}
- for descr, id := range cache.Ids {
- b.WriteString(fmt.Sprintf("%3s: %s (%s)\n", id.String(), descr.Name, descr.Url))
- }
- return b.String()
-}
-
-func (cache *v1Cache) SpecificInfo(i interface{}) string {
- id := idFromString(i.(string))
-
- b := strings.Builder{}
- feed := cache.Feeds[id]
-
- for descr, fId := range cache.Ids {
- if id == fId {
- b.WriteString(descr.Name)
- b.WriteString(" -- ")
- b.WriteString(descr.Url)
- b.WriteByte('\n')
- break
- }
- }
-
- b.WriteString(fmt.Sprintf(`
-Last Check: %s
-Num Failures: %d
-Num Items: %d
-`,
- util.TimeFormat(feed.LastCheck),
- feed.NumFailures,
- len(feed.Items)))
-
- for _, item := range feed.Items {
- b.WriteString("\n--------------------\n")
- b.WriteString(item.String())
- }
- return b.String()
-}
-
-func newV1Cache() *v1Cache {
- cache := v1Cache{
- Ids: map[feedDescriptor]feedId{},
- Feeds: map[feedId]*cachedFeed{},
- NextId: startFeedId,
- }
- return &cache
-}
-
-func (cache *v1Cache) transformToCurrent() (CacheImpl, error) {
- return cache, nil
-}
-
-func (cache *v1Cache) getItem(id feedId) CachedFeed {
- feed, ok := cache.Feeds[id]
- if !ok {
- feed = &cachedFeed{}
- cache.Feeds[id] = feed
- }
- feed.id = id
- return feed
-}
-
-func (cache *v1Cache) findItem(feed *Feed) CachedFeed {
- if feed.cached != nil {
- return feed.cached.(*cachedFeed)
- }
-
- fDescr := feed.descriptor()
- id, ok := cache.Ids[fDescr]
- if !ok {
- var otherId feedDescriptor
- changed := false
- for otherId, id = range cache.Ids {
- if otherId.Name == fDescr.Name {
- log.Warnf("Feed %s seems to have changed URLs: new '%s', old '%s'. Updating.",
- fDescr.Name, fDescr.Url, otherId.Url)
- changed = true
- break
- } else if otherId.Url == fDescr.Url {
- log.Warnf("Feed with URL '%s' seems to have changed its name: new '%s', old '%s'. Updating.",
- fDescr.Url, fDescr.Name, otherId.Name)
- changed = true
- break
- }
- }
- if changed {
- delete(cache.Ids, otherId)
- } else {
- id = feedId(cache.NextId)
- cache.NextId++
- }
-
- cache.Ids[fDescr] = id
- }
-
- item := cache.getItem(id)
- feed.cached = item
- return item
-}
-
-func (item *item) newCachedItem() cachedItem {
- var ci cachedItem
-
- ci.ID = item.itemId
- ci.Title = item.Item.Title
- ci.Link = item.Item.Link
- if item.DateParsed() != nil {
- ci.Date = *item.DateParsed()
- }
- ci.Guid = item.Item.GUID
-
- contentByte := []byte(item.Item.Description + item.Item.Content)
- ci.Hash = sha256.Sum256(contentByte)
-
- return ci
-}
-
-func (item *cachedItem) similarTo(other *cachedItem, ignoreHash bool) bool {
- return other.Title == item.Title &&
- other.Link == item.Link &&
- other.Date.Equal(item.Date) &&
- (ignoreHash || other.Hash == item.Hash)
-}
-
-func (cf *cachedFeed) deleteItem(index int) {
- copy(cf.Items[index:], cf.Items[index+1:])
- cf.Items[len(cf.Items)-1] = cachedItem{}
- cf.Items = cf.Items[:len(cf.Items)-1]
-}
-
-func (cf *cachedFeed) filterItems(items []item, ignoreHash, alwaysNew bool) []item {
- if len(items) == 0 {
- return items
- }
-
- cacheItems := make(map[cachedItem]*item, len(items))
- for idx := range items {
- // remove complete duplicates on the go
- cacheItems[items[idx].newCachedItem()] = &items[idx]
- }
- log.Debugf("%d items after deduplication", len(cacheItems))
-
- filtered := make([]item, 0, len(items))
- cacheadd := make([]cachedItem, 0, len(items))
- app := func(item *item, ci cachedItem, oldIdx *int) {
- if oldIdx != nil {
- item.updateOnly = true
- prevId := cf.Items[*oldIdx].ID
- ci.ID = prevId
- item.itemId = prevId
- log.Debugf("oldIdx: %d, prevId: %s, item.id: %s", *oldIdx, prevId, item.id())
- cf.deleteItem(*oldIdx)
- }
- filtered = append(filtered, *item)
- cacheadd = append(cacheadd, ci)
- }
-
-CACHE_ITEMS:
- for ci, item := range cacheItems {
- log.Debugf("Now checking %s", ci)
-
- if ci.Guid != "" {
- for idx, oldItem := range cf.Items {
- if oldItem.Guid == ci.Guid {
- log.Debugf("Guid matches with: %s", oldItem)
- if !oldItem.similarTo(&ci, ignoreHash) {
- item.addReason("guid (upd)")
- app(item, ci, &idx)
- } else {
- log.Debugf("Similar, ignoring item %s", base64.RawURLEncoding.EncodeToString(oldItem.ID[:]))
- }
-
- continue CACHE_ITEMS
- }
- }
-
- log.Debug("Found no matching GUID, including.")
- item.addReason("guid")
- app(item, ci, nil)
- continue
- }
-
- for idx, oldItem := range cf.Items {
- if oldItem.similarTo(&ci, ignoreHash) {
- log.Debugf("Similarity matches, ignoring: %s", oldItem)
- continue CACHE_ITEMS
- }
-
- if oldItem.Link == ci.Link {
- if alwaysNew {
- log.Debugf("Link matches, but `always-new`.")
- item.addReason("always-new")
- continue
- }
- log.Debugf("Link matches, updating: %s", oldItem)
- item.addReason("link (upd)")
- app(item, ci, &idx)
-
- continue CACHE_ITEMS
- }
- }
-
- log.Debugf("No match found, inserting.")
- item.addReason("new")
- app(item, ci, nil)
- }
-
- log.Debugf("%d items after filtering", len(filtered))
-
- cf.newItems = append(cacheadd, cf.Items...)
-
- return filtered
-}