From ed0c0c41c728717de3a6afcdf071100a680649aa Mon Sep 17 00:00:00 2001
From: lnu
Date: Sat, 14 May 2005 08:35:08 +0000
Subject: fixed a bug with mediawiki duplicates

git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@26 f70e237a-67f3-0310-a06c-d2b8a7116972
---
 lib/feed2imap/cache.rb | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'lib/feed2imap/cache.rb')

diff --git a/lib/feed2imap/cache.rb b/lib/feed2imap/cache.rb
index e270f05..006e99e 100644
--- a/lib/feed2imap/cache.rb
+++ b/lib/feed2imap/cache.rb
@@ -124,6 +124,24 @@ class CachedChannel
     updateditems = []
     @itemstemp = @items
     items.each { |i| i.cacheditem ||= CachedItem::new(i) }
+    # remove dups
+    dups = true
+    while dups
+      dups = false
+      for i in 0...items.length do
+        for j in i+1...items.length do
+          if items[i].cacheditem.link == items[j].cacheditem.link
+            if UPDATEDDEBUG
+              puts "## Removed #{items[j].cacheditem.to_s}"
+            end
+            items.delete_at(j)
+            dups = true
+            break
+          end
+        end
+        break if dups
+      end
+    end
     # debug : dump interesting info to stdout.
     if UPDATEDDEBUG
       puts "-------Items downloaded :----------"
@@ -147,8 +165,8 @@ class CachedChannel
       next if found
       # Try to find an updated item
       @items.each do |j|
+        # Do we need a better heuristic ?
         if i.link and i.link == j.link
-          # Do we need a better heuristic ?
           i.cacheditem.index = j.index
           i.cacheditem.updated = true
           updateditems.push(i)
@@ -166,6 +184,12 @@ class CachedChannel
       # add i.cacheditem to @itemstemp
       @itemstemp.unshift(i.cacheditem)
     end
+    if UPDATEDDEBUG
+      puts "-------New items :----------"
+      newitems.each { |i| puts "#{i.cacheditem.to_s}" }
+      puts "-------Updated items :----------"
+      updateditems.each { |i| puts "#{i.cacheditem.to_s}" }
+    end
     return [newitems, updateditems]
   end
 
-- 
cgit v1.2.3-54-g00ecf