From dd108ab5ffc959ebfaf326769e5fc74822b6647b Mon Sep 17 00:00:00 2001 From: lnu Date: Mon, 23 Oct 2006 16:21:20 +0000 Subject: git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@105 f70e237a-67f3-0310-a06c-d2b8a7116972 --- lib/feed2imap/cache.rb | 34 +++++++++++++++++++++++----- lib/feed2imap/config.rb | 6 +++-- lib/feed2imap/feed2imap.rb | 56 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 74 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/feed2imap/cache.rb b/lib/feed2imap/cache.rb index a101785..4687b76 100644 --- a/lib/feed2imap/cache.rb +++ b/lib/feed2imap/cache.rb @@ -34,13 +34,13 @@ class ItemCache end # Returns the really new items amongst items - def get_new_items(id, items, always_new = false) + def get_new_items(id, items, always_new = false, ignore_hash = false) if $updateddebug puts "=======================================================" puts "GET_NEW_ITEMS FOR #{id}... (#{Time::now})" end @channels[id] ||= CachedChannel::new - return @channels[id].get_new_items(items, always_new) + return @channels[id].get_new_items(items, always_new, ignore_hash) end # Commit changes to the cache @@ -59,9 +59,16 @@ class ItemCache def set_last_check(id, time) @channels[id] ||= CachedChannel::new @channels[id].lastcheck = time + @channels[id].failures = 0 self end + # Fetching failure. + # returns number of failures + def fetch_failed(id) + @channels[id].fetch_failed + end + # Load the cache from an IO stream def load(io) begin @@ -103,13 +110,14 @@ class CachedChannel # 100 items should be enough for everybody, even quite busy feeds CACHESIZE = 100 - attr_accessor :lastcheck, :items + attr_accessor :lastcheck, :items, :failures def initialize @lastcheck = Time::at(0) @items = [] @itemstemp = [] # see below @nbnewitems = 0 + @failures = 0 end # Let's explain @items and @itemstemp. @@ -123,7 +131,7 @@ class CachedChannel # of (old) items serialized. # Returns the really new items amongst items - def get_new_items(items, always_new = false) + def get_new_items(items, always_new = false, ignore_hash = false) # save number of new items @nbnewitems = items.length # set items' cached version if not set yet @@ -165,7 +173,10 @@ class CachedChannel found = false # Try to find a perfect match @items.each do |j| - if i.cacheditem == j + # note that simple_compare only CachedItem, not RSSItem, so we have to use + # j.simple_compare(i) and not i.simple_compare(j) + if (i.cacheditem == j and not ignore_hash) or + (j.simple_compare(i) and ignore_hash) i.cacheditem.index = j.index found = true # let's put j in front of itemstemp @@ -222,6 +233,12 @@ class CachedChannel def nbitems @items.length end + + def fetch_failed + @failures = 0 if @failures.nil? + @failures += 1 + return @failures + end end # This class is the only thing kept in the cache @@ -243,7 +260,7 @@ class CachedItem end def ==(other) - if $updateddebug and @title =~ /e325/ and other.title =~ /e325/ + if $updateddebug puts "Comparing #{self.to_s} and #{other.to_s}:" puts "Title: #{@title == other.title}" puts "Link: #{@link == other.link}" @@ -256,6 +273,11 @@ class CachedItem (@date.nil? or other.date.nil? or @date == other.date) and @hash == other.hash end + def simple_compare(other) + @title == other.title and @link == other.link and + (@creator.nil? or other.creator.nil? or @creator == other.creator) + end + def create_index @index = ItemCache.getindex end diff --git a/lib/feed2imap/config.rb b/lib/feed2imap/config.rb index 10c4d82..4ab522e 100644 --- a/lib/feed2imap/config.rb +++ b/lib/feed2imap/config.rb @@ -26,7 +26,7 @@ DEFCACHE = ENV['HOME'] + '/.feed2imap.cache' # Feed2imap configuration class F2IConfig - attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug + attr_reader :imap_accounts, :cache, :feeds, :dumpdir, :updateddebug, :max_failures # Load the configuration from the IO stream # TODO should do some sanity check on the data read. @@ -36,6 +36,7 @@ class F2IConfig @dumpdir = @conf['dumpdir'] || nil @conf['feeds'] ||= [] @feeds = [] + @max_failures = @conf['max-failures'].to_i || 5 @updateddebug = (@conf['debug-updated'] and @conf['debug-updated'] != 'false') @imap_accounts = ImapAccounts::new @conf['feeds'].each do |f| @@ -71,7 +72,7 @@ end # A configured feed. simple data container. class ConfigFeed - attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl, :filter + attr_reader :name, :url, :imapaccount, :folder, :always_new, :execurl, :filter, :ignore_hash attr_accessor :body def initialize(f, imapaccount, folder) @@ -83,6 +84,7 @@ class ConfigFeed @always_new = (f['always-new'] and f['always-new'] != 'false') @execurl = f['execurl'] @filter = f['filter'] + @ignore_hash = f['ignore-hash'] || false @freq = @freq.to_i if @freq end diff --git a/lib/feed2imap/feed2imap.rb b/lib/feed2imap/feed2imap.rb index 3cf46aa..2f62a80 100644 --- a/lib/feed2imap/feed2imap.rb +++ b/lib/feed2imap/feed2imap.rb @@ -36,14 +36,17 @@ class Feed2Imap def initialize(verbose, cacherebuild, configfile) @logger = Logger::new(STDOUT) - if verbose + if verbose == :debug @logger.level = Logger::DEBUG + require 'pp' + elsif verbose == true + @logger.level = Logger::INFO else @logger.level = Logger::WARN end @logger.info("Feed2Imap V.#{F2I_VERSION} started") # reading config - @logger.info('Reading configuration file') + @logger.info('Reading configuration file ...') if not File::exist?(configfile) @logger.fatal("Configuration file #{configfile} not found.") exit(1) @@ -60,8 +63,13 @@ class Feed2Imap @logger.fatal("Error while reading configuration file, exiting: #{$!}") exit(1) end + if @logger.level == Logger::DEBUG + @logger.debug("Configuration read:") + pp(@config) + end + # init cache - @logger.info('Initializing cache') + @logger.info('Initializing cache ...') @cache = ItemCache::new(@config.updateddebug) if not File::exist?(@config.cache + '.lock') f = File::new(@config.cache + '.lock', 'w') @@ -78,8 +86,9 @@ class Feed2Imap @cache.load(f) end end + # connecting all IMAP accounts - @logger.info('Connecting to IMAP accounts') + @logger.info('Connecting to IMAP accounts ...') @config.imap_accounts.each_value do |ac| begin ac.connect @@ -88,8 +97,9 @@ class Feed2Imap exit(1) end end + # check that IMAP folders exist - @logger.info("Checking IMAP folders") + @logger.info("Checking IMAP folders ...") @config.feeds.each do |f| begin f.imapaccount.create_folder(f.folder) if not f.imapaccount.folder_exist?(f.folder) @@ -99,7 +109,7 @@ class Feed2Imap end end # for each feed, fetch, upload to IMAP and cache - @logger.info("Fetching and filtering feeds") + @logger.info("Fetching and filtering feeds ...") ths = [] mutex = Mutex::new @config.feeds.each do |f| @@ -126,6 +136,8 @@ class Feed2Imap mutex.lock feed.body = s @cache.set_last_check(feed.name, Time::now) + else + @logger.debug("Feed #{feed.name} doesn't need to be checked again for now.") end mutex.unlock # dump if requested @@ -139,19 +151,34 @@ class Feed2Imap end rescue Timeout::Error mutex.synchronize do - @logger.fatal("Timeout::Error while fetching #{feed.url}: #{$!}") + n = @cache.fetch_failed(feed.name) + m = "Timeout::Error while fetching #{feed.url}: #{$!} (failed #{n} times)" + if n > @config.max_failures + @logger.fatal(m) + else + @logger.info(m) + end end rescue mutex.synchronize do - @logger.fatal("Error while fetching #{feed.url}: #{$!}") + n = @cache.fetch_failed(feed.name) + m = "Error while fetching #{feed.url}: #{$!} (failed #{n} times)" + if n > @config.max_failures + @logger.fatal(m) + else + @logger.info(m) + end end end end end ths.each { |t| t.join } - @logger.info("Parsing and uploading") + @logger.info("Parsing and uploading ...") @config.feeds.each do |f| - next if f.body.nil? # means 304 + if f.body.nil? # means 304 + @logger.debug("Feed #{f.name} did not change.") + next + end begin feed = FeedParser::Feed::new(f.body) rescue Exception => e @@ -159,13 +186,13 @@ class Feed2Imap next end begin - newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new) + newitems, updateditems = @cache.get_new_items(f.name, feed.items, f.always_new, f.ignore_hash) rescue @logger.fatal("Exception caught when selecting new items for #{f.name}: #{$!}") puts $!.backtrace next end - @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0 + @logger.info("#{f.name}: #{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0 or @logger.level == Logger::DEBUG begin if !cacherebuild updateditems.each do |i| @@ -190,17 +217,18 @@ class Feed2Imap next end end - @logger.info("Finished. Saving cache") + @logger.info("Finished. Saving cache ...") begin File::open(@config.cache, 'w') { |f| @cache.save(f) } rescue @logger.fatal("Exception caught while writing cache to #{@config.cache}: #{$!}") end - @logger.info("Closing IMAP connections") + @logger.info("Closing IMAP connections ...") @config.imap_accounts.each_value do |ac| begin ac.disconnect rescue + # servers tend to cause an exception to be raised here, hence the INFO level. @logger.info("Exception caught while closing connection to #{ac.to_s}: #{$!}") end end -- cgit v1.2.3-54-g00ecf