first import

git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@5 f70e237a-67f3-0310-a06c-d2b8a7116972
author: lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> 2005-03-31 22:08:32 +0000
committer: lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> 2005-03-31 22:08:32 +0000
commit: 16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5 (patch)
tree: fcee2e08574f55e141eeea3cb2747a4a80c04d89 /lib
parent: 94c2f3339fbe18700fcc057367784d04bb2a76d9 (diff)
download: feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.gz
feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.bz2
feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.zip
10 files changed, 1104 insertions, 0 deletions
diff --git a/lib/feed2imap.rb b/lib/feed2imap.rb
new file mode 100644
index 0000000..7168268
--- /dev/null
+++ b/lib/feed2imap.rb
@@ -0,0 +1,8 @@
+require 'feed2imap/cache'
+require 'feed2imap/channel'
+require 'feed2imap/config'
+require 'feed2imap/httpfetcher'
+require 'feed2imap/imap'
+require 'feed2imap/rexml_patch'
+require 'feed2imap/rubymail_patch'
+require 'feed2imap/textconverters'
diff --git a/lib/feed2imap/cache.rb b/lib/feed2imap/cache.rb
new file mode 100644
index 0000000..1534483
--- /dev/null
+++ b/lib/feed2imap/cache.rb
@@ -0,0 +1,176 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+# This class manages a cache of items
+# (items which have already been seen)
+
+require 'digest/md5'
+
+class ItemCache
+  # Creates an empty cache and restarts the shared item index counter at 0.
+  def initialize
+    @channels = {}
+    @@cacheidx = 0
+  end
+
+  # Returns [new_items, updated_items] among +items+ for the channel +id+.
+  def get_new_items(id, items)
+    channel(id).get_new_items(items)
+  end
+
+  # Replaces the cached items of channel +id+ with +items+.
+  def update_cache(id, items)
+    channel(id).update(items)
+  end
+
+  # Returns the time channel +id+ was last checked (Time::at(0) if never).
+  def get_last_check(id)
+    channel(id).lastcheck
+  end
+
+  # Records +time+ as the last check of channel +id+. Returns self.
+  def set_last_check(id, time)
+    channel(id).lastcheck = time
+    self
+  end
+
+  # Loads the cache from an IO stream, unmarshalled once (it can't be re-read):
+  # either the [index, channels] pair of #save or a bare channels hash.
+  def load(io)
+    data = Marshal.load(io) # NOTE(review): Marshal must only read trusted files
+    if data.is_a?(Array)
+      @@cacheidx, @channels = data
+    else
+      @@cacheidx, @channels = 0, data # no stored index: restart at 0
+    end
+  end
+
+  # Saves the index counter and the channels to an IO stream.
+  def save(io)
+    Marshal.dump([@@cacheidx, @channels], io)
+  end
+
+  # Returns the number of channels in the cache.
+  def nbchannels
+    @channels.length
+  end
+
+  # Returns the total number of items over all cached channels.
+  def nbitems
+    @channels.values.inject(0) { |sum, c| sum + c.nbitems }
+  end
+
+  # Hands out the next unused item index and advances the counter.
+  def ItemCache.getindex
+    i = @@cacheidx
+    @@cacheidx += 1
+    i
+  end
+
+  # Returns the CachedChannel for +id+, creating it on first use.
+  def channel(id)
+    @channels[id] ||= CachedChannel::new
+  end
+  private :channel
+end
+
+# Per-channel part of the cache: the cached items last stored for one
+# channel and the time that channel was last checked.
+class CachedChannel
+  # lastcheck:: time of the last check
+  # items::     array of the cached items
+  attr_accessor :lastcheck, :items
+
+  # Starts with no cached item and a last check at Time::at(0).
+  def initialize
+    @items = []
+    @lastcheck = Time::at(0)
+  end
+
+  # Sorts +items+ against the cache.
+  #
+  # Every item first gets a cached version (item.cacheditem) unless it has
+  # one already. Then, for each item:
+  # * if its cached version equals a cached entry, the item was already
+  #   seen: it only inherits the index of that entry;
+  # * else if its link equals the link of a cached entry, the item is an
+  #   update: it reuses the index of that entry and is flagged as updated;
+  # * else the item is new and gets a fresh index.
+  #
+  # Returns the two-element array [new_items, updated_items].
+  def get_new_items(items)
+    fresh = []
+    changed = []
+    items.each { |item| item.cacheditem ||= CachedItem::new(item) }
+    items.each do |item|
+      current = item.cacheditem
+      if (same = @items.find { |known| current == known })
+        # perfect match
+        current.index = same.index
+      elsif item.link && (older = @items.find { |known| item.link == known.link })
+        # TODO use a better heuristic ?
+        current.index = older.index
+        current.updated = true
+        changed.push(item)
+      else
+        # add as new
+        current.create_index
+        fresh.push(item)
+      end
+    end
+    [fresh, changed]
+  end
+
+  # Replaces the cached items with the cached versions of +items+.
+  # Returns self.
+  def update(items)
+    @items = items.map { |item| item.cacheditem }
+    self
+  end
+
+  # Returns the number of cached items.
+  def nbitems
+    @items.length
+  end
+end
+
+# Compact fingerprint of a feed item; the only thing kept in the cache.
+class CachedItem
+  # NOTE(review): the :hash reader replaces Object#hash with the content digest
+  attr_reader :title, :link, :hash
+  attr_accessor :index, :updated
+
+  # Records the title, the link and the MD5 digest of the content of +item+
+  # (the digest is nil when the item has no content).
+  def initialize(item)
+    @title, @link = item.title, item.link
+    body = item.content
+    @hash = body.nil? ? nil : Digest::MD5.hexdigest(body.to_s)
+  end
+
+  # Two cached items are equal when title, link and content digest all match.
+  def ==(other)
+    @title == other.title && @link == other.link && @hash == other.hash
+  end
+
+  # Assigns the next index handed out by ItemCache.getindex.
+  def create_index
+    @index = ItemCache.getindex
+  end
+end
diff --git a/lib/feed2imap/channel.rb b/lib/feed2imap/channel.rb
new file mode 100644
index 0000000..ae83d18
--- /dev/null
+++ b/lib/feed2imap/channel.rb
@@ -0,0 +1,326 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+# This class allows to retrieve a feed and parse it into a Channel
+
+require 'rexml/document'
+require 'time'
+require 'rmail'
+require 'feed2imap/textconverters'
+require 'feed2imap/rubymail_patch'
+require 'feed2imap/rexml_patch'
+
+class UnknownFeedTypeException < RuntimeError # raised by Channel#parse_str when the document is neither RSS nor Atom
+end
+# An RSS/Atom channel: the feed-level fields and the list of items.
+class Channel
+  attr_reader :title, :link, :description, :creator, :encoding, :items
+
+  # Types of the Atom <link> elements accepted as the channel link
+  HTML_LINK_TYPES = %w(text/html application/xhtml application/xhtml+xml)
+
+  # Parses +str+ to build the channel, if a string is given.
+  def initialize(str = nil)
+    parse_str(str) if str
+  end
+
+  # Determines all the fields using +str+, a string containing an XML
+  # document, which is left unmodified. Items are built as Item objects for
+  # an RSS feed and as AtomItem objects for an Atom feed.
+  # Raises UnknownFeedTypeException for any other kind of document.
+  def parse_str(str)
+    # Dirty hack: some feeds contain the & char. It must be changed to &amp;
+    # (on a copy: the caller's string must not be altered behind its back)
+    doc = REXML::Document.new(str.gsub(/&(\s+)/, '&amp;\1'))
+    @encoding = doc.encoding
+    @title = @link = @description = @creator = nil
+    @items = []
+    root = doc.root
+    # a document without a root element can't be a feed
+    raise UnknownFeedTypeException::new if root.nil?
+    if root.elements['channel'] || root.elements['rss:channel']
+      parse_rss(root)
+    elsif root.elements['/feed']
+      parse_atom(root)
+    else
+      raise UnknownFeedTypeException::new
+    end
+  end
+
+  # Returns the title and link of the channel followed by all its items.
+  def to_s
+    "Title: #{@title}\nLink: #{@link}\n\n" + @items.map { |i| i.to_s }.join
+  end
+
+  private
+
+  # Returns the text of the first element of +root+ matching one of the
+  # XPath +paths+, or nil if there is none or if that element has no text.
+  def first_text(root, *paths)
+    paths.each do |path|
+      e = root.elements[path]
+      return e.text if e
+    end
+    nil
+  end
+
+  # Reads the fields and the items of an RSS feed.
+  # NOTE(review): rmWhiteSpace! is defined outside this file; its result is
+  # used as the field value, so it is assumed never to return nil.
+  def parse_rss(root)
+    t = first_text(root, 'channel/title', 'rss:channel/rss:title')
+    @title = t.toUTF8(@encoding).rmWhiteSpace! if t
+    t = first_text(root, 'channel/link', 'rss:channel/rss:link')
+    @link = t.rmWhiteSpace! if t
+    t = first_text(root, 'channel/description', 'rss:channel/rss:description')
+    @description = t.toUTF8(@encoding).rmWhiteSpace! if t
+    # Creator: dc:creator is preferred over author
+    t = first_text(root, 'channel/dc:creator') ||
+      first_text(root, 'channel/author', 'rss:channel/rss:author')
+    @creator = t.toUTF8(@encoding).rmWhiteSpace! if t
+    # Items are either below the channel or directly below the root
+    query = %w(channel/item item rss:channel/rss:item).find { |q| root.elements[q] }
+    root.each_element(query || 'rss:item') { |e| @items << Item::new(e, self) }
+  end
+
+  # Reads the fields and the entries of an Atom feed.
+  def parse_atom(root)
+    t = first_text(root, '/feed/title')
+    @title = t.toUTF8(@encoding).rmWhiteSpace! if t
+    # Link: needs an HTML-like type and an href. Links without a type
+    # attribute are skipped instead of raising NoMethodError on nil.
+    root.each_element('/feed/link') do |e|
+      type, href = e.attribute('type'), e.attribute('href')
+      next unless type && href && HTML_LINK_TYPES.include?(type.value)
+      @link = href.value.rmWhiteSpace!
+    end
+    # Description
+    if (e = root.elements['/feed/info'])
+      @description = e.elements.to_s.toUTF8(@encoding).rmWhiteSpace!
+    end
+    root.each_element('/feed/entry') { |e| @items << AtomItem::new(e, self) }
+  end
+end
+
+# An item from a channel, built from an RSS <item> element.
+class Item
+  attr_accessor :title, :link, :content, :date, :creator, :subject,
+                :category, :cacheditem
+  attr_reader :channel
+
+  # Fills the fields from the REXML element +item+ of +channel+.
+  # Every field is left to nil if no element is given.
+  def initialize(item = nil, channel = nil)
+    @channel = channel
+    @title = @link = @content = @date = @creator = @subject = @category = nil
+    if item
+      # Title (the text of pubDate is used if there is no title)
+      if ((e = item.elements['title'] || item.elements['rss:title']) &&
+          e.text)  ||
+          ((e = item.elements['pubDate'] || item.elements['rss:pubDate']) &&
+           e.text)
+        @title = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+      end
+      # Link (else a guid with some text, unless it has isPermaLink="false")
+      if ((e = item.elements['link'] || item.elements['rss:link']) && e.text)||
+          ((e = item.elements['guid'] || item.elements['rss:guid']) &&
+          e.text && !(e.attribute('isPermaLink') &&
+          e.attribute('isPermaLink').value == 'false'))
+        @link = e.text.rmWhiteSpace!
+      end
+      # Content
+      if (e = item.elements['content:encoded']) ||
+        (e = item.elements['description'] || item.elements['rss:description'])
+        if e.cdatas[0]
+          @content = e.cdatas[0].to_s.toUTF8(@channel.encoding).rmWhiteSpace!
+        elsif e.text
+          @content = e.text.toUTF8(@channel.encoding).text2html
+        end
+      end
+      # Date
+      if e = item.elements['dc:date'] || item.elements['pubDate'] ||
+          item.elements['rss:pubDate']
+        @date = parse_date(e.text)
+      end
+      # Creator
+      @creator = @channel.creator
+      if (e = item.elements['dc:creator'] || item.elements['author'] ||
+          item.elements['rss:author']) && e.text
+        @creator = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+      end
+      # Subject
+      if (e = item.elements['dc:subject']) && e.text
+        @subject = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+      end
+      # Category
+      if (e = item.elements['dc:category'] || item.elements['category'] ||
+          item.elements['rss:category']) && e.text
+        @category = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+      end
+    end
+  end
+
+  # Returns a multi-line dump of all the fields.
+  def to_s
+    "--------------------------------\n" +
+      "Title: #{@title}\nLink: #{@link}\n" +
+      "Date: #{@date.to_s}\nCreator: #{@creator}\n" +
+      "Subject: #{@subject}\nCategory: #{@category}\nContent:\n#{content}\n"
+  end
+
+  # Returns the plain text rendering of the item (text part of the mail).
+  def to_text
+    s = ""
+    s += "Channel: "
+    s += @channel.title + ' ' if @channel.title
+    s += "<#{@channel.link}>" if @channel.link
+    s += "\n"
+    s += "Item: "
+    s += @title + ' ' if @title
+    s += "<#{@link}>" if @link
+    s += "\n"
+    s += "\nDate: #{@date.to_s}" if @date # TODO improve date rendering ?
+    s += "\nAuthor: #{@creator}" if @creator
+    s += "\nSubject: #{@subject}" if @subject
+    s += "\nCategory: #{@category}" if @category
+    s += "\n\n"
+    s += "#{@content.html2text}" if @content
+    s
+  end
+
+  # Returns the HTML rendering of the item (HTML part of the mail).
+  def to_html
+    s = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
+    s += '<html>'
+    s += '<body>'
+    s += "<p>Channel: "
+    s += "<a href=\"#{@channel.link}\">" if @channel.link
+    s += @channel.title if @channel.title
+    s += "</a>" if @channel.link
+    s += "<br/>\nItem: "
+    s += "<a href=\"#{@link}\">" if @link
+    s += @title if @title
+    s += "</a>" if @link
+    s += "\n"
+    s += "<br/>Date: #{@date.to_s}" if @date # TODO improve date rendering ?
+    s += "<br/>Author: #{@creator}" if @creator
+    s += "<br/>Subject: #{@subject}" if @subject
+    s += "<br/>Category: #{@category}" if @category
+    s += "</p>"
+    s += "<p>#{@content}</p>" if @content
+    s += '</body></html>'
+    s
+  end
+
+  # TODO from significatif
+  # Returns the source of a mail with a text part and an HTML part.
+  def to_mail(from = 'Feed2Imap')
+    message = RMail::Message::new
+    message.header['From'] = "#{from} <feed2imap@feed2imap.net>"
+    message.header['To'] = "#{from} <feed2imap@feed2imap.net>"
+    message.header['Date'] = (@date || Time::new).rfc2822
+    message.header['X-Feed2Imap-Version'] = F2I_VERSION if defined?(F2I_VERSION)
+    message.header['X-CacheIndex'] = "-#{@cacheditem.index}-"
+    message.header['X-F2IStatus'] = "Updated" if @cacheditem.updated
+    # TODO encode in ISO ?
+    subject = @title || (@date && @date.to_s) || @link
+    message.header['Subject'] = subject if subject
+    # the bodies are UTF-8 text, hence 8bit (7bit is for pure ASCII data)
+    textpart = RMail::Message::new
+    textpart.header['Content-Type'] = 'text/plain; charset=UTF-8; format=flowed'
+    textpart.header['Content-Transfer-Encoding'] = '8bit'
+    textpart.body = to_text
+    htmlpart = RMail::Message::new
+    htmlpart.header['Content-Type'] = 'text/html; charset=UTF-8'
+    htmlpart.header['Content-Transfer-Encoding'] = '8bit'
+    htmlpart.body = to_html
+    message.add_part(textpart)
+    message.add_part(htmlpart)
+    return message.to_s
+  end
+
+  private
+
+  # Parses +text+ as an XML Schema date, then as an RFC 2822 date, then as
+  # a free-form date. Returns a Time, or nil if it fits none of them.
+  def parse_date(text)
+    [:xmlschema, :rfc2822, :parse].each do |format|
+      begin
+        return Time.send(format, text)
+      rescue # not in this format: try the next one
+      end
+    end
+    nil
+  end
+end
+
+class AtomItem < Item
+  # Fills the fields from the REXML element +item+, an Atom entry of
+  # +channel+. Every field is left to nil if no element is given.
+  def initialize(item = nil, channel = nil)
+    @channel = channel
+    @title = @link = @content = @date = @creator = @subject = @category = nil
+    if item
+      # Title
+      if (e = item.elements['title']) && e.text
+        @title = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+      end
+      # Link: links lacking a type or an href are skipped rather than raising
+      item.each_element('link') do |e|
+        type, href = e.attribute('type'), e.attribute('href')
+        next unless type && href && href.value
+        @link = href.value if %w(text/html application/xhtml application/xhtml+xml).include?(type.value)
+      end
+      # Content
+      if e = item.elements['content'] || item.elements['summary']
+        if (e.attribute('mode') and e.attribute('mode').value == 'escaped') &&
+          e.text
+          @content = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+        else
+          # go one step deeper in the recursion if possible
+          e = e.elements['div'] || e
+          @content = e.to_s.toUTF8(@channel.encoding).rmWhiteSpace!
+        end
+      end
+      # Date
+      if (e = item.elements['issued'] || item.elements['created']) && e.text
+        @date = parse_date(e.text)
+      end
+      # Creator
+      @creator = @channel.creator
+      if (e = item.elements['author/name']) && e.text
+        @creator = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+      end
+    end
+  end
+
+  private
+
+  # Parses +text+ as an XML Schema date, then as an RFC 2822 date, then as
+  # a free-form date. Returns a Time, or nil if it fits none of them.
+  def parse_date(text)
+    [:xmlschema, :rfc2822, :parse].each do |format|
+      begin
+        return Time.send(format, text)
+      rescue # not in this format: try the next one
+      end
+    end
+    nil
+  end
+end
diff --git a/lib/feed2imap/config.rb b/lib/feed2imap/config.rb
new file mode 100644
index 0000000..8129fd2
--- /dev/null
+++ b/lib/feed2imap/config.rb
@@ -0,0 +1,76 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+require 'yaml'
+require 'uri'
+require 'feed2imap/imap'
+
+# Default cache file, used when the configuration has no 'cache' entry
+DEFCACHE = ENV['HOME'] + '/.feed2imap.cache'
+
+# Feed2imap configuration
+class F2IConfig
+  attr_reader :imap_accounts, :cache, :feeds
+
+  # Load the configuration from the IO stream (a YAML document):
+  # the cache file path, the feeds and the IMAP accounts they target.
+  # TODO should do some sanity check on the data read.
+  def initialize(io)
+    @conf = YAML::load(io)
+    @cache = @conf['cache'] || DEFCACHE
+    @conf['feeds'] ||= []
+    @imap_accounts = ImapAccounts::new
+    @feeds = @conf['feeds'].map do |entry|
+      target = URI::parse(entry['target'])
+      # the folder is the path of the target, minus its leading slash
+      folder = target.path
+      folder = folder[1..-1] if folder[0,1] == '/'
+      account = @imap_accounts.add_account(target)
+      ConfigFeed::new(entry['name'], entry['url'], account, folder)
+    end
+  end
+
+  # Returns a human-readable summary of the configuration:
+  # one line per IMAP account, then one numbered paragraph per feed.
+  def to_s
+    out = "Your Feed2Imap config :\n" +
+      "=======================\n" +
+      "Cache file: #{@cache}\n\n" +
+      "Imap accounts I'll have to connect to :\n" +
+      "---------------------------------------\n"
+    @imap_accounts.each_value { |account| out += "#{account}\n" }
+    out += "\nFeeds :\n" + "-------\n"
+    @feeds.each_with_index do |feed, n|
+      out += "#{n + 1}. #{feed.name}\n" +
+        "    URL: #{feed.url}\n" +
+        "    IMAP Account: #{feed.imapaccount}\n" +
+        "    Folder: #{feed.folder}\n\n"
+    end
+    out
+  end
+end
+
+# One feed of the configuration. Plain read-only data holder.
+class ConfigFeed
+  attr_reader :name, :url, :imapaccount, :folder
+  # Stores the feed name, its URL, its IMAP account and its IMAP folder.
+  def initialize(name, url, imapaccount, folder)
+    @name, @url = name, url
+    @imapaccount, @folder = imapaccount, folder
+  end
+end
diff --git a/lib/feed2imap/feed2imap.rb b/lib/feed2imap/feed2imap.rb
new file mode 100644
index 0000000..0f09c51
--- /dev/null
+++ b/lib/feed2imap/feed2imap.rb
@@ -0,0 +1,135 @@
+#!/usr/bin/ruby
+
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+require 'feed2imap/config'
+require 'feed2imap/cache'
+require 'feed2imap/channel'
+require 'feed2imap/httpfetcher'
+require 'logger'
+
+# Feed2Imap version (logged at startup and sent in the X-Feed2Imap-Version mail header)
+F2I_VERSION = '0.1'
+
+class Feed2Imap
+  # Runs a whole session: reads +configfile+, loads the cache, connects to
+  # the IMAP accounts, uploads the new and updated items of every feed
+  # (unless +cacherebuild+ is set) and saves the cache. Exits with status 1
+  # if the configuration, an IMAP connection or a folder creation fails.
+  def initialize(verbose, cacherebuild, configfile)
+    @logger = Logger::new(STDOUT)
+    @logger.level = verbose ? Logger::DEBUG : Logger::WARN
+    @logger.info("Feed2Imap V.#{F2I_VERSION} started")
+    # reading config
+    @logger.info('Reading configuration file')
+    if not File::exist?(configfile)
+      @logger.fatal("Configuration file #{configfile} not found.")
+      exit(1)
+    end
+    begin
+      File::open(configfile) { |f| @config = F2IConfig::new(f) }
+    rescue
+      @logger.fatal("Error while reading configuration file, exiting: #{$!}")
+      exit(1)
+    end
+    # init cache
+    @logger.info('Initializing cache')
+    @cache = ItemCache::new
+    if not File::exist?(@config.cache)
+      @logger.warn("Cache file #{@config.cache} not found, using a new one")
+    else
+      File::open(@config.cache, 'rb') { |f| @cache.load(f) } # marshalled data: binary mode
+    end
+    # connecting all IMAP accounts
+    @logger.info('Connecting to IMAP accounts')
+    @config.imap_accounts.each_value do |ac|
+      begin
+        ac.connect
+      rescue
+        @logger.fatal("Error while connecting to #{ac}, exiting: #{$!}")
+        exit(1)
+      end
+    end
+    # for each feed, fetch, upload to IMAP and cache
+    @config.feeds.each do |f|
+      @logger.info("Processing #{f.url}")
+      begin
+        # check that folder exist
+        f.imapaccount.create_folder(f.folder) if not f.imapaccount.folder_exist?(f.folder)
+      rescue
+        @logger.fatal("Error while creating IMAP folder #{f.folder}: #{$!}")
+        exit(1)
+      end
+      begin
+        body = HTTPFetcher::fetch(f.url, @cache.get_last_check(f.name))
+      rescue Timeout::Error
+        @logger.fatal("Timeout::Error while fetching #{f.url}: #{$!}")
+        next
+      rescue
+        @logger.fatal("Error while fetching #{f.url}: #{$!}")
+        next
+      end
+      next if body.nil? # means 304
+      begin
+        channel = Channel::new(body)
+      rescue
+        @logger.fatal("Error while parsing #{f.url}: #{$!}")
+        next
+      end
+      begin
+        newitems, updateditems = @cache.get_new_items(f.name, channel.items)
+      rescue
+        @logger.fatal("Exception caught when selecting new items for #{f.url}: #{$!}")
+        puts $!.backtrace
+        next
+      end
+      @logger.info("#{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0
+      begin
+        if !cacherebuild
+          updateditems.each { |i| f.imapaccount.updatemail(f.folder, i.to_mail(f.name), i.cacheditem.index) }
+          newitems.each { |i| f.imapaccount.putmail(f.folder, i.to_mail(f.name)) }
+        end
+      rescue
+        @logger.fatal("Exception caught while uploading mail to #{f.folder}: #{$!}")
+        next
+      end
+      begin
+        @cache.update_cache(f.name, channel.items)
+      rescue
+        @logger.fatal("Exception caught while updating cache for #{f.name}: #{$!}")
+        next
+      end
+    end
+    @logger.info("Finished. Saving cache")
+    begin
+      File::open(@config.cache, 'wb') { |f| @cache.save(f) } # marshalled data: binary mode
+    rescue
+      @logger.fatal("Exception caught while writing cache to #{@config.cache}: #{$!}")
+    end
+    @logger.info("Closing IMAP connections")
+    @config.imap_accounts.each_value do |ac|
+      begin
+        ac.disconnect
+      rescue
+        @logger.fatal("Exception caught while closing connection to #{ac.to_s}: #{$!}")
+      end
+    end
+  end
+end
diff --git a/lib/feed2imap/httpfetcher.rb b/lib/feed2imap/httpfetcher.rb
new file mode 100644
index 0000000..6c72bf3
--- /dev/null
+++ b/lib/feed2imap/httpfetcher.rb
@@ -0,0 +1,89 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+require 'net/http'
+# get openssl if available (a missing library raises LoadError, which a bare rescue lets through)
+begin
+  require 'openssl'
+rescue LoadError
+end
+require 'uri'
+
+# Class used to retrieve the feed over HTTP
+# TODO non standard port, authentification
+# TODO don't use If-Mod-Since if = 0
+
+if defined?(F2I_VERSION)
+  USERAGENT = "Feed2Imap v#{F2I_VERSION} http://home.gna.org/feed2imap/" # double quotes: the version must be interpolated
+else
+  USERAGENT = 'Feed2Imap http://home.gna.org/feed2imap/'
+end
+
+class HTTPFetcher
+  # Fetches documents over HTTP or HTTPS, sending the basic authentication
+  # credentials found in the URL, if any.
+
+  # Fetches +uri+ and returns the body of the response, or nil if the
+  # server answers 304 Not Modified since +lastcheck+. At most +recursion+
+  # redirections are followed. +baseuri+ is the URI first asked for: its
+  # credentials are sent again on redirections to the same host.
+  # Any other response, or a timeout, raises an exception.
+  def HTTPFetcher::fetcher(baseuri, uri, lastcheck, recursion)
+    http = Net::HTTP::new(uri.host, uri.port)
+    # there is no Net::HTTPS class: SSL is enabled on the Net::HTTP object
+    http.use_ssl = true if uri.scheme == 'https'
+    # conditional GET: the server may answer 304 if nothing changed
+    req = Net::HTTP::Get::new(uri.request_uri, {'User-Agent' => USERAGENT, 'If-Modified-Since' => lastcheck.httpdate})
+    if uri.userinfo
+      # split on the first ':' only, the password may contain some
+      login, pw = uri.userinfo.split(':', 2)
+      req.basic_auth(login, pw)
+    # workaround. eg. wikini redirects and loses auth info.
+    elsif uri.host == baseuri.host and baseuri.userinfo
+      login, pw = baseuri.userinfo.split(':', 2)
+      req.basic_auth(login, pw)
+    end
+    begin
+      response = http.request(req)
+    rescue Timeout::Error
+      raise "Timeout while fetching #{uri.to_s}"
+    end
+    case response
+    when Net::HTTPSuccess
+      return response.body
+    when Net::HTTPRedirection
+      # if not modified
+      return nil if Net::HTTPNotModified === response
+      if recursion > 0
+        redir = URI::join(uri.to_s, response['location'])
+        return fetcher(baseuri, redir, lastcheck, recursion - 1)
+      end
+    end
+    # or raise an exception
+    response.error!
+  end
+
+  # Fetches +url+ (a string), following up to 5 redirections. Returns the
+  # body, or nil if the document was not modified since +lastcheck+.
+  # Raises an exception on any other response or on a timeout.
+  def HTTPFetcher::fetch(url, lastcheck)
+    uri = URI::parse(url)
+    HTTPFetcher::fetcher(uri, uri, lastcheck, 5)
+  end
+end
diff --git a/lib/feed2imap/imap.rb b/lib/feed2imap/imap.rb
new file mode 100644
index 0000000..591f561
--- /dev/null
+++ b/lib/feed2imap/imap.rb
@@ -0,0 +1,118 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+# Imap connection handling
+require 'net/imap'
+begin
+  require 'openssl'
+rescue LoadError # openssl is optional; a bare rescue does not catch LoadError
+end
+require 'uri'
+
+# Container of IMAP accounts, indexed by account URI.
+# It makes accounts shared: several feeds targeting
+# the same IMAP account get the same ImapAccount object,
+# hence only one IMAP connection.
+class ImapAccounts < Hash
+  # Returns the account matching the scheme, userinfo, host and port of
+  # +uri+, registering a new ImapAccount the first time it is asked for.
+  def add_account(uri)
+    parts = { :scheme => uri.scheme,
+              :userinfo => uri.userinfo,
+              :host => uri.host,
+              :port => uri.port }
+    # the path is left out of the key: it is not part of the account
+    key = URI::Generic::build(parts)
+    fetch(key) { self[key] = ImapAccount::new(key) }
+  end
+end
+
+# This class is an IMAP account, identified by its URI. It keeps the
+# connection once it has been established by #connect.
+class ImapAccount
+  attr_reader :uri
+
+  # Default port and use of SSL for each supported URI scheme
+  SCHEMES = { 'imap' => [143, false], 'imaps' => [993, true] }
+
+  # +uri+ is the URI of the account (scheme, userinfo, host and port)
+  def initialize(uri)
+    @uri = uri
+  end
+
+  # Connects to the IMAP server and logs in with the userinfo of the URI.
+  # Raises an exception if the scheme is unknown, if the URI holds no
+  # login information, or if the connection or the login fails.
+  def connect
+    defport, usessl = SCHEMES[uri.scheme]
+    raise "Unknown scheme: #{uri.scheme}" if defport.nil?
+    # checked before connecting, rather than failing on nil after it
+    raise "No login information in #{uri}" if uri.userinfo.nil?
+    user, password = uri.userinfo.split(':',2)
+    # use given port if port given
+    @connection = Net::IMAP::new(uri.host, uri.port || defport, usessl)
+    @connection.login(user, password)
+  end
+
+  # disconnect from the IMAP server
+  def disconnect
+    @connection.disconnect if @connection
+  end
+
+  # Returns true if the folder exist
+  def folder_exist?(folder)
+    return !@connection.list('', folder).nil?
+  end
+
+  # Creates the given folder and subscribes to it. Returns self.
+  def create_folder(folder)
+    @connection.create(folder)
+    @connection.subscribe(folder)
+    self
+  end
+
+  # Put the mail in the given folder
+  # You should check whether the folder exist first.
+  def putmail(folder, mail)
+    # TODO check response
+    @connection.append(folder, mail)
+  end
+
+  # Uploads +mail+ to +folder+ in place of the mail whose X-CacheIndex
+  # header is -idx-: that mail, if found, is deleted first.
+  # Raises an exception if several mails carry this index.
+  def updatemail(folder, mail, idx)
+    # TODO check response
+    # TODO keep flags of deleted mail
+    @connection.select(folder)
+    searchres = @connection.search(['HEADER', 'X-CacheIndex', "-#{idx}-"])
+    if searchres.length == 1
+      @connection.store(searchres[0], "+FLAGS", [:Deleted])
+      @connection.expunge
+    elsif searchres.length != 0
+      raise "Search returned multiple results !!"
+    end
+    putmail(folder, mail)
+  end
+
+  def to_s
+    uri.to_s
+  end
+end
+
diff --git a/lib/feed2imap/rexml_patch.rb b/lib/feed2imap/rexml_patch.rb
new file mode 100644
index 0000000..3d919ae
--- /dev/null
+++ b/lib/feed2imap/rexml_patch.rb
@@ -0,0 +1,41 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+require 'feed2imap/textconverters'
+
+# Patch for REXML: decoding no longer goes through IConv (which isn't
+# error-proof at all) but through String#toUTF8; encoding is a no-op.
+module REXML
+  module Encoding
+    # Converts +str+ from the current encoding to UTF-8 with String#toUTF8.
+    def decode(str)
+      str.toUTF8(@encoding)
+    end
+
+    # Returns +str+ as is: nothing is converted on output.
+    def encode(str)
+      str
+    end
+
+    # Sets the encoding name; nil or false stand for 'utf-8'.
+    def encoding=(enc)
+      @encoding = enc || 'utf-8' unless defined?(@encoding) && enc == @encoding
+    end
+  end
+end
diff --git a/lib/feed2imap/rubymail_patch.rb b/lib/feed2imap/rubymail_patch.rb
new file mode 100644
index 0000000..208228c
--- /dev/null
+++ b/lib/feed2imap/rubymail_patch.rb
@@ -0,0 +1,50 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+# Patches for ruby mail
+# The problem is it creates a mail with multipart/mixed (= for attachments), but I need
+# multipart/alternative. I just overwrite the two methods doing this.
+
+require 'rmail'
+
+module RMail
+  class Header
+    # Remove the existing definition before supplying the replacement below
+    # (presumably so that no "method redefined" warning is emitted).
+    undef set_boundary
+
+    # Store +boundary+ as the "boundary" parameter of the Content-Type
+    # header. Existing quoted parameters are kept; when the header has no
+    # content type yet, "multipart/alternative" is used.
+    def set_boundary(boundary)
+      ct_params = params_quoted('content-type') || {}
+      ct_params['boundary'] = boundary
+      media_type = content_type || "multipart/alternative"
+      # Replace any previous Content-Type field with the rebuilt one.
+      delete('Content-Type')
+      add('Content-Type', media_type, nil, ct_params)
+    end
+  end
+
+  class Message
+    # Replacement constructor: an empty header and every other part unset.
+    # TODO find a way to avoid the warning. undef'ing initialize causes a warning.
+    def initialize
+      @header = RMail::Header.new
+      @body = nil
+      @epilogue = nil
+      @preamble = nil
+      @delimiters = nil
+    end
+  end
+end
diff --git a/lib/feed2imap/textconverters.rb b/lib/feed2imap/textconverters.rb
new file mode 100644
index 0000000..ba49193
--- /dev/null
+++ b/lib/feed2imap/textconverters.rb
@@ -0,0 +1,85 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+require 'uri' # for URI::regexp
+
+# Converters added to the core String class: HTML detection, plain text <->
+# HTML conversion, whitespace trimming and a fault-tolerant conversion to UTF-8.
+class String
+  # Is this text HTML? Searches for a <p> tag or a <br> tag (optionally
+  # self-closing). Returns the index of the first tag kind that matched
+  # (truthy, may be 0), or nil when none is present.
+  def html?
+    return (self =~ /<p>/) || (self =~ /<br>/) || (self =~ /<br\s*(\/)?\s*>/)
+  end
+
+  # Convert plain text to HTML. Text that already looks like HTML (see html?)
+  # is returned as an unmodified copy. The receiver is never changed.
+  def text2html
+    # dup, not clone: clone keeps the frozen flag and gsub! would then raise
+    text = self.dup
+    return text if text.html?
+    # paragraphs: wrap everything, then split on blank lines
+    text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
+    text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
+    # uris: turn http/ftp/https URIs into links
+    text.gsub!(/(#{URI::regexp(['http','ftp','https'])})/,
+        '<a href="\1">\1</a>')
+    text
+  end
+
+  # Convert an HTML text to plain text. The receiver is never changed.
+  def html2text
+    # dup, not clone: clone keeps the frozen flag and gsub! would then raise
+    text = self.dup
+    # let's remove all line feeds
+    text.gsub!(/\n/, '')
+    # convert <p> and <br>
+    text.gsub!(/\s*<\/p>\s*/, '')
+    text.gsub!(/\s*<p(\s[^>]*)?>\s*/, "\n\n")
+    text.gsub!(/\s*<br(\s*)\/?(\s*)>\s*/, "\n")
+    # remove other tags
+    text.gsub!(/<[^>]*>/, '')
+    # remove leading and trailing whitespace
+    text.gsub!(/\A\s*/m, '')
+    text.gsub!(/\s*\Z/m, '')
+    text
+  end
+
+  # Remove white space around the text, in place. Always returns self.
+  def rmWhiteSpace!
+    # gsub! may return nil, so the two calls are not chained
+    gsub!(/\A\s*/m, '')
+    gsub!(/\s*\Z/m, '')
+    return self
+  end
+
+  # Convert a text in inputenc to a text in UTF8
+  # must take care of wrong input locales
+  #
+  # inputenc is the declared encoding name; it is compared case-insensitively
+  # and is never modified (nil is treated as "not UTF-8").
+  # Returns self when the text is declared as, or turns out to be, UTF-8;
+  # otherwise a new string where every input byte became one UTF-8 character.
+  def toUTF8(inputenc)
+    # downcase (not downcase!): the bang form returns nil when nothing
+    # changes and would also alter the caller's string
+    return self if inputenc.to_s.downcase == 'utf-8'
+    # it is said it is not UTF-8. Ensure it is REALLY not UTF-8
+    begin
+      return self if self.unpack('U*').pack('U*') == self
+    rescue
+      # not decodable as UTF-8: fall through to the byte-wise conversion
+    end
+    begin
+      return self.unpack('C*').pack('U*')
+    rescue
+      return self #failsafe solution. but a dirty one :-)
+    end
+  end
+end
author	lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>	2005-03-31 22:08:32 +0000
committer	lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>	2005-03-31 22:08:32 +0000
commit	16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5 (patch)
tree	fcee2e08574f55e141eeea3cb2747a4a80c04d89 /lib
parent	94c2f3339fbe18700fcc057367784d04bb2a76d9 (diff)
download	feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.gz feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.bz2 feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.zip