summaryrefslogtreecommitdiff
path: root/lib/feed2imap
diff options
context:
space:
mode:
authorlnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>2005-03-31 22:08:32 +0000
committerlnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>2005-03-31 22:08:32 +0000
commit16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5 (patch)
treefcee2e08574f55e141eeea3cb2747a4a80c04d89 /lib/feed2imap
parent94c2f3339fbe18700fcc057367784d04bb2a76d9 (diff)
downloadfeed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.gz
feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.bz2
feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.zip
first import
git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@5 f70e237a-67f3-0310-a06c-d2b8a7116972
Diffstat (limited to '')
-rw-r--r--lib/feed2imap.rb8
-rw-r--r--lib/feed2imap/cache.rb176
-rw-r--r--lib/feed2imap/channel.rb326
-rw-r--r--lib/feed2imap/config.rb76
-rw-r--r--lib/feed2imap/feed2imap.rb135
-rw-r--r--lib/feed2imap/httpfetcher.rb89
-rw-r--r--lib/feed2imap/imap.rb118
-rw-r--r--lib/feed2imap/rexml_patch.rb41
-rw-r--r--lib/feed2imap/rubymail_patch.rb50
-rw-r--r--lib/feed2imap/textconverters.rb85
10 files changed, 1104 insertions, 0 deletions
diff --git a/lib/feed2imap.rb b/lib/feed2imap.rb
new file mode 100644
index 0000000..7168268
--- /dev/null
+++ b/lib/feed2imap.rb
@@ -0,0 +1,8 @@
+require 'feed2imap/cache'
+require 'feed2imap/channel'
+require 'feed2imap/config'
+require 'feed2imap/httpfetcher'
+require 'feed2imap/imap'
+require 'feed2imap/rexml_patch'
+require 'feed2imap/rubymail_patch'
+require 'feed2imap/textconverters'
diff --git a/lib/feed2imap/cache.rb b/lib/feed2imap/cache.rb
new file mode 100644
index 0000000..1534483
--- /dev/null
+++ b/lib/feed2imap/cache.rb
@@ -0,0 +1,176 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+# This class manages a cache of items
+# (items which have already been seen)
+
+require 'digest/md5'
+
+class ItemCache
+ def initialize
+ @channels = {}
+ @@cacheidx = 0
+ self
+ end
+
+ # Returns the really new items amongst items
+ def get_new_items(id, items)
+ @channels[id] ||= CachedChannel::new
+ return @channels[id].get_new_items(items)
+ end
+
+ # Replace the existing cached items by those ones
+ def update_cache(id, items)
+ @channels[id] ||= CachedChannel::new
+ @channels[id].update(items)
+ end
+
+ # Get the last time the cache was updated
+ def get_last_check(id)
+ @channels[id] ||= CachedChannel::new
+ @channels[id].lastcheck
+ end
+
+ # Get the last time the cache was updated
+ def set_last_check(id, time)
+ @channels[id] ||= CachedChannel::new
+ @channels[id].lastcheck = time
+ self
+ end
+
+ # Load the cache from an IO stream
+ def load(io)
+ begin
+ @@cacheidx, @channels = Marshal.load(io)
+ rescue
+ @channels = Marshal.load(io)
+ @@cacheidx = 0
+ end
+ end
+
+ # Save the cache to an IO stream
+ def save(io)
+ Marshal.dump([@@cacheidx, @channels], io)
+ end
+
+ # Return the number of channels in the cache
+ def nbchannels
+ @channels.length
+ end
+
+ # Return the number of items in the cache
+ def nbitems
+ nb = 0
+ @channels.each_value { |c|
+ nb += c.nbitems
+ }
+ nb
+ end
+
+ def ItemCache.getindex
+ i = @@cacheidx
+ @@cacheidx += 1
+ i
+ end
+end
+
+class CachedChannel
+ attr_accessor :lastcheck, :items
+
+ def initialize
+ @lastcheck = Time::at(0)
+ @items = []
+ end
+
+ # Returns the really new items amongst items
+ def get_new_items(items)
+ # set items' cached version if not set yet
+ newitems = []
+ updateditems = []
+ items.each { |i| i.cacheditem ||= CachedItem::new(i) }
+ items.each do |i|
+ # TODO rewrite with the fact that break can return a value
+ found = false
+ # Try to find a perfect match
+ @items.each do |j|
+ if i.cacheditem == j
+ i.cacheditem.index = j.index
+ found = true
+ break
+ end
+ end
+ next if found
+ # Try to find an updated item
+ @items.each do |j|
+ if i.link and i.link == j.link
+ # TODO use a better heuristic ?
+ i.cacheditem.index = j.index
+ i.cacheditem.updated = true
+ updateditems.push(i)
+ found = true
+ break
+ end
+ end
+ next if found
+ # add as new
+ i.cacheditem.create_index
+ newitems.push(i)
+ end
+ return [newitems, updateditems]
+ end
+
+ # Replace the existing cached items by those ones
+ def update(items)
+ @items = []
+ items.each do |i|
+ @items.push(i.cacheditem)
+ end
+ self
+ end
+
+ # returns the number of items
+ def nbitems
+ @items.length
+ end
+end
+
+# This class is the only thing kept in the cache
+class CachedItem
+ attr_reader :title, :link, :hash
+ attr_accessor :index
+ attr_accessor :updated
+
+ def initialize(item)
+ @title = item.title
+ @link = item.link
+ if item.content.nil?
+ @hash = nil
+ else
+ @hash = Digest::MD5.hexdigest(item.content.to_s)
+ end
+ end
+
+ def ==(other)
+ @title == other.title and @link == other.link and @hash == other.hash
+ end
+
+ def create_index
+ @index = ItemCache.getindex
+ end
+end
diff --git a/lib/feed2imap/channel.rb b/lib/feed2imap/channel.rb
new file mode 100644
index 0000000..ae83d18
--- /dev/null
+++ b/lib/feed2imap/channel.rb
@@ -0,0 +1,326 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+# Classes used to parse a fetched RSS/Atom feed into a Channel and its Items
+
+require 'rexml/document'
+require 'time'
+require 'rmail'
+require 'feed2imap/textconverters'
+require 'feed2imap/rubymail_patch'
+require 'feed2imap/rexml_patch'
+
+# Raised by Channel#parse_str when the document is neither RSS nor Atom
+class UnknownFeedTypeException < RuntimeError; end
+# an RSS/Atom channel
+class Channel
+ attr_reader :title, :link, :description, :creator, :encoding, :items
+
+ # parse str to build a channel
+ def initialize(str = nil)
+ parse_str(str) if str
+ end
+
+ # Determines all the fields using a string containing an
+ # XML document
+ def parse_str(str)
+ # Dirty hack: some feeds contain the & char. It must be changed to &amp;
+ str.gsub!(/&(\s+)/, '&amp;\1')
+ doc = REXML::Document.new(str)
+ # get channel info
+ @encoding = doc.encoding
+ @title,@link,@description,@creator = nil
+ @items = []
+ if doc.root.elements['channel'] || doc.root.elements['rss:channel']
+ # We have a RSS feed!
+ # Title
+ if (e = doc.root.elements['channel/title'] ||
+ doc.root.elements['rss:channel/rss:title']) && e.text
+ @title = e.text.toUTF8(@encoding).rmWhiteSpace!
+ end
+ # Link
+ if (e = doc.root.elements['channel/link'] ||
+ doc.root.elements['rss:channel/rss:link']) && e.text
+ @link = e.text.rmWhiteSpace!
+ end
+ # Description
+ if (e = doc.root.elements['channel/description'] ||
+ doc.root.elements['rss:channel/rss:description']) && e.text
+ @description = e.text.toUTF8(@encoding).rmWhiteSpace!
+ end
+ # Creator
+ if ((e = doc.root.elements['channel/dc:creator']) && e.text) ||
+ ((e = doc.root.elements['channel/author'] ||
+ doc.root.elements['rss:channel/rss:author']) && e.text)
+ @creator = e.text.toUTF8(@encoding).rmWhiteSpace!
+ end
+ # Items
+ if doc.root.elements['channel/item']
+ query = 'channel/item'
+ elsif doc.root.elements['item']
+ query = 'item'
+ elsif doc.root.elements['rss:channel/rss:item']
+ query = 'rss:channel/rss:item'
+ else
+ query = 'rss:item'
+ end
+ doc.root.each_element(query) { |e| @items << Item::new(e, self) }
+
+ elsif doc.root.elements['/feed']
+ # We have an ATOM feed!
+ # Title
+ if (e = doc.root.elements['/feed/title']) && e.text
+ @title = e.text.toUTF8(@encoding).rmWhiteSpace!
+ end
+ # Link
+ doc.root.each_element('/feed/link') do |e|
+ if e.attribute('type').value == 'text/html' or
+ e.attribute('type').value == 'application/xhtml' or
+ e.attribute('type').value == 'application/xhtml+xml'
+ if (h = e.attribute('href')) && h
+ @link = h.value.rmWhiteSpace!
+ end
+ end
+ end
+ # Description
+ if e = doc.root.elements['/feed/info']
+ @description = e.elements.to_s.toUTF8(@encoding).rmWhiteSpace!
+ end
+ # Items
+ doc.root.each_element('/feed/entry') do |e|
+ @items << AtomItem::new(e, self)
+ end
+ else
+ raise UnknownFeedTypeException::new
+ end
+ end
+
+ def to_s
+ s = "Title: #{@title}\nLink: #{@link}\n\n"
+ @items.each { |i| s += i.to_s }
+ s
+ end
+end
+
+# an Item from a channel
+class Item
+ attr_accessor :title, :link, :content, :date, :creator, :subject,
+ :category, :cacheditem
+ attr_reader :channel
+
+ def initialize(item = nil, channel = nil)
+ @channel = channel
+ @title, @link, @content, @date, @creator, @subject, @category = nil
+ if item
+ # Title
+ if ((e = item.elements['title'] || item.elements['rss:title']) &&
+ e.text) ||
+ ((e = item.elements['pubDate'] || item.elements['rss:pubDate']) &&
+ e.text)
+ @title = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+ end
+ # Link
+ if ((e = item.elements['link'] || item.elements['rss:link']) && e.text)||
+ (e = item.elements['guid'] || item.elements['rss:guid'] and
+ not (e.attribute('isPermaLink') and
+ e.attribute('isPermaLink').value == 'false'))
+ @link = e.text.rmWhiteSpace!
+ end
+ # Content
+ if (e = item.elements['content:encoded']) ||
+ (e = item.elements['description'] || item.elements['rss:description'])
+ if e.cdatas[0]
+ @content = e.cdatas[0].to_s.toUTF8(@channel.encoding).rmWhiteSpace!
+ elsif e.text
+ @content = e.text.toUTF8(@channel.encoding).text2html
+ end
+ end
+ # Date
+ if e = item.elements['dc:date'] || item.elements['pubDate'] ||
+ item.elements['rss:pubDate']
+ begin
+ @date = Time::xmlschema(e.text)
+ rescue
+ begin
+ @date = Time::rfc2822(e.text)
+ rescue
+ begin
+ @date = Time::parse(e.text)
+ rescue
+ @date = nil
+ end
+ end
+ end
+ end
+ # Creator
+ @creator = @channel.creator
+ if (e = item.elements['dc:creator'] || item.elements['author'] ||
+ item.elements['rss:author']) && e.text
+ @creator = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+ end
+ # Subject
+ if (e = item.elements['dc:subject']) && e.text
+ @subject = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+ end
+ # Category
+ if (e = item.elements['dc:category'] || item.elements['category'] ||
+ item.elements['rss:category']) && e.text
+ @category = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+ end
+ end
+ end
+
+ def to_s
+ "--------------------------------\n" +
+ "Title: #{@title}\nLink: #{@link}\n" +
+ "Date: #{@date.to_s}\nCreator: #{@creator}\n" +
+ "Subject: #{@subject}\nCategory: #{@category}\nContent:\n#{content}\n"
+ end
+
+ def to_text
+ s = ""
+ s += "Channel: "
+ s += @channel.title + ' ' if @channel.title
+ s += "<#{@channel.link}>" if @channel.link
+ s += "\n"
+ s += "Item: "
+ s += @title + ' ' if @title
+ s += "<#{@link}>" if @link
+ s += "\n"
+ s += "\nDate: #{@date.to_s}" if @date # TODO improve date rendering ?
+ s += "\nAuthor: #{@creator}" if @creator
+ s += "\nSubject: #{@subject}" if @subject
+ s += "\nCategory: #{@category}" if @category
+ s += "\n\n"
+ s += "#{@content.html2text}" if @content
+ s
+ end
+
+ def to_html
+ s = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
+ s += '<html>'
+ s += '<body>'
+ s += "<p>Channel: "
+ s += "<a href=\"#{@channel.link}\">" if @channel.link
+ s += @channel.title if @channel.title
+ s += "</a>" if @channel.link
+ s += "<br/>\nItem: "
+ s += "<a href=\"#{@link}\">" if @link
+ s += @title if @title
+ s += "</a>" if @link
+ s += "\n"
+ s += "<br/>Date: #{@date.to_s}" if @date # TODO improve date rendering ?
+ s += "<br/>Author: #{@creator}" if @creator
+ s += "<br/>Subject: #{@subject}" if @subject
+ s += "<br/>Category: #{@category}" if @category
+ s += "</p>"
+ s += "<p>#{@content}</p>" if @content
+ s += '</body></html>'
+ s
+ end
+
+ # TODO from significatif
+ def to_mail(from = 'Feed2Imap')
+ message = RMail::Message::new
+ message.header['From'] = "#{from} <feed2imap@feed2imap.net>"
+ message.header['To'] = "#{from} <feed2imap@feed2imap.net>"
+ if @date.nil?
+ message.header['Date'] = Time::new.rfc2822
+ else
+ message.header['Date'] = @date.rfc2822
+ end
+ message.header['X-Feed2Imap-Version'] = F2I_VERSION if defined?(F2I_VERSION)
+ message.header['X-CacheIndex'] = "-#{@cacheditem.index}-"
+ message.header['X-F2IStatus'] = "Updated" if @cacheditem.updated
+ # TODO encode in ISO ?
+ if @title
+ message.header['Subject'] = @title
+ elsif @date
+ message.header['Subject'] = @date.to_s
+ elsif @link
+ message.header['Subject'] = @link
+ end
+ textpart = RMail::Message::new
+ textpart.header['Content-Type'] = 'text/plain; charset=UTF-8; format=flowed'
+ textpart.header['Content-Transfer-Encoding'] = '7bit'
+ textpart.body = to_text
+ htmlpart = RMail::Message::new
+ htmlpart.header['Content-Type'] = 'text/html; charset=UTF-8'
+ htmlpart.header['Content-Transfer-Encoding'] = '7bit'
+ htmlpart.body = to_html
+ message.add_part(textpart)
+ message.add_part(htmlpart)
+ return message.to_s
+ end
+end
+
+class AtomItem < Item
+ def initialize(item = nil, channel = nil)
+ @channel = channel
+ @title, @link, @content, @date, @creator, @subject, @category = nil
+ if item
+ # Title
+ if (e = item.elements['title']) && e.text
+ @title = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+ end
+ # Link
+ item.each_element('link') do |e|
+ if e.attribute('type').value == 'text/html' or
+ e.attribute('type').value == 'application/xhtml' or
+ e.attribute('type').value == 'application/xhtml+xml'
+ if (h = e.attribute('href')) && h.value
+ @link = h.value
+ end
+ end
+ end
+ # Content
+ if e = item.elements['content'] || item.elements['summary']
+ if (e.attribute('mode') and e.attribute('mode').value == 'escaped') &&
+ e.text
+ @content = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+ else
+ # go one step deeper in the recursion if possible
+ e = e.elements['div'] || e
+ @content = e.to_s.toUTF8(@channel.encoding).rmWhiteSpace!
+ end
+ end
+ # Date
+ if (e = item.elements['issued'] || e = item.elements['created']) && e.text
+ begin
+ @date = Time::xmlschema(e.text)
+ rescue
+ begin
+ @date = Time::rfc2822(e.text)
+ rescue
+ begin
+ @date = Time::parse(e.text)
+ rescue
+ @date = nil
+ end
+ end
+ end
+ end
+ # Creator
+ @creator = @channel.creator
+ if (e = item.elements['author/name']) && e.text
+ @creator = e.text.toUTF8(@channel.encoding).rmWhiteSpace!
+ end
+ end
+ end
+end
diff --git a/lib/feed2imap/config.rb b/lib/feed2imap/config.rb
new file mode 100644
index 0000000..8129fd2
--- /dev/null
+++ b/lib/feed2imap/config.rb
@@ -0,0 +1,76 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+require 'yaml'
+require 'uri'
+require 'feed2imap/imap'
+
+# Default cache file: ~/.feed2imap.cache, or .feed2imap.cache in the
+# current directory when HOME is not set (nil + String would raise)
+DEFCACHE = (ENV['HOME'] || Dir.pwd) + '/.feed2imap.cache'
+
+# Feed2imap configuration
+class F2IConfig
+ attr_reader :imap_accounts, :cache, :feeds
+
+ # Load the configuration from the IO stream
+ # TODO should do some sanity check on the data read.
+ def initialize(io)
+ @conf = YAML::load(io)
+ @cache = @conf['cache'] || DEFCACHE
+ @conf['feeds'] ||= []
+ @feeds = []
+ @imap_accounts = ImapAccounts::new
+ @conf['feeds'].each { |f|
+ uri = URI::parse(f['target'])
+ path = uri.path
+ path = path[1..-1] if path[0,1] == '/'
+ @feeds.push(ConfigFeed::new(f['name'], f['url'],
+ @imap_accounts.add_account(uri), path))
+ }
+ end
+
+ def to_s
+ s = "Your Feed2Imap config :\n"
+ s += "=======================\n"
+ s += "Cache file: #{@cache}\n\n"
+ s += "Imap accounts I'll have to connect to :\n"
+ s += "---------------------------------------\n"
+ @imap_accounts.each_value { |i| s += i.to_s + "\n" }
+ s += "\nFeeds :\n"
+ s += "-------\n"
+ i = 1
+ @feeds.each do |f|
+ s += "#{i}. #{f.name}\n"
+ s += " URL: #{f.url}\n"
+ s += " IMAP Account: #{f.imapaccount}\n"
+ s += " Folder: #{f.folder}\n\n"
+ i += 1
+ end
+ s
+ end
+end
+
+# A configured feed. simple data container.
+class ConfigFeed
+ attr_reader :name, :url, :imapaccount, :folder
+
+ def initialize(name, url, imapaccount, folder)
+ @name, @url, @imapaccount, @folder = name, url, imapaccount, folder
+ end
+end
diff --git a/lib/feed2imap/feed2imap.rb b/lib/feed2imap/feed2imap.rb
new file mode 100644
index 0000000..0f09c51
--- /dev/null
+++ b/lib/feed2imap/feed2imap.rb
@@ -0,0 +1,135 @@
+#!/usr/bin/ruby
+
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+require 'feed2imap/config'
+require 'feed2imap/cache'
+require 'feed2imap/channel'
+require 'feed2imap/httpfetcher'
+require 'logger'
+
+# Feed2Imap version
+F2I_VERSION = '0.1'
+
+class Feed2Imap
+ def initialize(verbose, cacherebuild, configfile)
+ @logger = Logger::new(STDOUT)
+ if verbose
+ @logger.level = Logger::DEBUG
+ else
+ @logger.level = Logger::WARN
+ end
+ @logger.info("Feed2Imap V.#{F2I_VERSION} started")
+ # reading config
+ @logger.info('Reading configuration file')
+ if not File::exist?(configfile)
+ @logger.fatal("Configuration file #{configfile} not found.")
+ exit(1)
+ end
+ begin
+ File::open(configfile) {
+ |f| @config = F2IConfig::new(f)
+ }
+ rescue
+ @logger.fatal("Error while reading configuration file, exiting: #{$!}")
+ exit(1)
+ end
+ # init cache
+ @logger.info('Initializing cache')
+ @cache = ItemCache::new
+ if not File::exist?(@config.cache)
+ @logger.warn("Cache file #{@config.cache} not found, using a new one")
+ else
+ File::open(@config.cache) { |f| @cache.load(f) }
+ end
+ # connecting all IMAP accounts
+ @logger.info('Connecting to IMAP accounts')
+ @config.imap_accounts.each_value do |ac|
+ begin
+ ac.connect
+ rescue
+ @logger.fatal("Error while connecting to #{ac}, exiting: #{$!}")
+ exit(1)
+ end
+ end
+ # for each feed, fetch, upload to IMAP and cache
+ @config.feeds.each do |f|
+ @logger.info("Processing #{f.url}")
+ begin
+ # check that folder exist
+ f.imapaccount.create_folder(f.folder) if not f.imapaccount.folder_exist?(f.folder)
+ rescue
+ @logger.fatal("Error while creating IMAP folder #{f.folder}: #{$!}")
+ exit(1)
+ end
+ begin
+ body = HTTPFetcher::fetch(f.url, @cache.get_last_check(f.name))
+ rescue Timeout::Error
+ @logger.fatal("Timeout::Error while fetching #{f.url}: #{$!}")
+ next
+ rescue
+ @logger.fatal("Error while fetching #{f.url}: #{$!}")
+ next
+ end
+ next if body.nil? # means 304
+ begin
+ channel = Channel::new(body)
+ rescue
+ @logger.fatal("Error while parsing #{f.url}: #{$!}")
+ next
+ end
+ begin
+ newitems, updateditems = @cache.get_new_items(f.name, channel.items)
+ rescue
+ @logger.fatal("Exception caught when selecting new items for #{f.url}: #{$!}")
+ puts $!.backtrace
+ next
+ end
+ @logger.info("#{newitems.length} new items, #{updateditems.length} updated items.") if newitems.length > 0 or updateditems.length > 0
+ begin
+ if !cacherebuild
+ updateditems.each { |i| f.imapaccount.updatemail(f.folder, i.to_mail(f.name), i.cacheditem.index) }
+ newitems.each { |i| f.imapaccount.putmail(f.folder, i.to_mail(f.name)) }
+ end
+ rescue
+ @logger.fatal("Exception caught while uploading mail to #{f.folder}: #{$!}")
+ next
+ end
+ begin
+ @cache.update_cache(f.name, channel.items)
+ rescue
+ @logger.fatal("Exception caught while updating cache for #{f.name}: #{$!}")
+ next
+ end
+ end
+ @logger.info("Finished. Saving cache")
+ begin
+ File::open(@config.cache, 'w') { |f| @cache.save(f) }
+ rescue
+ @logger.fatal("Exception caught while writing cache to #{@config.cache}: #{$!}")
+ end
+ @logger.info("Closing IMAP connections")
+ begin
+ @config.imap_accounts.each_value { |ac| ac.disconnect }
+ rescue
+ @logger.fatal("Exception caught while closing connection to #{ac.to_s}: #{$!}")
+ end
+ end
+end
diff --git a/lib/feed2imap/httpfetcher.rb b/lib/feed2imap/httpfetcher.rb
new file mode 100644
index 0000000..6c72bf3
--- /dev/null
+++ b/lib/feed2imap/httpfetcher.rb
@@ -0,0 +1,89 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+require 'net/http'
+# get openssl if available
+begin
+ require 'openssl'
+rescue
+end
+require 'uri'
+
+# Class used to retrieve the feed over HTTP
+# TODO non standard port, authentication
+# TODO don't send If-Modified-Since when lastcheck is 0 (never checked)
+
+if defined?(F2I_VERSION)
+ USERAGENT = 'Feed2Imap v#{F2I_VERSION} http://home.gna.org/feed2imap/'
+else
+ USERAGENT = 'Feed2Imap http://home.gna.org/feed2imap/'
+end
+
+# Retrieves documents over HTTP(S), following redirections.
+class HTTPFetcher
+  # Fetches uri and returns the body of the response, or nil when the
+  # server answers 304 Not Modified.
+  # baseuri:: the URI initially asked for; its credentials are reused when
+  #           a redirection stays on the same host
+  # uri:: the URI to fetch now
+  # lastcheck:: Time sent in the If-Modified-Since header
+  # recursion:: number of redirections still allowed
+  # Raises a RuntimeError on timeout, and whatever Net::HTTPResponse#error!
+  # raises for any other response.
+  def HTTPFetcher::fetcher(baseuri, uri, lastcheck, recursion)
+    http = Net::HTTP::new(uri.host, uri.port)
+    unless uri.scheme == 'http'
+      # there is no Net::HTTPS class: TLS is a setting of Net::HTTP.
+      # net/https is loaded lazily, only when it is really needed.
+      require 'net/https'
+      http.use_ssl = true
+    end
+    req = Net::HTTP::Get::new(uri.request_uri,
+                              { 'User-Agent' => USERAGENT,
+                                'If-Modified-Since' => lastcheck.httpdate })
+    # split on the first ':' only, the password may contain some
+    if uri.userinfo
+      login, pw = uri.userinfo.split(':', 2)
+      req.basic_auth(login, pw)
+    # workaround. eg. wikini redirects and loses auth info.
+    elsif uri.host == baseuri.host and baseuri.userinfo
+      login, pw = baseuri.userinfo.split(':', 2)
+      req.basic_auth(login, pw)
+    end
+    begin
+      response = http.request(req)
+    rescue Timeout::Error
+      raise "Timeout while fetching #{uri.to_s}"
+    end
+    case response
+    when Net::HTTPSuccess
+      return response.body
+    when Net::HTTPRedirection
+      # if not modified
+      return nil if Net::HTTPNotModified === response
+      if recursion > 0
+        redir = URI::join(uri.to_s, response['location'])
+        return fetcher(baseuri, redir, lastcheck, recursion - 1)
+      end
+    end
+    # anything else, or too many redirections: raise an exception
+    response.error!
+  end
+
+  # Fetches url (a String), following at most 5 redirections.
+  # Returns the body, or nil if it was not modified since lastcheck.
+  def HTTPFetcher::fetch(url, lastcheck)
+    uri = URI::parse(url)
+    HTTPFetcher::fetcher(uri, uri, lastcheck, 5)
+  end
+end
diff --git a/lib/feed2imap/imap.rb b/lib/feed2imap/imap.rb
new file mode 100644
index 0000000..591f561
--- /dev/null
+++ b/lib/feed2imap/imap.rb
@@ -0,0 +1,118 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+# Imap connection handling
+require 'net/imap'
+begin
+ require 'openssl'
+rescue
+end
+require 'uri'
+
+# This class is a container of IMAP accounts.
+# Thanks to it, accounts are re-used : several feeds
+# using the same IMAP account will create only one
+# IMAP connection.
+class ImapAccounts < Hash
+ def add_account(uri)
+ u = URI::Generic::build({ :scheme => uri.scheme,
+ :userinfo => uri.userinfo,
+ :host => uri.host,
+ :port => uri.port })
+ if not include?(u)
+ ac = ImapAccount::new(u)
+ self[u] = ac
+ end
+ return self[u]
+ end
+end
+
+# This class is an IMAP account, with the given fd
+# once the connection has been established
+class ImapAccount
+ attr_reader :uri
+
+ def initialize(uri)
+ @uri = uri
+ end
+
+ # connects to the IMAP server
+ # raises an exception if it fails
+ def connect
+ port = 143
+ usessl = false
+ if uri.scheme == 'imap'
+ port = 143
+ usessl = false
+ elsif uri.scheme == 'imaps'
+ port = 993
+ usessl = true
+ else
+ raise "Unknown scheme: #{uri.scheme}"
+ end
+ # use given port if port given
+ port = uri.port if uri.port
+ @connection = Net::IMAP::new(uri.host, port, usessl)
+ user, password = uri.userinfo.split(':',2)
+ @connection.login(user, password)
+ end
+
+ # disconnect from the IMAP server
+ def disconnect
+ @connection.disconnect if @connection
+ end
+
+ # Returns true if the folder exist
+ def folder_exist?(folder)
+ return !@connection.list('', folder).nil?
+ end
+
+ # Creates the given folder
+ def create_folder(folder)
+ @connection.create(folder)
+ @connection.subscribe(folder)
+ self
+ end
+
+ # Put the mail in the given folder
+ # You should check whether the folder exist first.
+ def putmail(folder, mail)
+ # TODO check response
+ @connection.append(folder, mail)
+ end
+
+ def updatemail(folder, mail, idx)
+ # TODO check response
+ # TODO keep flags of deleted mail
+ @connection.select(folder)
+ searchres = @connection.search(['HEADER', 'X-CacheIndex', "-#{idx}-"])
+ if searchres.length == 1
+ @connection.store(searchres[0], "+FLAGS", [:Deleted])
+ @connection.expunge
+ elsif searchres.length != 0
+ raise "Search returned multiple results !!"
+ end
+ putmail(folder, mail)
+ end
+
+ def to_s
+ uri.to_s
+ end
+end
+
diff --git a/lib/feed2imap/rexml_patch.rb b/lib/feed2imap/rexml_patch.rb
new file mode 100644
index 0000000..3d919ae
--- /dev/null
+++ b/lib/feed2imap/rexml_patch.rb
@@ -0,0 +1,41 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+require 'feed2imap/textconverters'
+
+# Patch for REXML
+# Very ugly patch to make REXML error-proof.
+# The problem is REXML uses IConv, which isn't error-proof at all.
+# With those changes, it uses unpack/pack with some error handling
+module REXML
+ module Encoding
+ def decode(str)
+ return str.toUTF8(@encoding)
+ end
+
+ def encode(str)
+ return str
+ end
+
+ def encoding=(enc)
+ return if defined? @encoding and enc == @encoding
+ @encoding = enc || 'utf-8'
+ end
+ end
+end
diff --git a/lib/feed2imap/rubymail_patch.rb b/lib/feed2imap/rubymail_patch.rb
new file mode 100644
index 0000000..208228c
--- /dev/null
+++ b/lib/feed2imap/rubymail_patch.rb
@@ -0,0 +1,50 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+# Patches for ruby mail
+# The problem is it creates a mail with multipart/mixed (= for attachments), but I need
+# multipart/alternative. I just overwrite the two methods doing this.
+
+require 'rmail'
+
+module RMail
+ class Header
+ undef set_boundary
+ def set_boundary(boundary)
+ params = params_quoted('content-type')
+ params ||= {}
+ params['boundary'] = boundary
+ content_type = content_type()
+ content_type ||= "multipart/alternative"
+ delete('Content-Type')
+ add('Content-Type', content_type, nil, params)
+ end
+ end
+
+ class Message
+ # TODO find a way to avoid the warning. undef'ing initialize causes a warning.
+ def initialize
+ @header = RMail::Header.new
+ @body = nil
+ @epilogue = nil
+ @preamble = nil
+ @delimiters = nil
+ end
+ end
+end
diff --git a/lib/feed2imap/textconverters.rb b/lib/feed2imap/textconverters.rb
new file mode 100644
index 0000000..ba49193
--- /dev/null
+++ b/lib/feed2imap/textconverters.rb
@@ -0,0 +1,85 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+require 'uri' # for URI::regexp
+
+# This class provides various converters
+class String
+ # is this text HTML ? search for tags
+ def html?
+ return (self =~ /<p>/) || (self =~ /<br>/) || (self =~ /<br\s*(\/)?\s*>/)
+ end
+
+ # convert text to HTML
+ def text2html
+ text = self.clone
+ return text if text.html?
+ # paragraphs
+ text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
+ text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
+ # uris
+ text.gsub!(/(#{URI::regexp(['http','ftp','https'])})/,
+ '<a href="\1">\1</a>')
+ text
+ end
+
+ # Convert an HTML text to plain text
+ def html2text
+ text = self.clone
+ # let's remove all CR
+ text.gsub!(/\n/, '')
+ # convert <p> and <br>
+ text.gsub!(/\s*<\/p>\s*/, '')
+ text.gsub!(/\s*<p(\s[^>]*)?>\s*/, "\n\n")
+ text.gsub!(/\s*<br(\s*)\/?(\s*)>\s*/, "\n")
+ # remove other tags
+ text.gsub!(/<[^>]*>/, '')
+ # remove leading and trailing whilespace
+ text.gsub!(/\A\s*/m, '')
+ text.gsub!(/\s*\Z/m, '')
+ text
+ end
+
+ # Remove white space around the text
+ def rmWhiteSpace!
+ return self.gsub!(/\A\s*/m, '').gsub!(/\s*\Z/m,'')
+ end
+
+ # Convert a text in inputenc to a text in UTF8
+ # must take care of wrong input locales
+ def toUTF8(inputenc)
+ if inputenc.downcase! != 'utf-8'
+ # it is said it is not UTF-8. Ensure it is REALLY not UTF-8
+ begin
+ if self.unpack('U*').pack('U*') == self
+ return self
+ end
+ rescue
+ # do nothing
+ end
+ begin
+ return self.unpack('C*').pack('U*')
+ rescue
+ return self #failsafe solution. but a dirty one :-)
+ end
+ else
+ return self
+ end
+ end
+end