diff options
author | lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> | 2006-01-02 20:08:52 +0000 |
---|---|---|
committer | lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> | 2006-01-02 20:08:52 +0000 |
commit | c8a7a285dc83f05cbbd8b935fcf1d9c780f77ced (patch) | |
tree | 0f2f6a59c2985aaf72173f5e947ef4caa739ff5b /lib/feed2imap/channel.rb | |
parent | cab3234d550ce0d76c53a22be3376571654b4e09 (diff) | |
download | feed2imap-c8a7a285dc83f05cbbd8b935fcf1d9c780f77ced.tar.gz feed2imap-c8a7a285dc83f05cbbd8b935fcf1d9c780f77ced.tar.bz2 feed2imap-c8a7a285dc83f05cbbd8b935fcf1d9c780f77ced.zip |
Now uses ruby-feedparser for feed parsing
git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@73 f70e237a-67f3-0310-a06c-d2b8a7116972
Diffstat (limited to 'lib/feed2imap/channel.rb')
-rw-r--r-- | lib/feed2imap/channel.rb | 334 |
1 files changed, 0 insertions, 334 deletions
diff --git a/lib/feed2imap/channel.rb b/lib/feed2imap/channel.rb deleted file mode 100644 index c43c254..0000000 --- a/lib/feed2imap/channel.rb +++ /dev/null @@ -1,334 +0,0 @@ -=begin -Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server -Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -=end - -# This class allows to retrieve a feed and parse it into a Channel - -require 'rexml/document' -require 'time' -require 'rmail' -require 'feed2imap/textconverters' -require 'feed2imap/rubymail_patch' -require 'feed2imap/rexml_patch' -require 'base64' - -class UnknownFeedTypeException < RuntimeError -end -# an RSS/Atom channel -class Channel - attr_reader :title, :link, :description, :creator, :encoding, :items - - # parse str to build a channel - def initialize(str = nil) - parse_str(str) if str - end - - # Determines all the fields using a string containing an - # XML document - def parse_str(str) - # Dirty hack: some feeds contain the & char. It must be changed to & - str.gsub!(/&(\s+)/, '&\1') - doc = REXML::Document.new(str) - # get channel info - @encoding = doc.encoding - @title,@link,@description,@creator = nil - @items = [] - if doc.root.elements['channel'] || doc.root.elements['rss:channel'] - # We have a RSS feed! - # Title - if (e = doc.root.elements['channel/title'] || - doc.root.elements['rss:channel/rss:title']) && e.text - @title = e.text.toUTF8(@encoding).rmWhiteSpace! - end - # Link - if (e = doc.root.elements['channel/link'] || - doc.root.elements['rss:channel/rss:link']) && e.text - @link = e.text.rmWhiteSpace! - end - # Description - if (e = doc.root.elements['channel/description'] || - doc.root.elements['rss:channel/rss:description']) && e.text - @description = e.text.toUTF8(@encoding).rmWhiteSpace! - end - # Creator - if ((e = doc.root.elements['channel/dc:creator']) && e.text) || - ((e = doc.root.elements['channel/author'] || - doc.root.elements['rss:channel/rss:author']) && e.text) - @creator = e.text.toUTF8(@encoding).rmWhiteSpace! - end - # Items - if doc.root.elements['channel/item'] - query = 'channel/item' - elsif doc.root.elements['item'] - query = 'item' - elsif doc.root.elements['rss:channel/rss:item'] - query = 'rss:channel/rss:item' - else - query = 'rss:item' - end - doc.root.each_element(query) { |e| @items << Item::new(e, self) } - - elsif doc.root.elements['/feed'] - # We have an ATOM feed! - # Title - if (e = doc.root.elements['/feed/title']) && e.text - @title = e.text.toUTF8(@encoding).rmWhiteSpace! - end - # Link - doc.root.each_element('/feed/link') do |e| - if e.attribute('type') and ( - e.attribute('type').value == 'text/html' or - e.attribute('type').value == 'application/xhtml' or - e.attribute('type').value == 'application/xhtml+xml') - if (h = e.attribute('href')) && h - @link = h.value.rmWhiteSpace! - end - end - end - # Description - if e = doc.root.elements['/feed/info'] - @description = e.elements.to_s.toUTF8(@encoding).rmWhiteSpace! - end - # Items - doc.root.each_element('/feed/entry') do |e| - @items << AtomItem::new(e, self) - end - else - raise UnknownFeedTypeException::new - end - end - - def to_s - s = "Title: #{@title}\nLink: #{@link}\n\n" - @items.each { |i| s += i.to_s } - s - end -end - -# an Item from a channel -class Item - attr_accessor :title, :link, :content, :date, :creator, :subject, - :category, :cacheditem - attr_reader :channel - def initialize(item = nil, channel = nil) - @channel = channel - @title, @link, @content, @date, @creator, @subject, @category = nil - if item - # Title - if ((e = item.elements['title'] || item.elements['rss:title']) && - e.text) || - ((e = item.elements['pubDate'] || item.elements['rss:pubDate']) && - e.text) - @title = e.text.toUTF8(@channel.encoding).rmWhiteSpace! - end - # Link - if ((e = item.elements['link'] || item.elements['rss:link']) && e.text)|| - (e = item.elements['guid'] || item.elements['rss:guid'] and - not (e.attribute('isPermaLink') and - e.attribute('isPermaLink').value == 'false')) - @link = e.text.rmWhiteSpace! - end - # Content - if (e = item.elements['content:encoded']) || - (e = item.elements['description'] || item.elements['rss:description']) - if e.children.length > 1 - s = '' - e.children.each { |c| s += c.to_s } - @content = s.toUTF8(@channel.encoding).rmWhiteSpace!.text2html - elsif e.children.length == 1 - if e.cdatas[0] - @content = e.cdatas[0].to_s.toUTF8(@channel.encoding).rmWhiteSpace! - elsif e.text - @content = e.text.toUTF8(@channel.encoding).text2html - end - end - end - # Date - if e = item.elements['dc:date'] || item.elements['pubDate'] || - item.elements['rss:pubDate'] - begin - @date = Time::xmlschema(e.text) - rescue - begin - @date = Time::rfc2822(e.text) - rescue - begin - @date = Time::parse(e.text) - rescue - @date = nil - end - end - end - end - # Creator - @creator = @channel.creator - if (e = item.elements['dc:creator'] || item.elements['author'] || - item.elements['rss:author']) && e.text - @creator = e.text.toUTF8(@channel.encoding).rmWhiteSpace! - end - # Subject - if (e = item.elements['dc:subject']) && e.text - @subject = e.text.toUTF8(@channel.encoding).rmWhiteSpace! - end - # Category - if (e = item.elements['dc:category'] || item.elements['category'] || - item.elements['rss:category']) && e.text - @category = e.text.toUTF8(@channel.encoding).rmWhiteSpace! - end - end - end - - def to_s - "--------------------------------\n" + - "Title: #{@title}\nLink: #{@link}\n" + - "Date: #{@date.to_s}\nCreator: #{@creator}\n" + - "Subject: #{@subject}\nCategory: #{@category}\nContent:\n#{content}\n" - end - - def to_text - s = "" - s += "Channel: " - s += @channel.title.toISO_8859_1('utf-8') + ' ' if @channel.title - s += "<#{@channel.link.toISO_8859_1('utf-8')}>" if @channel.link - s += "\n" - s += "Item: " - s += @title.toISO_8859_1('utf-8') + ' ' if @title - s += "<#{@link.toISO_8859_1('utf-8')}>" if @link - s += "\n" - s += "\nDate: #{@date.to_s.toISO_8859_1('utf-8')}" if @date # TODO improve date rendering ? - s += "\nAuthor: #{@creator.toISO_8859_1('utf-8')}" if @creator - - s += "\nSubject: #{@subject.toISO_8859_1('utf-8')}" if @subject - s += "\nCategory: #{@category.toISO_8859_1('utf-8')}" if @category - s += "\n\n" - s += "#{@content.html2text.toISO_8859_1('utf-8')}" if @content - s - end - - def to_html - s = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">' - s += '<html>' - s += '<body>' - s += "<p>Channel: " - s += "<a href=\"#{@channel.link}\">" if @channel.link - s += @channel.title if @channel.title - s += "</a>" if @channel.link - s += "<br/>\nItem: " - s += "<a href=\"#{@link}\">" if @link - s += @title if @title - s += "</a>" if @link - s += "\n" - s += "<br/>Date: #{@date.to_s}" if @date # TODO improve date rendering ? - s += "<br/>Author: #{@creator}" if @creator - s += "<br/>Subject: #{@subject}" if @subject - s += "<br/>Category: #{@category}" if @category - s += "</p>" - s += "<p>#{@content}</p>" if @content - s += '</body></html>' - s - end - - def to_mail(from = 'Feed2Imap') - message = RMail::Message::new - message.header['From'] = "#{from} <feed2imap@feed2imap.net>" - message.header['To'] = "#{from} <feed2imap@feed2imap.net>" - if @date.nil? - message.header['Date'] = Time::new.rfc2822 - else - message.header['Date'] = @date.rfc2822 - end - message.header['X-Feed2Imap-Version'] = F2I_VERSION if defined?(F2I_VERSION) - message.header['X-CacheIndex'] = "-#{@cacheditem.index}-" - message.header['X-F2IStatus'] = "Updated" if @cacheditem.updated - # treat subject. Might need MIME encoding. - subj = @title or (@date and @date.to_s) or @link - if subj - if subj.needMIME - message.header['Subject'] = "=?utf-8?b?#{Base64::encode64(subj).gsub("\n",'')}?=" - else - message.header['Subject'] = subj - end - end - textpart = RMail::Message::new - textpart.header['Content-Type'] = 'text/plain; charset=iso-8859-1' - textpart.header['Content-Transfer-Encoding'] = '7bit' - textpart.body = to_text - htmlpart = RMail::Message::new - htmlpart.header['Content-Type'] = 'text/html; charset=utf-8' - htmlpart.header['Content-Transfer-Encoding'] = '7bit' - htmlpart.body = to_html - message.add_part(textpart) - message.add_part(htmlpart) - return message.to_s - end -end - -class AtomItem < Item - def initialize(item = nil, channel = nil) - @channel = channel - @title, @link, @content, @date, @creator, @subject, @category = nil - if item - # Title - if (e = item.elements['title']) && e.text - @title = e.text.toUTF8(@channel.encoding).rmWhiteSpace! - end - # Link - item.each_element('link') do |e| - if e.attribute('type').value == 'text/html' or - e.attribute('type').value == 'application/xhtml' or - e.attribute('type').value == 'application/xhtml+xml' - if (h = e.attribute('href')) && h.value - @link = h.value - end - end - end - # Content - if e = item.elements['content'] || item.elements['summary'] - if (e.attribute('mode') and e.attribute('mode').value == 'escaped') && - e.text - @content = e.text.toUTF8(@channel.encoding).rmWhiteSpace! - else - # go one step deeper in the recursion if possible - e = e.elements['div'] || e - @content = e.to_s.toUTF8(@channel.encoding).rmWhiteSpace! - end - end - # Date - if (e = item.elements['issued'] || e = item.elements['created']) && e.text - begin - @date = Time::xmlschema(e.text) - rescue - begin - @date = Time::rfc2822(e.text) - rescue - begin - @date = Time::parse(e.text) - rescue - @date = nil - end - end - end - end - # Creator - @creator = @channel.creator - if (e = item.elements['author/name']) && e.text - @creator = e.text.toUTF8(@channel.encoding).rmWhiteSpace! - end - end - end -end |