first import

git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@5 f70e237a-67f3-0310-a06c-d2b8a7116972
author: lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> 2005-03-31 22:08:32 +0000
committer: lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972> 2005-03-31 22:08:32 +0000
commit: 16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5 (patch)
tree: fcee2e08574f55e141eeea3cb2747a4a80c04d89 /lib/feed2imap/textconverters.rb
parent: 94c2f3339fbe18700fcc057367784d04bb2a76d9 (diff)
download: feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.gz
feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.bz2
feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.zip
1 files changed, 85 insertions, 0 deletions
diff --git a/lib/feed2imap/textconverters.rb b/lib/feed2imap/textconverters.rb
new file mode 100644
index 0000000..ba49193
--- /dev/null
+++ b/lib/feed2imap/textconverters.rb
@@ -0,0 +1,85 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+
+require 'uri' # for URI::regexp
+
+# Reopens the core String class to add various text converters
+class String
+  # Is this text HTML? Looks for a <p> tag or any <br>, <br/>, <br /> tag.
+  # Returns a true value (the offset of the match) or nil.
+  def html?
+    (self =~ /<p>/) || (self =~ /<br\s*\/?\s*>/)
+  end
+
+  # Convert plain text to HTML. Text that is already HTML (see html?) is
+  # left as is. Always returns a new String.
+  def text2html
+    text = dup # not clone: a clone stays frozen and gsub! would raise
+    return text if text.html?
+    # paragraphs: wrap the whole text, then split it on blank lines
+    text.gsub!(/\A\s*(.*)\Z/m, '<p>\1</p>')
+    text.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
+    # uris
+    text.gsub!(/(#{URI::regexp(['http','ftp','https'])})/,
+        '<a href="\1">\1</a>')
+    text
+  end
+
+  # Convert an HTML text to plain text. Always returns a new String.
+  def html2text
+    text = dup # not clone: a clone stays frozen and gsub! would raise
+    # remove all line feeds; <p> and <br> are turned into line breaks below
+    text.gsub!(/\n/, '')
+    # convert <p> and <br>
+    text.gsub!(/\s*<\/p>\s*/, '')
+    text.gsub!(/\s*<p(\s[^>]*)?>\s*/, "\n\n")
+    text.gsub!(/\s*<br(\s*)\/?(\s*)>\s*/, "\n")
+    # remove other tags
+    text.gsub!(/<[^>]*>/, '')
+    # remove leading and trailing whitespace
+    text.gsub!(/\A\s*/m, '')
+    text.gsub!(/\s*\Z/m, '')
+    text
+  end
+
+  # Remove white space around the text, in place. Returns self.
+  def rmWhiteSpace!
+    # \s* can match nothing, so both gsub! always match and never return nil
+    gsub!(/\A\s*/m, '').gsub!(/\s*\Z/m, '')
+  end
+
+  # Convert a text in inputenc to a text in UTF8. Must take care of wrong
+  # input locales: a text that survives a UTF-8 round trip is returned as is.
+  def toUTF8(inputenc)
+    # downcase, not downcase!: the bang form returns nil when inputenc is
+    # already lower case and it modifies the caller's string
+    return self if inputenc.downcase == 'utf-8'
+    begin
+      # it is said it is not UTF-8. Ensure it is REALLY not UTF-8
+      return self if unpack('U*').pack('U*') == self
+    rescue
+      # malformed UTF-8 sequence: it is really not UTF-8
+    end
+    begin
+      unpack('C*').pack('U*') # one code point per input byte
+    rescue
+      self # failsafe solution. but a dirty one :-)
+    end
+  end
+end
author	lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>	2005-03-31 22:08:32 +0000
committer	lnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>	2005-03-31 22:08:32 +0000
commit	16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5 (patch)
tree	fcee2e08574f55e141eeea3cb2747a4a80c04d89 /lib/feed2imap/textconverters.rb
parent	94c2f3339fbe18700fcc057367784d04bb2a76d9 (diff)
download	feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.gz feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.bz2 feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.zip