summaryrefslogtreecommitdiff
path: root/lib/feed2imap/textconverters.rb
diff options
context:
space:
mode:
authorlnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>2005-03-31 22:08:32 +0000
committerlnu <lnu@f70e237a-67f3-0310-a06c-d2b8a7116972>2005-03-31 22:08:32 +0000
commit16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5 (patch)
treefcee2e08574f55e141eeea3cb2747a4a80c04d89 /lib/feed2imap/textconverters.rb
parent94c2f3339fbe18700fcc057367784d04bb2a76d9 (diff)
downloadfeed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.gz
feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.tar.bz2
feed2imap-16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5.zip
first import
git-svn-id: svn+ssh://svn.gna.org/svn/feed2imap/trunk/feed2imap@5 f70e237a-67f3-0310-a06c-d2b8a7116972
Diffstat (limited to '')
-rw-r--r--lib/feed2imap/textconverters.rb85
1 files changed, 85 insertions, 0 deletions
diff --git a/lib/feed2imap/textconverters.rb b/lib/feed2imap/textconverters.rb
new file mode 100644
index 0000000..ba49193
--- /dev/null
+++ b/lib/feed2imap/textconverters.rb
@@ -0,0 +1,85 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+
+require 'uri' # for URI::regexp
+
# Text converters added to String: HTML detection, plain text <-> HTML
# conversion, whitespace trimming and a best-effort conversion to UTF-8.
class String
  # Is this text HTML? Only <p> and <br> (in any of the <br>, <br/>, <br />
  # spellings) are looked for.
  #
  # Returns the index of the first tag found (truthy, possibly 0) or nil.
  def html?
    (self =~ /<p>/) || (self =~ /<br>/) || (self =~ %r{<br\s*(/)?\s*>})
  end

  # Convert plain text to HTML.
  #
  # Text that already looks like HTML (see #html?) is returned as an
  # unmodified copy. Otherwise surrounding whitespace is dropped, the text is
  # wrapped in <p>...</p>, every run of blank lines starts a new paragraph and
  # http/https/ftp URIs are turned into links.
  #
  # The receiver is never modified; a new String is returned.
  def text2html
    # dup rather than clone: clone keeps the frozen flag, which would make the
    # in-place substitutions below raise on a frozen receiver.
    return dup if html?

    # Trim both ends before wrapping so that trailing blank lines do not turn
    # into an empty "<p></p>" paragraph.
    body = sub(/\A\s+/, '').sub(/\s+\z/, '')
    html = "<p>#{body}</p>"
    # paragraphs: one or more blank lines separate two paragraphs
    html.gsub!(/\s*\n(\s*\n)+\s*/, "</p>\n<p>")
    # uris
    html.gsub!(/(#{URI::regexp(['http','ftp','https'])})/,
               '<a href="\1">\1</a>')
    html
  end

  # Convert an HTML text to plain text.
  #
  # <p> becomes a blank line, <br> becomes a line break, every other tag is
  # dropped and surrounding whitespace is removed. Entities are left as they
  # are.
  #
  # The receiver is never modified; a new String is returned.
  def html2text
    text = dup
    # Line breaks in the markup are plain inter-word whitespace: replace them
    # with a space (not with nothing) so neighbouring words are not glued.
    text.gsub!(/[\r\n]+/, ' ')
    # convert <p> and <br>
    text.gsub!(/\s*<\/p>\s*/, '')
    text.gsub!(/\s*<p(\s[^>]*)?>\s*/, "\n\n")
    text.gsub!(/\s*<br(\s*)\/?(\s*)>\s*/, "\n")
    # remove other tags
    text.gsub!(/<[^>]*>/, '')
    # remove leading and trailing whitespace
    text.sub!(/\A\s+/, '')
    text.sub!(/\s+\z/, '')
    text
  end

  # Remove white space around the text, in place.
  #
  # Always returns self, whether or not anything had to be removed (the
  # bang substitutions return nil when nothing matched, so they are not
  # chained).
  def rmWhiteSpace!
    sub!(/\A\s+/, '')
    sub!(/\s+\z/, '')
    self
  end

  # Convert a text in inputenc to a text in UTF8.
  # Must take care of wrong input locales.
  #
  # inputenc:: declared encoding name; compared case-insensitively with
  #            'utf-8' and never modified. Any other value (including nil)
  #            means "not declared as UTF-8".
  #
  # Returns self when the text is declared as UTF-8, or when its bytes
  # already form valid UTF-8 despite the declaration. Otherwise every byte is
  # taken as the code point of the same value (i.e. the input is treated as
  # ISO-8859-1, whatever inputenc says) and a new UTF-8 String is returned.
  def toUTF8(inputenc)
    return self if inputenc.to_s.downcase == 'utf-8'

    # it is said it is not UTF-8. Ensure it is REALLY not UTF-8
    begin
      # Compare bytes, not strings: a string comparison is false as soon as
      # the receiver carries a different encoding tag than the packed copy.
      return self if unpack('U*').pack('U*').unpack('C*') == unpack('C*')
    rescue StandardError
      # malformed UTF-8: fall through to the byte-wise conversion
    end

    begin
      unpack('C*').pack('U*')
    rescue StandardError
      self # failsafe solution. but a dirty one :-)
    end
  end
end