From 16ec9aba7e94e628f22bcaeb3ecdd7916f3a3df5 Mon Sep 17 00:00:00 2001
From: lnu /) || (self =~ / \1
/) || (self =~ /
/)
+ end
+
+ # convert text to HTML
+ def text2html
+ text = self.clone
+ return text if text.html?
+ # paragraphs
+ text.gsub!(/\A\s*(.*)\Z/m, '
") + # uris + text.gsub!(/(#{URI::regexp(['http','ftp','https'])})/, + '\1') + text + end + + # Convert an HTML text to plain text + def html2text + text = self.clone + # let's remove all CR + text.gsub!(/\n/, '') + # convert
and
+ text.gsub!(/\s*<\/p>\s*/, '')
+ text.gsub!(/\s*
]*)?>\s*/, "\n\n")
+ text.gsub!(/\s*
\s*/, "\n")
+ # remove other tags
+ text.gsub!(/<[^>]*>/, '')
+ # remove leading and trailing whitespace
+ text.gsub!(/\A\s*/m, '')
+ text.gsub!(/\s*\Z/m, '')
+ text
+ end
+
+ # Remove white space around the text
+ def rmWhiteSpace!
+ return self.gsub!(/\A\s*/m, '').gsub!(/\s*\Z/m,'')
+ end
+
+ # Convert a text in inputenc to a text in UTF8
+ # must take care of wrong input locales
+ def toUTF8(inputenc)
+ if inputenc.downcase! != 'utf-8'
+ # it is said it is not UTF-8. Ensure it is REALLY not UTF-8
+ begin
+ if self.unpack('U*').pack('U*') == self
+ return self
+ end
+ rescue
+ # do nothing
+ end
+ begin
+ return self.unpack('C*').pack('U*')
+ rescue
+ return self #failsafe solution. but a dirty one :-)
+ end
+ else
+ return self
+ end
+ end
+end
--
cgit v1.2.3-54-g00ecf