summaryrefslogtreecommitdiff
path: root/.vim/autoload/omni/cpp/tokenizer.vim
diff options
context:
space:
mode:
authorRené 'Necoro' Neumann <necoro@necoro.net>2009-10-07 17:05:19 +0200
committerRené 'Necoro' Neumann <necoro@necoro.net>2009-10-07 17:05:19 +0200
commitdd5427baaf49f8de4355abeb6bc8c6dd14f74e25 (patch)
tree46fcfc70bd792e80ceebaab89a7f8fc06bc29101 /.vim/autoload/omni/cpp/tokenizer.vim
downloaddotfiles-dd5427baaf49f8de4355abeb6bc8c6dd14f74e25.tar.gz
dotfiles-dd5427baaf49f8de4355abeb6bc8c6dd14f74e25.tar.bz2
dotfiles-dd5427baaf49f8de4355abeb6bc8c6dd14f74e25.zip
Initial check-in of files
Diffstat (limited to '')
-rw-r--r--.vim/autoload/omni/cpp/tokenizer.vim93
1 files changed, 93 insertions, 0 deletions
diff --git a/.vim/autoload/omni/cpp/tokenizer.vim b/.vim/autoload/omni/cpp/tokenizer.vim
new file mode 100644
index 0000000..16e0be2
--- /dev/null
+++ b/.vim/autoload/omni/cpp/tokenizer.vim
@@ -0,0 +1,93 @@
+" Description: Omni completion tokenizer
+" Maintainer: Vissale NEANG
+" Last Change: 26 sept. 2007
+" TODO: Generic behaviour for Tokenize()
+
+" From the C++ BNF
+let s:cppKeyword = ['asm', 'auto', 'bool', 'break', 'case', 'catch', 'char', 'class', 'const', 'const_cast', 'continue', 'default', 'delete', 'do', 'double', 'dynamic_cast', 'else', 'enum', 'explicit', 'export', 'extern', 'false', 'float', 'for', 'friend', 'goto', 'if', 'inline', 'int', 'long', 'mutable', 'namespace', 'new', 'operator', 'private', 'protected', 'public', 'register', 'reinterpret_cast', 'return', 'short', 'signed', 'sizeof', 'static', 'static_cast', 'struct', 'switch', 'template', 'this', 'throw', 'true', 'try', 'typedef', 'typeid', 'typename', 'union', 'unsigned', 'using', 'virtual', 'void', 'volatile', 'wchar_t', 'while', 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq', 'xor', 'xor_eq']
+
+let s:reCppKeyword = '\C\<'.join(s:cppKeyword, '\>\|\<').'\>'
+
+" The order of items in this list is very important because we use this list to build a regular
+" expression (see below) for tokenization
+let s:cppOperatorPunctuator = ['->*', '->', '--', '-=', '-', '!=', '!', '##', '#', '%:%:', '%=', '%>', '%:', '%', '&&', '&=', '&', '(', ')', '*=', '*', ',', '...', '.*', '.', '/=', '/', '::', ':>', ':', ';', '?', '[', ']', '^=', '^', '{', '||', '|=', '|', '}', '~', '++', '+=', '+', '<<=', '<%', '<:', '<<', '<=', '<', '==', '=', '>>=', '>>', '>=', '>']
+
+" We build the regexp for the tokenizer
+let s:reCComment = '\/\*\|\*\/'
+let s:reCppComment = '\/\/'
+let s:reComment = s:reCComment.'\|'.s:reCppComment
+let s:reCppOperatorOrPunctuator = escape(join(s:cppOperatorPunctuator, '\|'), '*./^~[]')
+
+
+" Tokenize a c++ code
+" a token is dictionary where keys are:
+" - kind = cppKeyword|cppWord|cppOperatorPunctuator|unknown|cComment|cppComment|cppDigit
+" - value = 'something'
+" Note: a cppWord is any word that is not a cpp keyword
+function! omni#cpp#tokenizer#Tokenize(szCode)
+ let result = []
+
+ " The regexp to find a token, a token is a keyword, word or
+ " c++ operator or punctuator. To work properly we have to put
+ " spaces and tabs to our regexp.
+ let reTokenSearch = '\(\w\+\)\|\s\+\|'.s:reComment.'\|'.s:reCppOperatorOrPunctuator
+ " eg: 'using namespace std;'
+ " ^ ^
+ " start=0 end=5
+ let startPos = 0
+ let endPos = matchend(a:szCode, reTokenSearch)
+ let len = endPos-startPos
+ while endPos!=-1
+ " eg: 'using namespace std;'
+ " ^ ^
+ " start=0 end=5
+ " token = 'using'
+ " We also remove space and tabs
+ let token = substitute(strpart(a:szCode, startPos, len), '\s', '', 'g')
+
+ " eg: 'using namespace std;'
+ " ^ ^
+ " start=5 end=15
+ let startPos = endPos
+ let endPos = matchend(a:szCode, reTokenSearch, startPos)
+ let len = endPos-startPos
+
+ " It the token is empty we continue
+ if token==''
+ continue
+ endif
+
+ " Building the token
+ let resultToken = {'kind' : 'unknown', 'value' : token}
+
+ " Classify the token
+ if token =~ '^\d\+'
+ " It's a digit
+ let resultToken.kind = 'cppDigit'
+ elseif token=~'^\w\+$'
+ " It's a word
+ let resultToken.kind = 'cppWord'
+
+ " But maybe it's a c++ keyword
+ if match(token, s:reCppKeyword)>=0
+ let resultToken.kind = 'cppKeyword'
+ endif
+ else
+ if match(token, s:reComment)>=0
+ if index(['/*','*/'],token)>=0
+ let resultToken.kind = 'cComment'
+ else
+ let resultToken.kind = 'cppComment'
+ endif
+ else
+ " It's an operator
+ let resultToken.kind = 'cppOperatorPunctuator'
+ endif
+ endif
+
+ " We have our token, let's add it to the result list
+ call extend(result, [resultToken])
+ endwhile
+
+ return result
+endfunc