java - ANTLR doesn't give correct output tokens for Scala Grammar -
blockquote
I am new to Scala and am trying to parse Scala files using the Scala grammar and ANTLR. Below is the Scala grammar code, which I got from this GitHub link:
https://github.com/antlr/grammars-v4/tree/master/scala
Since there is a chance the repo may be moved, I am pasting the Scala grammar code here:
grammar scala; literal : '-'? integerliteral | '-'? floatingpointliteral | booleanliteral | characterliteral | stringliteral | symbolliteral | 'null' ; qualid : id ('.' id)* ; ids : id (',' id)* ; stableid : (id | (id '.')? 'this') '.' id | (id '.')? 'super' classqualifier? '.' id ; classqualifier : '[' id ']' ; type : functionargtypes '=>' type | infixtype existentialclause? ; functionargtypes : infixtype | '(' ( paramtype (',' paramtype )* )? ')' ; existentialclause : 'forsome' '{' existentialdcl (semi existentialdcl)* '}'; existentialdcl : 'type' typedcl | 'val' valdcl; infixtype : compoundtype (id nl? compoundtype)*; compoundtype : annottype ('with' annottype)* refinement? | refinement; annottype : simpletype annotation*; simpletype : simpletype typeargs | simpletype '#' id | stableid | (stableid | (id '.')? 'this') '.' 'type' | '(' types ')'; typeargs : '[' types ']'; types : type (',' type)*; refinement : nl? '{' refinestat (semi refinestat)* '}'; refinestat : dcl | 'type' typedef | ; typepat : type; ascription : ':' infixtype | ':' annotation+ | ':' '_' '*'; expr : (bindings | 'implicit'? id | '_') '=>' expr | expr1 ; expr1 : 'if' '(' expr ')' nl* expr (semi? 'else' expr)? | 'while' '(' expr ')' nl* expr | 'try' ('{' block '}' | expr) ('catch' '{' caseclauses '}')? ('finally' expr)? | 'do' expr semi? 'while' '(' expr ')' | 'for' ('(' enumerators ')' | '{' enumerators '}') nl* 'yield'? expr | 'throw' expr | 'return' expr? | (('new' (classtemplate | templatebody)| blockexpr | simpleexpr1 '_'?) '.') id '=' expr | simpleexpr1 argumentexprs '=' expr | postfixexpr | postfixexpr ascription | postfixexpr 'match' '{' caseclauses '}' ; postfixexpr : infixexpr (id nl?)? ; infixexpr : prefixexpr | infixexpr id nl? infixexpr ; prefixexpr : ('-' | '+' | '~' | '!')? ('new' (classtemplate | templatebody)| blockexpr | simpleexpr1 '_'?) ; simpleexpr1 : literal | stableid | (id '.')? 'this' | '_' | '(' exprs? ')' | ('new' (classtemplate | templatebody) | blockexpr ) '.' 
id | ('new' (classtemplate | templatebody) | blockexpr ) typeargs | simpleexpr1 argumentexprs ; exprs : expr (',' expr)* ; argumentexprs : '(' exprs? ')' | '(' (exprs ',')? postfixexpr ':' '_' '*' ')' | nl? blockexpr ; blockexpr : '{' caseclauses '}' | '{' block '}' ; block : blockstat (semi blockstat)* resultexpr? ; blockstat : import_ | annotation* ('implicit' | 'lazy')? def | annotation* localmodifier* tmpldef | expr1 | ; resultexpr : expr1 | (bindings | ('implicit'? id | '_') ':' compoundtype) '=>' block ; enumerators : generator (semi generator)* ; generator : pattern1 '<-' expr (semi? guard | semi pattern1 '=' expr)* ; caseclauses : caseclause+ ; caseclause : 'case' pattern guard? '=>' block ; guard : 'if' postfixexpr ; pattern : pattern1 ('|' pattern1 )* ; pattern1 : varid ':' typepat | '_' ':' typepat | pattern2 ; pattern2 : varid ('@' pattern3)? | pattern3 ; pattern3 : simplepattern | simplepattern (id nl? simplepattern)* ; simplepattern : '_' | varid | literal | stableid ('(' patterns ')')? | stableid '(' (patterns ',')? (varid '@')? '_' '*' ')' | '(' patterns? ')' ; patterns : pattern (',' patterns)* | '_' * ; typeparamclause : '[' varianttypeparam (',' varianttypeparam)* ']' ; funtypeparamclause: '[' typeparam (',' typeparam)* ']' ; varianttypeparam : annotation? ('+' | '-')? typeparam ; typeparam : (id | '_') typeparamclause? ('>:' type)? ('<:' type)? ('<%' type)* (':' type)* ; paramclauses : paramclause* (nl? '(' 'implicit' params ')')? ; paramclause : nl? '(' params? ')' ; params : param (',' param)* ; param : annotation* id (':' paramtype)? ('=' expr)? ; paramtype : type | '=>' type | type '*'; classparamclauses : classparamclause* (nl? '(' 'implicit' classparams ')')? ; classparamclause : nl? '(' classparams? ')' ; classparams : classparam (',' classparam)* ; classparam : annotation* modifier* ('val' | 'var')? id ':' paramtype ('=' expr)? ; bindings : '(' binding (',' binding )* ')' ; binding : (id | '_') (':' type)? 
; modifier : localmodifier | accessmodifier | 'override' ; localmodifier : 'abstract' | 'final' | 'sealed' | 'implicit' | 'lazy' ; accessmodifier : ('private' | 'protected') accessqualifier? ; accessqualifier : '[' (id | 'this') ']' ; annotation : '@' simpletype argumentexprs* ; constrannotation : '@' simpletype argumentexprs ; templatebody : nl? '{' selftype? templatestat (semi templatestat)* '}' ; templatestat : import_ | (annotation nl?)* modifier* def | (annotation nl?)* modifier* dcl | expr | ; selftype : id (':' type)? '=>' | 'this' ':' type '=>' ; import_ : 'import' importexpr (',' importexpr)* ; importexpr : stableid '.' (id | '_' | importselectors) ; importselectors : '{' (importselector ',')* (importselector | '_') '}' ; importselector : id ('=>' id | '=>' '_') ; dcl : 'val' valdcl | 'var' vardcl | 'def' fundcl | 'type' nl* typedcl ; valdcl : ids ':' type ; vardcl : ids ':' type ; fundcl : funsig (':' type)? ; funsig : id funtypeparamclause? paramclauses ; typedcl : id typeparamclause? ('>:' type)? ('<:' type)? ; patvardef : 'val' patdef | 'var' vardef ; def : patvardef | 'def' fundef | 'type' nl* typedef | tmpldef ; patdef : pattern2 (',' pattern2)* (':' type)* '=' expr ; vardef : patdef | ids ':' type '=' '_' ; fundef : funsig (':' type)? '=' expr | funsig nl? '{' block '}' | 'this' paramclause paramclauses ('=' constrexpr | nl constrblock) ; typedef : id typeparamclause? '=' type ; tmpldef : 'case'? 'class' classdef | 'case' 'object' objectdef | 'trait' traitdef ; classdef : id typeparamclause? constrannotation* accessmodifier? classparamclauses classtemplateopt ; traitdef : id typeparamclause? traittemplateopt ; objectdef : id classtemplateopt ; classtemplateopt : 'extends' classtemplate | ('extends'? templatebody)? ; traittemplateopt : 'extends' traittemplate | ('extends'? templatebody)? ; classtemplate : earlydefs? classparents templatebody? ; traittemplate : earlydefs? traitparents templatebody? 
; classparents : constr ('with' annottype)* ; traitparents : annottype ('with' annottype)* ; constr : annottype argumentexprs* ; earlydefs : '{' (earlydef (semi earlydef)*)? '}' 'with' ; earlydef : (annotation nl?)* modifier* patvardef ; constrexpr : selfinvocation | constrblock ; constrblock : '{' selfinvocation (semi blockstat)* '}' ; selfinvocation : 'this' argumentexprs+ ; topstatseq : topstat (semi topstat)* ; topstat : (annotation nl?)* modifier* tmpldef | import_ | packaging | packageobject | ; packaging : 'package' qualid nl? '{' topstatseq '}' ; packageobject : 'package' 'object' objectdef ; compilationunit : ('package' qualid semi)* topstatseq ; // lexer booleanliteral : 'true' | 'false'; characterliteral : '\'' (printablechar | charescapeseq) '\''; stringliteral : '"' stringelement* '"' | '"""' multilinechars '"""'; symbolliteral : '\'' plainid; integerliteral : (decimalnumeral | hexnumeral) ('l' | 'l'); floatingpointliteral : digit+ '.' digit+ exponentpart? floattype? | '.' digit+ exponentpart? floattype? | digit exponentpart floattype? | digit+ exponentpart? floattype; id : plainid | '`' stringliteral '`'; varid : lower idrest; nl : '\r'? '\n'; semi : ';' | nl+; paren : '(' | ')' | '[' | ']' | '{' | '}'; delim : '`' | '\'' | '"' | '.' | ';' | ',' ; comment : '/*' .*? '*/' | '//' .*? nl; // fragments fragment unicodeescape : '\\' 'u' 'u'? hexdigit hexdigit hexdigit hexdigit ; fragment whitespace : '\u0020' | '\u0009' | '\u000d' | '\u000a'; fragment opchar : printablechar // printablechar not matched (whitespace | upper | lower | // letter | digit | paren | delim | opchar | unicode_sm | unicode_so) ; fragment op : opchar+; fragment plainid : upper idrest | varid | op; fragment idrest : (letter | digit)* ('_' op)?; fragment stringelement : '\u0020'| '\u0021'|'\u0023' .. '\u007f' // (printablechar except '"') | charescapeseq; fragment multilinechars : ('"'? '"'? .*?)* '"'*; fragment hexdigit : '0' .. '9' | 'a' .. 'z' | 'a' .. 
'z' ; fragment floattype : 'f' | 'f' | 'd' | 'd'; fragment upper : 'a' .. 'z' | '$' | '_'; // , unicode category lu fragment lower : 'a' .. 'z'; // , unicode category ll fragment letter : upper | lower; // , unicode categories lo, lt, nl fragment exponentpart : ('e' | 'e') ('+' | '-')? digit+; fragment printablechar : '\u0020' .. '\u007f' ; fragment charescapeseq : '\\' ('b' | 't' | 'n' | 'f' | 'r' | '"' | '\'' | '\\'); fragment decimalnumeral : '0' | nonzerodigit digit*; fragment hexnumeral : '0' 'x' hexdigit hexdigit+; fragment digit : '0' | nonzerodigit; fragment nonzerodigit : '1' .. '9';
The above Scala grammar is the same as the one I got from the official Scala website:
http://www.scala-lang.org/files/archive/spec/2.11/13-syntax-summary.html
Now I am trying to generate tokens for a Scala file named scala.scala. The code in the file is below:
object helloworld { def main(args: array[string]) { println("hello, world!") } }
I am running the following command to get the tokens:
grun scala compilationunit -tokens scala.scala
or
grun scala expr -tokens scala.scala
or
grun scala literal -tokens scala.scala
The output I got is:
[@0,0:18='object helloworld {',<68>,1:0] [@1,19:19='\n',<70>,1:19] [@2,20:52=' def main(args: array[string]) {',<68>,2:0] [@3,53:53='\n',<70>,2:33] [@4,54:81=' println("hello, world!")',<68>,3:0] [@5,82:82='\n',<70>,3:28] [@6,83:85=' }',<68>,4:0] [@7,86:86='\n',<70>,4:3] [@8,87:87='}',<14>,5:0] [@9,88:88='\n',<70>,5:1] [@10,89:88='<eof>',<-1>,6:0] line 1:19 no viable alternative @ input 'object helloworld {\n'
output in tree form :
(expr object helloworld { \n def main(args: array[string]) { \n println("hello, world!") \n } \n } \n)
and output in gui :
That is stupid. :( Instead of tokens, it is giving me whole lines of code (LOC). I have tested this with other languages such as Java and C, and it works perfectly: it gives me the correct output, with the correct tokens I expected, using the grammars from the following link:
https://github.com/antlr/grammars-v4
Please, please correct me if I am doing something wrong, because I am new to both ANTLR and Scala.
What I mean by tokens is the keywords, operands, and operators in the file. As far as I understand, a token is never meant to be a whole line of code (LOC). :( Cheers!
below scala.tokens file got using scala.g4(scala grammar antlr). t__0=1 t__1=2 t__2=3 t__3=4 t__4=5 t__5=6 t__6=7 t__7=8 t__8=9 t__9=10 t__10=11 t__11=12 t__12=13 t__13=14 t__14=15 t__15=16 t__16=17 t__17=18 t__18=19 t__19=20 t__20=21 t__21=22 t__22=23 t__23=24 t__24=25 t__25=26 t__26=27 t__27=28 t__28=29 t__29=30 t__30=31 t__31=32 t__32=33 t__33=34 t__34=35 t__35=36 t__36=37 t__37=38 t__38=39 t__39=40 t__40=41 t__41=42 t__42=43 t__43=44 t__44=45 t__45=46 t__46=47 t__47=48 t__48=49 t__49=50 t__50=51 t__51=52 t__52=53 t__53=54 t__54=55 t__55=56 t__56=57 t__57=58 t__58=59 t__59=60 t__60=61 booleanliteral=62 characterliteral=63 stringliteral=64 symbolliteral=65 integerliteral=66 floatingpointliteral=67 id=68 varid=69 nl=70 semi=71 paren=72 delim=73 comment=74 '-'=1 'null'=2 '.'=3 ','=4 'this'=5 'super'=6 '['=7 ']'=8 '=>'=9 '('=10 ')'=11 'forsome'=12 '{'=13 '}'=14 'type'=15 'val'=16 'with'=17 '#'=18 ':'=19 '_'=20 '*'=21 'implicit'=22 'if'=23 'else'=24 'while'=25 'try'=26 'catch'=27 'finally'=28 'do'=29 'for'=30 'yield'=31 'throw'=32 'return'=33 'new'=34 '='=35 'match'=36 '+'=37 '~'=38 '!'=39 'lazy'=40 '<-'=41 'case'=42 '|'=43 '@'=44 '>:'=45 '<:'=46 '<%'=47 'var'=48 'override'=49 'abstract'=50 'final'=51 'sealed'=52 'private'=53 'protected'=54 'import'=55 'def'=56 'class'=57 'object'=58 'trait'=59 'extends'=60 'package'=61
I am sure these tokens are not correct. Can anyone confirm whether the problem is in the Scala grammar or in ANTLR?
Comments
Post a Comment