【问题】
使用 ANTLR v3,完整的语法文件代码如下:
// Combined (lexer + parser) grammar named preprocess.
grammar preprocess;
//lexer grammar preprocess;
options{
language=Java; // target language of the generated recognizers
output = AST; // ask the generated parser to build an AST
}
@lexer::header {
//package com.mm.antlrv3demo;
import java.io.*;
import java.util.*;
}
@parser::header {
//package com.mm.antlrv3demo;
}
@lexer::members {
//public static TokenStreamSelector selector; // must be assigned externally
// NOTE(review): ifState and ifStates are not referenced by any rule visible in this grammar.
protected static Integer ifState = 1; // -1: no-else false, 0:false, 1: true
protected static List ifStates = new ArrayList(); // holds nested if conditions
// Macro table filled by DIRECTIVE: macro name -> List whose element 0 is the macro text
// and whose remaining elements are the parameter names.
// Declared static, so one table is shared by every lexer instance.
protected static Map defines = new Hashtable(); // holds the defines
// Argument table filled by IDENTIFIER: parameter name -> one-element List holding the
// argument text found at the call site. Per lexer instance (not static).
protected Map defineArgs = new Hashtable(); // holds the args for a macro call
/*
public void uponEOF() throws TokenStreamException, CharStreamException {
try {
selector.pop(); // return to old lexer/stream
selector.retry();
} catch (NoSuchElementException e) {
// return a real EOF if nothing in stack
}
}
*/
// Snapshot of a suspended input: the char stream plus the marker returned by mark()
// at the moment the lexer switched away from it.
class SaveStruct {
SaveStruct(CharStream input){
this.input = input;
this.marker = input.mark();
}
public CharStream input;
public int marker;
}
// Stack of suspended inputs. INCLUDE, DIRECTIVE and IDENTIFIER push onto it before
// switching the char stream; nextToken pops it when the current stream reaches EOF.
Stack<SaveStruct> includes = new Stack<SaveStruct>();
// class SaveStruct_defines {
// SaveStruct(CharStream input){
// this.input = input;
// this.marker = input.mark();
// }
// public CharStream input;
// public int marker;
// }
// Stack<SaveStruct_defines> definesSaveStruct = new Stack<SaveStruct_defines>();
// We should override this method for handling EOF of included file
// Returns the next token; when the current stream is exhausted and a suspended stream
// is waiting on the stack, resumes that stream instead of returning EOF.
public Token nextToken(){
Token token = super.nextToken();
if(token.getType() == Token.EOF && !includes.empty()){
// We've got EOF and have non empty stack.
SaveStruct ss = includes.pop();
setCharStream(ss.input);
// go back to the position that was marked when this stream was suspended
input.rewind(ss.marker);
//this should be used instead of super [like below] to handle exits from nested includes
//it matters, when the 'include' token is the last in previous stream (using super, lexer 'crashes' returning EOF token)
token = this.nextToken();
}
// Skip first token after switching on another input.
// You need to use this rather than super as there may be nested include files
// A negative start index is used here as the sign of such a token.
if(((CommonToken)token).getStartIndex() < 0)
token = this.nextToken();
return token;
}
}
// Comments are matched and dropped via skip(), so they never reach the parser.
// Alternative 1: a line comment, which must be terminated by a line break.
// Alternative 2: a block comment, matched non-greedily up to the first closing marker.
// NOTE(review): because alternative 1 requires the trailing line break, a line comment
// at the very end of the input without a newline would not match - confirm if that matters.
COMMENT
: ('//' ~('\n'|'\r')* '\r'? '\n') {skip();}
| ('/*' ( options {greedy=false;} : . )* '*/') {skip();}
;
// and lexer rule
// Matches an include directive followed by a quoted file name, suspends the current
// input on the includes stack and continues lexing from the named file.
// Throws java.lang.Error (with the underlying exception as its cause) when the
// file cannot be opened or the switch fails.
INCLUDE : '#include' (WS)? f=STRING
{
// STRING always carries its surrounding double quotes; strip them to get the path
String name = f.getText();
name = name.substring(1,name.length()-1);
try {
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRFileStream(name));
reset();
} catch(Exception fnf) {
// keep the original exception as the cause so the real failure is not lost
throw new Error("Cannot open file " + name, fnf);
}
};
/*
fragment
NON_CR_LF : ~('\r'|'\n');
fragment
TAB_SPACE
: (' ' | '\t');
*/
//DIRECTIVE : ('#define' WS* defineMacro=ID WS* defineText=STRING)
//DIRECTIVE : ('#define' WS* defineMacro=ID WS* defineText=( NON_CR_LF+ | (NON_CR_LF* (TAB_SPACE+ '\\' '\r'? '\n' NON_CR_LF+)*) ) )
// Body of a macro definition: any run of characters other than CR and LF.
// The first alternative additionally accepts a backslash followed by an optional CR
// and an LF, so a definition may continue on the next line.
// The backslash itself stays in the matched text; DIRECTIVE removes it afterwards.
fragment
//MACRO_TEXT : ( (('\\'){skip();System.out.println("skip line tail back slash");} '\r'? '\n')
//MACRO_TEXT : ( ('\\'{$channel=HIDDEN;System.out.println("set back slash to hidden");} '\r'? '\n')
//MACRO_TEXT : ( (('\\'){setText("");System.out.println("set back slash to empty");} '\r'? '\n')
MACRO_TEXT : (('\\' '\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT : (('\\' '\r'? '\n') | (~('\n')))*;
//MACRO_TEXT : (('\\' '\n') | (~('\n')))*;
//MACRO_TEXT : (('\\' '\n') | (~('\n' | '\r')))*;
//MACRO_TEXT : ( ('\\' '\r'? '\n') | (~('\r'|'\n')))* -> ( ('\r'? '\n') | (~('\r'|'\n')))*;
//MACRO_TEXT : (('\\'{setText("");} '\r'? '\n') | (~('\r'|'\n')))*;
/*
MACRO_TEXT : ((('\\' '\r'? '\n') | (~('\r'|'\n')))*)
{
String origMultiLineStr = getText();
String newMultiLineStr = origMultiLineStr.replace("\\", "");
setText(newMultiLineStr);
};
*/
//MACRO_TEXT : ( (('\\' '\r'? '\n')=>('\r' '\n')) | (~('\r'|'\n')))*;
// Handles a macro definition line: records the macro name, its optional parameter
// list and its text in the defines table, then drops the token via skip().
// The stored List has the macro text at index 0 followed by the parameter names.
// NOTE(review): in this version the char stream is switched to the macro text inside
// the optional group, i.e. before the closing line break of the directive has been
// matched. The rest of the rule is then matched against the new stream, which looks
// like the cause of the reported errors expecting a newline - confirm.
DIRECTIVE
@init{
List args = new ArrayList();
boolean condition = true; // NOTE(review): never read in this rule
String arg0Text = "";
String arg1Text = "";
String definedContent = "";
String defineId = "";
}
: ('#define' WS* defineMacro=RAW_IDENTIFIER
{
args.add(""); // first element will hold the macro text
}
(
( '(' // get arguments if you find them (no spaces before left paren)
(WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
')'
| ' '|'\t'|'\f'
)
( options{greedy=true;}: ' '|'\t'|'\f' )*
// store the text verbatim - tokenize when called
macroText=MACRO_TEXT
{
definedContent = macroText.getText();
// remove the trailing backslash of each continued line of a multi-line define
// NOTE(review): replace removes every backslash in the text, including those
// inside string literals such as escape sequences - confirm this is intended
definedContent = definedContent.replace("\\", "");
args.set(0, definedContent);
//process the define content, to check whether it contain the previous define
//if yes, then process it
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRStringStream(definedContent));
reset();
}
)? '\r'? '\n'
{
defineId = defineMacro.getText();
defines.put(defineId, args );
skip();
}
);
// Matches an identifier and expands it when it names a macro argument or a macro.
// Lookup order: defineArgs (arguments of a macro call) first, then defines.
// An entry of size 1 has no parameters: the token text is replaced directly.
// An entry of size greater than 1 has parameters: the call arguments are parsed,
// bound in defineArgs, and the lexer is switched onto the macro text.
IDENTIFIER @init{
List define = new ArrayList();
List foundArgs = new ArrayList();
String callArg0Text = "";
String callArg1Text = "";
} :
identifier=RAW_IDENTIFIER
{
// see if this is a macro argument
define = (List)defineArgs.get(identifier.getText());
if (define==null) {
// see if this is a macro call
define = (List)defines.get(identifier.getText());
}
}
// gated predicate: only try to read a call argument list when the entry has parameters
( {(define!=null) && (define.size()>1)}?=> (WS|COMMENT)?
// take in arguments if macro call requires them
'('
callArg0=EXPR
{
callArg0Text = callArg0.getText();
foundArgs.add(callArg0Text);
}
( COMMA callArg1=EXPR
{
callArg1Text = callArg1.getText();
foundArgs.add(callArg1Text);
}
)*
{ foundArgs.size()==define.size()-1 }? // better have right amount
')'
// otherwise match nothing more: plain identifier or parameterless entry
| {!((define!=null) && (define.size()>1))}?=>
)
{
if (define!=null) {
String defineText = (String)define.get(0);
if (define.size()==1) {
//only have one value in list -> the defineText is the define para content -> just need replace directly
// NOTE(review): only the token text is replaced here; the replacement text is
// not fed back through the lexer, so identifiers inside it are not expanded
setText(defineText);
} else {
//add new dict pair: (para, call value)
for (int i=0;i<foundArgs.size();++i) {
// treat macro arguments similar to local defines
List arg = new ArrayList();
arg.add((String)foundArgs.get(i));
defineArgs.put( (String)define.get(1+i), arg );
}
// NOTE(review): entries put into defineArgs are never removed in the code shown
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRStringStream(defineText));
reset();
}
}
};
// Identifier shape without any macro handling: a letter or underscore followed by
// letters, digits or underscores. Fragment, so it only appears inside other rules.
fragment RAW_IDENTIFIER : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')* ;
NUMBER : ('0'..'9') ('0'..'9'|'a'..'z'|'A'..'Z'|'_')* ; // allow alpha suffixes on numbers (i.e. L:long)
// group symbols into categories to parse EXPR
LEFT : '(' | '[' | '{' ; // any opening bracket
RIGHT : ')' | ']' | '}' ; // any closing bracket
COMMA : ',' ;
OPERATOR : '!' | '#' | '$' | '%' | '&' | '*' | '+' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '\\' | '^' | '`' | '|' | '~' ;
// One macro call argument. Shape: optional whitespace, an optional NUMBER or
// IDENTIFIER, then optionally one of (bracketed comma-separated EXPR list, STRING,
// OPERATOR) followed by another EXPR. The rule is recursive, and a bare COMMA is
// only accepted inside a bracketed group.
fragment EXPR // allow just about anything without being ambiguous
: (WS)? (NUMBER|IDENTIFIER)?
(
( LEFT EXPR ( COMMA EXPR )* RIGHT
| STRING
| OPERATOR // quotes, COMMA, LEFT, and RIGHT not in here
)
EXPR
)?
;
//INT : '0'..'9'+ ;
// Floating point literal in one of three forms:
// digits, a dot, optional digits, optional exponent
// a dot, digits, optional exponent
// digits followed by a mandatory exponent
FLOAT
: ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
| '.' ('0'..'9')+ EXPONENT?
| ('0'..'9')+ EXPONENT
;
// A single whitespace character (space, tab, CR or LF), sent to the hidden channel
// rather than skipped.
WS : ( ' '
| '\t'
| '\r'
| '\n'
) {$channel=HIDDEN;}
;
//RestSymbo : '{' | '}' | '&' | ';' | ',' | '+' | '-' | ')' | '(' | '~' | '/' | '`' | '$' | '@' | '%' | '^' | '#' | '\\' ;
// Double-quoted string literal: escape sequences or any character other than a
// backslash or a double quote. The token text includes both quotes.
STRING
: '"' ( ESC_SEQ | ~('\\'|'"') )* '"'
;
// Single-quoted character literal holding exactly one escape sequence or one
// character other than a single quote or a backslash.
CHAR: '\'' ( ESC_SEQ | ~('\''|'\\') ) '\''
;
// Exponent part of a FLOAT: e or E, an optional sign, one or more digits.
fragment
EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
// One hexadecimal digit, either case.
fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
// Escape sequence inside STRING or CHAR: a backslash followed by one of the listed
// characters, or a unicode escape, or an octal escape.
fragment
ESC_SEQ
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UNICODE_ESC
| OCTAL_ESC
;
// Octal escape of one to three digits; the three-digit form limits the first digit to 0..3.
fragment
OCTAL_ESC
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
// Unicode escape: backslash, the letter u, then exactly four hexadecimal digits.
fragment
UNICODE_ESC
: '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
// Parser rules. header accepts zero or more include rules; include is a single
// INCLUDE token. These are the only parser rules in the grammar as shown.
header
: include*;
include : INCLUDE;//'#include ' '<' ID ('.h' | '.ddl') '>';
其中,这部分的代码:
//process the define content, to check whether it contain the previous define
//if yes, then process it
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRStringStream(definedContent));
reset();
是后来加进去的,目的是处理多层(嵌套)的宏定义,即形如:
#define B C
#define A B
的内容的。
上面的完整的代码,用于处理内容:
……
#define get_dev_var_value(a,b,c) _get_dev_var_value((a),(b),METHODID(c))
//#define GET_DEV_VAR_VALUE(a,b) _get_dev_var_value((a),0,METHODID(b))
{ LABEL "Message"; HELP "TEST"; DEFINITION { …… …… } }
结果变成了:
line 1:0 mismatched character '_' expecting '\n'
line 1:0 mismatched character '"' expecting '\n'
line 1:19 mismatched character '<EOF>' expecting '"'
line 1:0 mismatched character 'R' expecting '\n'
line 1:0 mismatched character '0' expecting '\n'
line 1:0 mismatched character '"' expecting '\n'
line 1:26 mismatched character '<EOF>' expecting '"'
get_dev_var_value((a),(b),METHODID(c)) { LABEL "Message"; HELP "TEST"; DEFINITION { } } |
即:多层(嵌套)的宏定义不仅没有被去除,而且宏的内容(即 MACRO_TEXT 匹配到的内容)也残留在了输出结果中,没有被去除。
【解决过程】
1.antlr语法换成:
// Attempt 1: same rule as the original DIRECTIVE, except that the switch onto the
// macro text is moved out of the optional group into the final action, so it runs
// after the closing line break has been matched and after skip().
// NOTE(review): the switch is now unconditional, so a define without any text
// pushes a stream over the empty string - confirm this is harmless.
DIRECTIVE
@init{
List args = new ArrayList();
boolean condition = true; // NOTE(review): never read in this rule
String arg0Text = "";
String arg1Text = "";
String definedContent = "";
String defineId = "";
}
: ('#define' WS* defineMacro=RAW_IDENTIFIER
{
args.add(""); // first element will hold the macro text
}
(
( '(' // get arguments if you find them (no spaces before left paren)
(WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
')'
| ' '|'\t'|'\f'
)
( options{greedy=true;}: ' '|'\t'|'\f' )*
// store the text verbatim - tokenize when called
macroText=MACRO_TEXT
{
definedContent = macroText.getText();
// remove the trailing backslash of each continued line of a multi-line define
definedContent = definedContent.replace("\\", "");
args.set(0, definedContent);
}
)? '\r'? '\n'
{
defineId = defineMacro.getText();
defines.put(defineId, args );
skip();
//process the define content, to check whether it contain the previous define
//if yes, then process it
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRStringStream(definedContent));
reset();
}
);结果是:
_get_dev_var_value((a),(b),METHODID(c))_get_dev_var_value((a),(0),METHODID(b)) METHOD message_methode LABEL "Message"; HELP "TEST"; DEFINITION { } |
2.再换成:
// Attempt 2: the switch onto the macro text is placed in an @after action instead
// of inside the rule body; the rule body itself only records the definition.
// NOTE(review): whether this ANTLR version emits @after actions for lexer rules
// should be confirmed against the generated lexer source.
DIRECTIVE
@init{
List args = new ArrayList();
boolean condition = true; // NOTE(review): never read in this rule
String arg0Text = "";
String arg1Text = "";
String definedContent = "";
String defineId = "";
}
@after{
//process the define content, to check whether it contain the previous define
//if yes, then process it
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRStringStream(definedContent));
reset();
}
: ('#define' WS* defineMacro=RAW_IDENTIFIER
{
args.add(""); // first element will hold the macro text
}
(
( '(' // get arguments if you find them (no spaces before left paren)
(WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
')'
| ' '|'\t'|'\f'
)
( options{greedy=true;}: ' '|'\t'|'\f' )*
// store the text verbatim - tokenize when called
macroText=MACRO_TEXT
{
definedContent = macroText.getText();
// remove the trailing backslash of each continued line of a multi-line define
definedContent = definedContent.replace("\\", "");
args.set(0, definedContent);
}
)? '\r'? '\n'
{
defineId = defineMacro.getText();
defines.put(defineId, args );
skip();
}
);结果是:
METHOD message_methode …… } } |
3.又换成:
// Attempt 3: the @after action is left empty and the switch onto the macro text is
// placed in a separate trailing action that follows the closing parenthesis of the
// rule body, i.e. after the action that calls skip().
DIRECTIVE
@init{
List args = new ArrayList();
boolean condition = true; // NOTE(review): never read in this rule
String arg0Text = "";
String arg1Text = "";
String definedContent = "";
String defineId = "";
}
@after{
}
: ('#define' WS* defineMacro=RAW_IDENTIFIER
{
args.add(""); // first element will hold the macro text
}
(
( '(' // get arguments if you find them (no spaces before left paren)
(WS)? defineArg0=RAW_IDENTIFIER (WS)? {arg0Text = defineArg0.getText(); args.add(arg0Text);}
( ',' (WS)? defineArg1=RAW_IDENTIFIER (WS)? {arg1Text = defineArg1.getText(); args.add(arg1Text);} )*
')'
| ' '|'\t'|'\f'
)
( options{greedy=true;}: ' '|'\t'|'\f' )*
// store the text verbatim - tokenize when called
macroText=MACRO_TEXT
{
definedContent = macroText.getText();
// remove the trailing backslash of each continued line of a multi-line define
definedContent = definedContent.replace("\\", "");
args.set(0, definedContent);
}
)? '\r'? '\n'
{
defineId = defineMacro.getText();
defines.put(defineId, args );
skip();
}
)
{
//process the define content, to check whether it contain the previous define
//if yes, then process it
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRStringStream(definedContent));
reset();
};结果是:
METHOD message_methode …… _get_dev_var_value((a),(0),METHODID(b)); } } |
4.再去试试,用:
// Attempt 4: same rule as the original IDENTIFIER plus a debug print.
// NOTE(review): the print sits in the else branch of the defineArgs lookup, so it
// fires when the identifier was found as a macro ARGUMENT, although the message
// text calls it a normal define call - the label may be misleading.
IDENTIFIER @init{
List define = new ArrayList();
List foundArgs = new ArrayList();
String callArg0Text = "";
String callArg1Text = "";
} :
identifier=RAW_IDENTIFIER
{
// see if this is a macro argument
define = (List)defineArgs.get(identifier.getText());
if (define==null) {
// see if this is a macro call
define = (List)defines.get(identifier.getText());
}
else
{
//is normal macro replacement
System.out.println("normal define call=" + (String)define.get(0));
}
}
// gated predicate: only try to read a call argument list when the entry has parameters
( {(define!=null) && (define.size()>1)}?=> (WS|COMMENT)?
// take in arguments if macro call requires them
'('
callArg0=EXPR
{
callArg0Text = callArg0.getText();
foundArgs.add(callArg0Text);
}
( COMMA callArg1=EXPR
{
callArg1Text = callArg1.getText();
foundArgs.add(callArg1Text);
}
)*
{ foundArgs.size()==define.size()-1 }? // better have right amount
')'
// otherwise match nothing more: plain identifier or parameterless entry
| {!((define!=null) && (define.size()>1))}?=>
)
{
if (define!=null) {
String defineText = (String)define.get(0);
if (define.size()==1) {
//only have one value in list -> the defineText is the define para content -> just need replace directly
setText(defineText);
} else {
//add new dict pair: (para, call value)
for (int i=0;i<foundArgs.size();++i) {
// treat macro arguments similar to local defines
List arg = new ArrayList();
arg.add((String)foundArgs.get(i));
defineArgs.put( (String)define.get(1+i), arg );
}
// save current lexer's state
SaveStruct ss = new SaveStruct(input);
includes.push(ss);
// switch on new input stream
setCharStream(new ANTLRStringStream(defineText));
reset();
}
}
};处理:
……
#define get_dev_var_value(d,e,f) _get_dev_var_value((d),(e),METHODID(f))
#define GET_DEV_VAR_VALUE(a,b) get_dev_var_value(a,0,b)
//#define GET_DEV_VAR_VALUE(a,b) _get_dev_var_value((a),0,METHODID(b))
{
……
GET_DEV_VAR_VALUE("Choose between \n 3, 5, 9, 17 or 33 points \n No of points:", count);
……
}
得到:
normal define call=a
normal define call=b
normal define call="Choose between \n 3, 5, 9, 17 or 33 points \n No of points:"
normal define call= count
normal define call=a
normal define call=0
normal define call=b
normal define call="Choose between \n 3, 5, 9, 17 or 33 points \n No of points:"
normal define call= transfer_function
normal define call=a
normal define call=0
normal define call=b
_get_dev_var_value((d),(e),METHODID(f))_get_dev_var_value((a),(0),METHODID(b)) METHOD message_methode …… } |
【总结】
截至目前,该问题仍未解决。