Why do I get a syntax error in my program made with flex and yacc? - grammar

I made a program that is supposed to recognize a simple grammar. When I input what I think is supposed to be a valid statement, I get an error. Specifically, if I type
int a;
int b;
it doesn't work. After I type int a; the program echoes ; for some reason. Then, when I type int b; I get a syntax error.
The lex file:
%{
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "y.tab.h"
%}
else ELSE
if IF
int INT|int
return RETURN
void VOID
while WHILE
id [a-zA-Z]*
num [0-9]*
lte <=
gte >=
equal ==
notequal !=
%%
{else} { return ELSE; }
{if} { return IF; }
{int} { return INT; }
{return} { return RETURN; }
{void} { return VOID; }
{while} { return WHILE; }
{id} { return ID; }
{num} { return NUM; }
{lte} { return LTE; }
{gte} { return GTE; }
{equal} { return EQUAL; }
{notequal} { return NOTEQUAL; }
%%
The yacc file:
/* C-Minus BNF Grammar */
%token ELSE
%token IF
%token INT
%token RETURN
%token VOID
%token WHILE
%token ID
%token NUM
%token LTE
%token GTE
%token EQUAL
%token NOTEQUAL
%%
program : declaration_list ;
declaration_list : declaration_list declaration | declaration ;
declaration : var_declaration | fun_declaration ;
var_declaration : type_specifier ID ';'
| type_specifier ID '[' NUM ']' ';' ;
type_specifier : INT | VOID ;
fun_declaration : type_specifier ID '(' params ')' compound_stmt ;
params : param_list | VOID ;
param_list : param_list ',' param
| param ;
param : type_specifier ID | type_specifier ID '[' ']' ;
compound_stmt : '{' local_declarations statement_list '}' ;
local_declarations : local_declarations var_declaration
| /* empty */ ;
statement_list : statement_list statement
| /* empty */ ;
statement : expression_stmt
| compound_stmt
| selection_stmt
| iteration_stmt
| return_stmt ;
expression_stmt : expression ';'
| ';' ;
selection_stmt : IF '(' expression ')' statement
| IF '(' expression ')' statement ELSE statement ;
iteration_stmt : WHILE '(' expression ')' statement ;
return_stmt : RETURN ';' | RETURN expression ';' ;
expression : var '=' expression | simple_expression ;
var : ID | ID '[' expression ']' ;
simple_expression : additive_expression relop additive_expression
| additive_expression ;
relop : LTE | '<' | '>' | GTE | EQUAL | NOTEQUAL ;
additive_expression : additive_expression addop term | term ;
addop : '+' | '-' ;
term : term mulop factor | factor ;
mulop : '*' | '/' ;
factor : '(' expression ')' | var | call | NUM ;
call : ID '(' args ')' ;
args : arg_list | /* empty */ ;
arg_list : arg_list ',' expression | expression ;

OK, you need to add the semicolon as a token in your language spec as well. As an FYI, do a Google search on this: there are a few lex/yacc files for the C programming language, and there are plenty of tutorials on the subject. Flex and Bison are not exactly forgiving of errors in the program spec, so you really need to understand how they work. Look for Jack Crenshaw's famous tutorial on how to build a compiler.

Lex:
id [a-zA-Z]*
num [0-9]*
Both patterns can match the empty string; use '+' instead of '*'.

Related

lex and yacc : a simple calculator with syntax error

a simple calculator support only + - * / and integer. I use GNU/Linux.
hoc1.l:
%{
#include "y.tab.h"
extern int yylval;
%}
%%
[ \t] { ; }
[0-9]+ { sscanf(yytext, "%d", &yylval); printf("\nget %d\n", yylval); return NUMBER; }
\n {return 0;}
%%
/* End-of-input hook for the scanner: always answers 1.
   (By lex convention a nonzero result means no further input follows.) */
int yywrap(void)
{
    const int no_more_input = 1;

    return no_more_input;
}
hoc1.y
%{
#include<stdio.h>
#define YYSTYPE int
%}
%token NUMBER
%left '+' '-'
%left '*' '/'
%%
list:
| list '\n'
| list expr '\n' {printf("\t%d\n",$2);}
;
expr: NUMBER { $$ = $1; }
| expr '+' expr {$$ = $1+$3;}
| expr '-' expr {$$ = $1-$3;}
| expr '*' expr {$$ = $1*$3;}
| expr '/' expr {$$ = $1/$3;}
;
%%
/* Program entry point: runs the parser once and always exits with status 0;
   the result of yyparse() is deliberately not propagated. */
int main(void)
{
    (void)yyparse();
    return 0;
}
/* Error reporter: writes the message `s`, wrapped in asterisks and followed
   by a newline, to stderr. Always returns 0. */
int yyerror(char *s)
{
    (void)fprintf(stderr, "*%s*\n", s);

    return 0;
}
runtime-error:
% ./hoc
8+9
get 8
+
get 9
*syntax error*
Why does this happen, and how do I solve it? Thanks!
You forgot to include your operators in your lex file, and you should return a nonzero value on a successful token read: a return value of 0 from yylex tells the parser that the input has ended, not that a token was matched. Remove the line in your lex file handling the newline character and replace it with the following:
[-+*/\n] { return *yytext; }
. { yyerror("unrecognized character"); return 0; }
Now it should work. Returning *yytext allows your yacc grammar to parse an expression successfully, e.g. if you get a '+', return it to allow the grammar to parse properly.

$1 of [...] has no declared type

I am unfamiliar with Yacc and trying to get an example I found here to work. When I try to compile with yacc -d calc.yacc, I get the following errors.
calc.yacc:42.17-18: $1 of `stat' has no declared type
calc.yacc:96.22-23: $1 of `expr' has no declared type
calc.yacc:105.17-18: $1 of `number' has no declared type
calc.yacc:106.20-21: $1 of `number' has no declared type
calc.yacc:110.29-30: $2 of `number' has no declared type
I tried googling and from what I can tell, the solution has to do with %type, but I'm not sure what to add.
The code is below:
%{
#include <stdio.h>
int regs[26];
int base;
%}
%start list
%union { int a; }
%type <a> expr number
%token DIGIT LETTER
%left '|'
%left '&'
%left '+' '-'
%left '*' '/' '%'
%left UMINUS /*supplies precedence for unary minus */
%% /* beginning of rules section */
list: /*empty */
|
list stat '\n'
|
list error '\n'
{
yyerrok;
}
;
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr
{
regs[$1] = $3;
}
;
expr: '(' expr ')'
{
$$ = $2;
}
|
expr '*' expr
{
$$ = $1 * $3;
}
|
expr '/' expr
{
$$ = $1 / $3;
}
|
expr '%' expr
{
$$ = $1 % $3;
}
|
expr '+' expr
{
$$ = $1 + $3;
}
|
expr '-' expr
{
$$ = $1 - $3;
}
|
expr '&' expr
{
$$ = $1 & $3;
}
|
expr '|' expr
{
$$ = $1 | $3;
}
|
'-' expr %prec UMINUS
{
$$ = -$2;
}
|
LETTER
{
$$ = regs[$1];
}
|
number
;
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
} |
number DIGIT
{
$$ = base * $1 + $2;
}
;
%%
/* Program entry point: runs the generated parser and uses the value returned
   by yyparse() as the process exit status. */
int main(void)
{
    return yyparse();
}
/* Error reporter: writes the message `s` followed by a newline to stderr.
   Returns 0 so that the int result is well defined for any caller that
   reads it (the K&R original fell off the end without returning a value). */
int yyerror(char *s)
{
    fprintf(stderr, "%s\n", s);
    return 0;
}
/* End-of-input hook: always returns 1.
   (By lex convention a nonzero result means no further input follows.) */
int yywrap(void)
{
    return 1;
}
$1, $2, and so on refer to the terms on the right-hand side of a grammar rule. For example in
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr {
regs[$1] = $3;
}
LETTER '=' expr is one of the rules, and in the braces that follow it $1 refers to LETTER. regs[$1] = $3; will be turned into a C statement, but in order to do that, yacc needs to know what type $1 has. If you add
%type <a> LETTER
after the first %type declaration (or simply list LETTER after expr) the first error will be taken care of. Same goes for DIGIT and base. Note that there is nothing that refers to the value of stat (naturally) so there is no need for a %type declaration for stat. Thus in
calc.yacc:105.17-18: $1 of `number' has no declared type
calc.yacc:106.20-21: $1 of `number' has no declared type
calc.yacc:110.29-30: $2 of `number' has no declared type
the first line implies that DIGIT has an unknown type, the second line refers to the same problem with number; finally the last line reminds you to declare the type for base. Here is the yacc code it is referring to:
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
} |
number DIGIT
{
$$ = base * $1 + $2;
}
;
Finally, without getting into too many details, the statement
regs[$1]=$3;
will be translated by yacc into something close to:
regs[YS[1].<type of LETTER>]=YS[3].<type of expr>;
where YS is a 'magic array' (actually yacc's stack); YS has the type of the declared %union. Thus you can see that to make this into legal C, yacc needs to know which member of the %union <type of LETTER> refers to. This is what the %type declaration is for.
%{
#include<stdio.h>
int regs[26];
int base;
%}
%union { int a; }
%token DIGIT LETTER
%left '|'
%left '&'
%left '+' '-'
%left '*' '/' '%'
%left UMINUS /*supplies precedence for unary minus */
%type <a> stat expr number DIGIT LETTER
%% /* beginning of rules section */
list: list stat '\n'
|
list error '\n'
{
yyerrok;
}
| /*empty */
;
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr
{
regs[$1] = $3;
}
;
expr: '(' expr ')'
{
$$ = $2;
}
|
expr '*' expr
{
$$ = $1 * $3;
}
|
expr '/' expr
{
$$ = $1 / $3;
}
|
expr '%' expr
{
$$ = $1 % $3;
}
|
expr '+' expr
{
$$ = $1 + $3;
}
|
expr '-' expr
{
$$ = $1 - $3;
}
|
expr '&' expr
{
$$ = $1 & $3;
}
|
expr '|' expr
{
$$ = $1 | $3;
}
|
'-' expr %prec UMINUS
{
$$ = -$2;
}
|
LETTER
{
$$ = regs[$1];
}
|
number
;
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
}
|
number DIGIT
{
$$ = base * $1 + $2;
}
;
%%
/* Program entry point: runs the generated parser and uses the value returned
   by yyparse() as the process exit status. */
int main(void)
{
    return yyparse();
}
/* Error reporter: writes the message `s` followed by a newline to stderr.
   Returns 0 so that the int result is well defined for any caller that
   reads it (the K&R original fell off the end without returning a value). */
int yyerror(char *s)
{
    fprintf(stderr, "%s\n", s);
    return 0;
}
/* End-of-input hook: always returns 1.
   (By lex convention a nonzero result means no further input follows.) */
int yywrap(void)
{
    return 1;
}
The %type directive is required to specify which member of the union is used for which grammar symbol. In order to use the union member a, we must declare it with the aforementioned directive.
See More here %type

Simplified smalltalk grammar using antlr - unary minus and message chaining

I am writing simple smalltalk-like grammar using antlr. It is simplified version of smalltalk, but basic ideas are the same (message passing for example).
Here is my grammar so far:
grammar GAL;
options {
//k=2;
backtrack=true;
}
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
;
INT : '0'..'9'+
;
FLOAT
: ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
| '.' ('0'..'9')+ EXPONENT?
| ('0'..'9')+ EXPONENT
;
COMMENT
: '"' ( options {greedy=false;} : . )* '"' {$channel=HIDDEN;}
;
WS : ( ' '
| '\t'
) {$channel=HIDDEN;}
;
NEW_LINE
: ('\r'?'\n')
;
STRING
: '\'' ( ESC_SEQ | ~('\\'|'\'') )* '\''
;
fragment
EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
fragment
ESC_SEQ
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UNICODE_ESC
| OCTAL_ESC
;
fragment
OCTAL_ESC
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
fragment
UNICODE_ESC
: '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
BINARY_MESSAGE_CHAR
: ('~' | '!' | '#' | '%' | '&' | '*' | '-' | '+' | '=' | '|' | '\\' | '<' | '>' | ',' | '?' | '/')
('~' | '!' | '#' | '%' | '&' | '*' | '-' | '+' | '=' | '|' | '\\' | '<' | '>' | ',' | '?' | '/')?
;
// parser
program
: NEW_LINE* (statement (NEW_LINE+ | EOF))*
;
statement
: message_sending
| return_statement
| assignment
| temp_variables
;
return_statement
: '^' statement
;
assignment
: identifier ':=' statement
;
temp_variables
: '|' identifier+ '|'
;
object
: raw_object
;
raw_object
: number
| string
| identifier
| literal
| block
| '(' message_sending ')'
;
message_sending
: keyword_message_sending
;
keyword_message_sending
: binary_message_sending keyword_message?
;
binary_message_sending
: unary_message_sending binary_message*
;
unary_message_sending
: object (unary_message)*
;
unary_message
: unary_message_selector
;
binary_message
: binary_message_selector unary_message_sending
;
keyword_message
: (NEW_LINE? single_keyword_message_selector NEW_LINE? binary_message_sending)+
;
block
:
'[' (block_signiture
)? NEW_LINE*
block_body
NEW_LINE* ']'
;
block_body
: (statement
)?
(NEW_LINE+ statement
)*
;
block_signiture
:
(':' identifier
)+ '|'
;
unary_message_selector
: identifier
;
binary_message_selector
: BINARY_MESSAGE_CHAR
;
single_keyword_message_selector
: identifier ':'
;
keyword_message_selector
: single_keyword_message_selector+
;
symbol
: '#' (string | identifier | binary_message_selector | keyword_message_selector)
;
literal
: symbol block? // if there is block then this is method
;
number
: /*'-'?*/
( INT | FLOAT )
;
string
: STRING
;
identifier
: ID
;
1. Unary Minus
I have a problem with unary minus for numbers (the commented-out part of the number rule). The problem is that minus is a valid binary message. To make things worse, two minus signs are also a valid binary message. What I need is unary minus in cases where there is no object to send the binary message to (for example, -3+4 should be unary minus because there is nothing in front of -3). Also, (-3) should be unary minus too. It would be great if 1 -- -2 were the binary message '--' with parameter -2, but I can live without that. How can I do this?
If I uncomment unary minus I get error MismatchedSetException(0!=null) when parsing something like 1-2.
2. Message chaining
What would be the best way to implement message chaining like in Smalltalk? What I mean by this is something like this:
obj message1 + 3;
message2;
+ 3;
keyword: 2+3
where every message would be sent to the same object, in this case obj. Message precedence should be kept (unary > binary > keyword).
3. Backtrack
Most of this grammar can be parsed with k=2, but when input is something like this:
1 + 2
Obj message:
1 + 2
message2: 'string'
the parser tries to match Obj as single_keyword_message_selector and raises UnwantedTokenException on the token message. If I remove k=2 and set backtrack=true (as I did), everything works as it should. How can I remove backtrack and still get the desired behaviour?
Also, most of the grammar can be parsed using k=1, so I tried to set k=2 only for rules that require it, but that is ignored. I did something like this:
rule
options { k = 2; }
: // rule definition
;
but it doesn't work until I set k in global options. What am I missing here?
Update:
Rewriting the grammar from scratch is not an ideal solution, because I have a lot of code that depends on it. Also, the Smalltalk features that are missing are missing by design. This is not intended to be another Smalltalk implementation; Smalltalk was just an inspiration.
I would be more than happy to have unary minus working in cases like -1+2 or 2+(-1). Cases like 2 -- -1 are just not that important.
Also, message chaining should be done as simply as possible. That means I don't like the idea of changing the AST I am generating.
About backtrack: I can live with it; I just asked here out of personal curiosity.
This is a slightly modified grammar that generates an AST; maybe it will help to better understand what I don't want to change. (temp_variables are probably going to be deleted; I haven't made that decision yet.)
grammar GAL;
options {
//k=2;
backtrack=true;
language=CSharp3;
output=AST;
}
tokens {
HASH = '#';
COLON = ':';
DOT = '.';
CARET = '^';
PIPE = '|';
LBRACKET = '[';
RBRACKET = ']';
LPAREN = '(';
RPAREN = ')';
ASSIGN = ':=';
}
// Options for the generated files: both the parser and the lexer classes
// are placed in the GAL.Compiler namespace.
@namespace { GAL.Compiler }
@lexer::namespace { GAL.Compiler }
// The header actions below disable warning 3021 (CLSCompliant) in the
// ANTLR-generated code. Named actions start with '@'; the '#pragma' lines
// are C# directives and keep their '#'.
@parser::header {
// Do not bug me about [System.CLSCompliant(false)]
#pragma warning disable 3021
}
@lexer::header {
// Do not bug me about [System.CLSCompliant(false)]
#pragma warning disable 3021
}
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')*
;
INT : '0'..'9'+
;
FLOAT
: ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
| '.' ('0'..'9')+ EXPONENT?
| ('0'..'9')+ EXPONENT
;
COMMENT
: '"' ( options {greedy=false;} : . )* '"' {$channel=Hidden;}
;
WS : ( ' '
| '\t'
) {$channel=Hidden;}
;
NEW_LINE
: ('\r'?'\n')
;
STRING
: '\'' ( ESC_SEQ | ~('\\'|'\'') )* '\''
;
fragment
EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
fragment
HEX_DIGIT : ('0'..'9'|'a'..'f'|'A'..'F') ;
fragment
ESC_SEQ
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UNICODE_ESC
| OCTAL_ESC
;
fragment
OCTAL_ESC
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
fragment
UNICODE_ESC
: '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
BINARY_MESSAGE_CHAR
: ('~' | '!' | '#' | '%' | '&' | '*' | '-' | '+' | '=' | '|' | '\\' | '<' | '>' | ',' | '?' | '/')
('~' | '!' | '#' | '%' | '&' | '*' | '-' | '+' | '=' | '|' | '\\' | '<' | '>' | ',' | '?' | '/')?
;
// parser
public program returns [ AstProgram program ]
: { $program = new AstProgram(); }
NEW_LINE*
( statement (NEW_LINE+ | EOF)
{ $program.AddStatement($statement.stmt); }
)*
;
statement returns [ AstNode stmt ]
: message_sending
{ $stmt = $message_sending.messageSending; }
| return_statement
{ $stmt = $return_statement.ret; }
| assignment
{ $stmt = $assignment.assignment; }
| temp_variables
{ $stmt = $temp_variables.tempVars; }
;
return_statement returns [ AstReturn ret ]
: CARET statement
{ $ret = new AstReturn($CARET, $statement.stmt); }
;
assignment returns [ AstAssignment assignment ]
: dotted_expression ASSIGN statement
{ $assignment = new AstAssignment($dotted_expression.dottedExpression, $ASSIGN, $statement.stmt); }
;
temp_variables returns [ AstTempVariables tempVars ]
: p1=PIPE
{ $tempVars = new AstTempVariables($p1); }
( identifier
{ $tempVars.AddVar($identifier.identifier); }
)+
p2=PIPE
{ $tempVars.EndToken = $p2; }
;
object returns [ AstNode obj ]
: number
{ $obj = $number.number; }
| string
{ $obj = $string.str; }
| dotted_expression
{ $obj = $dotted_expression.dottedExpression; }
| literal
{ $obj = $literal.literal; }
| block
{ $obj = $block.block; }
| LPAREN message_sending RPAREN
{ $obj = $message_sending.messageSending; }
;
message_sending returns [ AstKeywordMessageSending messageSending ]
: keyword_message_sending
{ $messageSending = $keyword_message_sending.keywordMessageSending; }
;
keyword_message_sending returns [ AstKeywordMessageSending keywordMessageSending ]
: binary_message_sending
{ $keywordMessageSending = new AstKeywordMessageSending($binary_message_sending.binaryMessageSending); }
( keyword_message
{ $keywordMessageSending = $keywordMessageSending.NewMessage($keyword_message.keywordMessage); }
)?
;
binary_message_sending returns [ AstBinaryMessageSending binaryMessageSending ]
: unary_message_sending
{ $binaryMessageSending = new AstBinaryMessageSending($unary_message_sending.unaryMessageSending); }
( binary_message
{ $binaryMessageSending = $binaryMessageSending.NewMessage($binary_message.binaryMessage); }
)*
;
unary_message_sending returns [ AstUnaryMessageSending unaryMessageSending ]
: object
{ $unaryMessageSending = new AstUnaryMessageSending($object.obj); }
(
unary_message
{ $unaryMessageSending = $unaryMessageSending.NewMessage($unary_message.unaryMessage); }
)*
;
unary_message returns [ AstUnaryMessage unaryMessage ]
: unary_message_selector
{ $unaryMessage = new AstUnaryMessage($unary_message_selector.unarySelector); }
;
binary_message returns [ AstBinaryMessage binaryMessage ]
: binary_message_selector unary_message_sending
{ $binaryMessage = new AstBinaryMessage($binary_message_selector.binarySelector, $unary_message_sending.unaryMessageSending); }
;
keyword_message returns [ AstKeywordMessage keywordMessage ]
:
{ $keywordMessage = new AstKeywordMessage(); }
(
NEW_LINE?
single_keyword_message_selector
NEW_LINE?
binary_message_sending
{ $keywordMessage.AddMessagePart($single_keyword_message_selector.singleKwSelector, $binary_message_sending.binaryMessageSending); }
)+
;
block returns [ AstBlock block ]
: LBRACKET
{ $block = new AstBlock($LBRACKET); }
(
block_signiture
{ $block.Signiture = $block_signiture.blkSigniture; }
)? NEW_LINE*
block_body
{ $block.Body = $block_body.blkBody; }
NEW_LINE*
RBRACKET
{ $block.SetEndToken($RBRACKET); }
;
// Body of a block: an optional first statement followed by any number of
// further statements, each preceded by at least one NEW_LINE. The statements
// are collected into blkBody in the order they are matched.
block_body returns [ IList<AstNode> blkBody ]
@init { $blkBody = new List<AstNode>(); }
:
( s1=statement
{ $blkBody.Add($s1.stmt); }
)?
( NEW_LINE+ s2=statement
{ $blkBody.Add($s2.stmt); }
)*
;
// Block signature: one or more ':identifier' pairs terminated by PIPE.
// Each pair is added to blkSigniture, and the closing PIPE is recorded as
// its end token.
block_signiture returns [ AstBlockSigniture blkSigniture ]
@init { $blkSigniture = new AstBlockSigniture(); }
:
( COLON identifier
{ $blkSigniture.AddIdentifier($COLON, $identifier.identifier); }
)+ PIPE
{ $blkSigniture.SetEndToken($PIPE); }
;
unary_message_selector returns [ AstUnaryMessageSelector unarySelector ]
: identifier
{ $unarySelector = new AstUnaryMessageSelector($identifier.identifier); }
;
binary_message_selector returns [ AstBinaryMessageSelector binarySelector ]
: BINARY_MESSAGE_CHAR
{ $binarySelector = new AstBinaryMessageSelector($BINARY_MESSAGE_CHAR); }
;
single_keyword_message_selector returns [ AstIdentifier singleKwSelector ]
: identifier COLON
{ $singleKwSelector = $identifier.identifier; }
;
// Keyword message selector: one or more 'identifier:' parts, each added to
// keywordSelector in the order matched.
keyword_message_selector returns [ AstKeywordMessageSelector keywordSelector ]
@init { $keywordSelector = new AstKeywordMessageSelector(); }
:
( single_keyword_message_selector
{ $keywordSelector.AddIdentifier($single_keyword_message_selector.singleKwSelector); }
)+
;
symbol returns [ AstSymbol symbol ]
: HASH
( string
{ $symbol = new AstSymbol($HASH, $string.str); }
| identifier
{ $symbol = new AstSymbol($HASH, $identifier.identifier); }
| binary_message_selector
{ $symbol = new AstSymbol($HASH, $binary_message_selector.binarySelector); }
| keyword_message_selector
{ $symbol = new AstSymbol($HASH, $keyword_message_selector.keywordSelector); }
)
;
literal returns [ AstNode literal ]
: symbol
{ $literal = $symbol.symbol; }
( block
{ $literal = new AstMethod($symbol.symbol, $block.block); }
)? // if there is block then this is method
;
number returns [ AstNode number ]
: /*'-'?*/
( INT
{ $number = new AstInt($INT); }
| FLOAT
{ $number = new AstInt($FLOAT); }
)
;
string returns [ AstString str ]
: STRING
{ $str = new AstString($STRING); }
;
dotted_expression returns [ AstDottedExpression dottedExpression ]
: i1=identifier
{ $dottedExpression = new AstDottedExpression($i1.identifier); }
(DOT i2=identifier
{ $dottedExpression.AddIdentifier($i2.identifier); }
)*
;
identifier returns [ AstIdentifier identifier ]
: ID
{ $identifier = new AstIdentifier($ID); }
;
Hi Smalltalk Grammar writer,
Firstly, to get a smalltalk grammar to parse properly (1 -- -2) and to support the optional '.' on the last statement, etc., you should treat whitespace as significant. Don't put it on the hidden channel.
The grammar so far is not breaking down the rules into small enough fragments. This will be a problem like you have seen with K=2 and backtracking.
I suggest you check out a working Smalltalk grammar in ANTLR as defined by the Redline Smalltalk project http://redline.st & https://github.com/redline-smalltalk/redline-smalltalk
Rgs, James.

ANTLR Grammar for Liquid Markup?

Anyone know of any ANTLR grammar for Liquid Markup or a JAVA library that can work with it? I have taken a look at Jangod but it doesn't seem to work much.
Thanks!
Here's a grammar:
grammar Liquid;
options {
output=AST;
ASTLabelType=CommonTree;
}
tokens {
ASSIGNMENT;
ATTRIBUTES;
BLOCK;
CAPTURE;
CASE;
COMMENT;
CYCLE;
ELSE;
FILTERS;
FILTER;
FOR_ARRAY;
FOR_RANGE;
GROUP;
IF;
INCLUDE;
LOOKUP;
OUTPUT;
PARAMS;
PLAIN;
RAW;
TABLE;
UNLESS;
WHEN;
WITH;
}
// Members injected into the generated parser: instead of reporting a
// recognition error and continuing, rethrow it as a RuntimeException.
@parser::members {
@Override
public void reportError(RecognitionException e) {
throw new RuntimeException(e);
}
}
// Members injected into the generated lexer.
@lexer::members {
// Flag set and cleared by the OutStart/TagStart and OutEnd/TagEnd lexer rules;
// the tag-only token rules are gated on it.
private boolean inTag = false;
// True when the next two characters are "{{" or "{%" ('\u0025' is '%').
private boolean openTagAhead() {
return input.LA(1) == '{' && (input.LA(2) == '{' || input.LA(2) == '\u0025');
}
// Rethrow recognition errors as RuntimeException instead of recovering.
@Override
public void reportError(RecognitionException e) {
throw new RuntimeException(e);
}
}
/* parser rules */
parse
: block EOF -> block
;
block
: (options{greedy=true;}: atom)* -> ^(BLOCK atom*)
;
atom
: tag
| output
| assignment
| Other -> ^(PLAIN Other)
;
tag
: raw_tag
| comment_tag
| if_tag
| unless_tag
| case_tag
| cycle_tag
| for_tag
| table_tag
| capture_tag
| include_tag
;
raw_tag
: TagStart RawStart TagEnd raw_body TagStart RawEnd TagEnd
-> ^(RAW raw_body)
;
raw_body
: ~TagStart*
;
comment_tag
: TagStart CommentStart TagEnd comment_body TagStart CommentEnd TagEnd
-> ^(COMMENT comment_body)
;
comment_body
: ~TagStart*
;
if_tag
: TagStart IfStart expr TagEnd block else_tag? TagStart IfEnd TagEnd
-> ^(IF expr block ^(ELSE else_tag?))
;
else_tag
: TagStart Else TagEnd block
-> block
;
unless_tag
: TagStart UnlessStart expr TagEnd block else_tag? TagStart UnlessEnd TagEnd
-> ^(UNLESS expr block ^(ELSE else_tag?))
;
case_tag
: TagStart CaseStart expr TagEnd when_tag+ else_tag? TagStart CaseEnd TagEnd
-> ^(CASE expr when_tag+ ^(ELSE else_tag?))
;
when_tag
: TagStart When expr TagEnd block
-> ^(WHEN expr block)
;
cycle_tag
: TagStart Cycle cycle_group? expr (Comma expr)* TagEnd
-> ^(CYCLE ^(GROUP cycle_group?) expr+)
;
cycle_group
: expr Col -> expr
;
for_tag
: for_array
| for_range
;
for_array // attributes must be 'limit' or 'offset'!
: TagStart ForStart Id In lookup attribute* TagEnd block TagStart ForEnd TagEnd
-> ^(FOR_ARRAY Id lookup ^(ATTRIBUTES attribute*) block)
;
attribute
: Id Col expr -> ^(Id expr)
;
for_range
: TagStart ForStart Id In OPar expr DotDot expr CPar TagEnd block TagStart ForEnd TagEnd
-> ^(FOR_RANGE Id expr expr block)
;
table_tag // attributes must be 'limit' or 'cols'!
: TagStart TableStart Id In Id attribute* TagEnd block TagStart TableEnd TagEnd
-> ^(TABLE Id Id ^(ATTRIBUTES attribute*) block)
;
capture_tag
: TagStart CaptureStart Id TagEnd block TagStart CaptureEnd TagEnd
-> ^(CAPTURE Id block)
;
include_tag
: TagStart Include a=Str (With b=Str)? TagEnd
-> ^(INCLUDE $a ^(WITH $b?))
;
output
: OutStart expr filter* OutEnd
-> ^(OUTPUT expr ^(FILTERS filter*))
;
filter
: Pipe Id params?
-> ^(FILTER Id ^(PARAMS params?))
;
params
: Col expr (Comma expr)* -> expr+
;
assignment
: TagStart Assign Id EqSign expr TagEnd
-> ^(ASSIGNMENT Id expr)
;
expr
: or_expr
;
or_expr
: and_expr (Or^ and_expr)*
;
and_expr
: eq_expr (And^ eq_expr)*
;
eq_expr
: rel_expr ((Eq | NEq)^ rel_expr)*
;
rel_expr
: term ((LtEq | Lt | GtEq | Gt)^ term)?
;
term
: Num
| Str
| True
| False
| Nil
| lookup
;
lookup
: Id (Dot Id)* -> ^(LOOKUP Id+)
;
/* lexer rules */
OutStart : '{{' {inTag=true;};
OutEnd : '}}' {inTag=false;};
TagStart : '{%' {inTag=true;};
TagEnd : '%}' {inTag=false;};
Str : {inTag}?=> (SStr | DStr);
DotDot : {inTag}?=> '..';
Dot : {inTag}?=> '.';
NEq : {inTag}?=> '!=';
Eq : {inTag}?=> '==';
EqSign : {inTag}?=> '=';
GtEq : {inTag}?=> '>=';
Gt : {inTag}?=> '>';
LtEq : {inTag}?=> '<=';
Lt : {inTag}?=> '<';
Pipe : {inTag}?=> '|';
Col : {inTag}?=> ':';
Comma : {inTag}?=> ',';
OPar : {inTag}?=> '(';
CPar : {inTag}?=> ')';
Num : {inTag}?=> Digit+;
WS : {inTag}?=> (' ' | '\t' | '\r' | '\n')+ {skip();};
Id
: {inTag}?=> (Letter | '_') (Letter | '_' | '-' | Digit)*
{
if($text.equals("capture")) $type = CaptureStart;
else if($text.equals("endcapture")) $type = CaptureEnd;
else if($text.equals("comment")) $type = CommentStart;
else if($text.equals("endcomment")) $type = CommentEnd;
else if($text.equals("raw")) $type = RawStart;
else if($text.equals("endraw")) $type = RawEnd;
else if($text.equals("if")) $type = IfStart;
else if($text.equals("endif")) $type = IfEnd;
else if($text.equals("unless")) $type = UnlessStart;
else if($text.equals("endunless")) $type = UnlessEnd;
else if($text.equals("else")) $type = Else;
else if($text.equals("case")) $type = CaseStart;
else if($text.equals("endcase")) $type = CaseEnd;
else if($text.equals("when")) $type = When;
else if($text.equals("cycle")) $type = Cycle;
else if($text.equals("for")) $type = ForStart;
else if($text.equals("endfor")) $type = ForEnd;
else if($text.equals("in")) $type = In;
else if($text.equals("and")) $type = And;
else if($text.equals("or")) $type = Or;
else if($text.equals("tablerow")) $type = TableStart;
else if($text.equals("endtablerow")) $type = TableEnd;
else if($text.equals("assign")) $type = Assign;
else if($text.equals("true")) $type = True;
else if($text.equals("false")) $type = False;
else if($text.equals("nil")) $type = Nil;
else if($text.equals("include")) $type = Include;
else if($text.equals("with")) $type = With;
}
;
Other
: ({!inTag && !openTagAhead()}?=> . )+
{
String s = getText().replaceAll("\\s+", " ").trim();
if(s.isEmpty()) {
skip();
}
else {
setText(s);
}
}
;
/* fragment rules */
fragment Letter : 'a'..'z' | 'A'..'Z';
fragment Digit : '0'..'9';
fragment SStr : '\'' ~'\''* '\'';
fragment DStr : '"' ~'"'* '"';
fragment CommentStart : ;
fragment CommentEnd : ;
fragment RawStart : ;
fragment RawEnd : ;
fragment IfStart : ;
fragment IfEnd : ;
fragment UnlessStart : ;
fragment UnlessEnd : ;
fragment Else : ;
fragment CaseStart : ;
fragment CaseEnd : ;
fragment When : ;
fragment Cycle : ;
fragment ForStart : ;
fragment ForEnd : ;
fragment In : ;
fragment And : ;
fragment Or : ;
fragment TableStart : ;
fragment TableEnd : ;
fragment Assign : ;
fragment True : ;
fragment False : ;
fragment Nil : ;
fragment Include : ;
fragment With : ;
fragment CaptureStart : ;
fragment CaptureEnd : ;
I have dusted the thing off a bit and put it in a Github repository: https://github.com/bkiers/Liqp
Be aware: although I have successfully used this grammar in the past, the input might have been rather "easy". If you're going to use it, and run into problems, I'd appreciate it if you let me know. If you're looking for a more robust, thoroughly tested library/parser/grammar, this might not be what you're looking for.

semantic phase of c compiler

If I write 1=a in the sample C program, it isn't detected as an error. How do I solve this problem? Also, how do I implement global and local scope for variables? Thanks to anyone who can help.
clexer.lex source code
D [0-9]
L [a-zA-Z_]
H [a-fA-F0-9]
E [Ee][+-]?{D}+
FS (f|F|l|L)
IS (u|U|l|L)*
%{
#include <stdio.h>
#include "y.tab.h"
int cnt=1;
int line=1;
char tempid[100];
%}
%%
"/*" {comment();}
"auto" { cnt+=yyleng;ECHO; return(AUTO); }
"break" { cnt+=yyleng;ECHO; return(BREAK); }
"case" { cnt+=yyleng;ECHO; return(CASE); }
"char" { cnt+=yyleng;ECHO; return(CHAR); }
"const" { cnt+=yyleng;ECHO; return(CONST); }
"continue" { cnt+=yyleng;ECHO; return(CONTINUE); }
"default" { cnt+=yyleng;ECHO; return(DEFAULT); }
"do" { cnt+=yyleng;ECHO; return(DO); }
"double" { cnt+=yyleng;ECHO; return(DOUBLE); }
"else" { cnt+=yyleng;ECHO; return(ELSE); }
"enum" { cnt+=yyleng;ECHO; return(ENUM); }
"extern" { cnt+=yyleng;ECHO; return(EXTERN); }
"float" { cnt+=yyleng;ECHO; return(FLOAT); }
"for" { cnt+=yyleng;ECHO; return(FOR); }
"goto" { cnt+=yyleng;ECHO; return(GOTO); }
"if" { cnt+=yyleng;ECHO; return(IF); }
"int" { cnt+=yyleng;ECHO; return(INT); }
"long" { cnt+=yyleng;ECHO; return(LONG); }
"register" { cnt+=yyleng;ECHO; return(REGISTER); }
"return" { cnt+=yyleng;ECHO; return(RETURN); }
"short" { cnt+=yyleng;ECHO; return(SHORT); }
"signed" { cnt+=yyleng;ECHO; return(SIGNED); }
"sizeof" { cnt+=yyleng;ECHO; return(SIZEOF); }
"static" { cnt+=yyleng;ECHO; return(STATIC); }
"struct" { cnt+=yyleng;ECHO; return(STRUCT); }
"switch" { cnt+=yyleng;ECHO; return(SWITCH); }
"typedef" { cnt+=yyleng;ECHO; return(TYPEDEF); }
"union" { cnt+=yyleng;ECHO; return(UNION); }
"unsigned" { cnt+=yyleng;ECHO; return(UNSIGNED); }
"void" { cnt+=yyleng;ECHO; return(VOID); }
"volatile" { cnt+=yyleng;ECHO; return(VOLATILE); }
"while" { cnt+=yyleng;ECHO; return(WHILE); }
(['])+({L}|{D})+([']) { cnt+=yyleng;ECHO; return(SINGLE); }
{L}({L}|{D})* { cnt+=yyleng;ECHO; strcpy(tempid,yytext);return(IDENTIFIER); }
0[xX]{H}+{IS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
0{D}+{IS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{IS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
L?'(\\.|[^\\'])+' { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{E}{FS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}*"."{D}+({E})?{FS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+"."{D}*({E})?{FS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
L?\"(\\.|[^\\"])*\" { cnt+=yyleng;ECHO; return(STRING_LITERAL); }
"..." { cnt+=yyleng;ECHO; return(ELLIPSIS); }
">>=" { cnt+=yyleng;ECHO; return(RIGHT_ASSIGN); }
"<<=" { cnt+=yyleng;ECHO; return(LEFT_ASSIGN); }
"+=" { cnt+=yyleng;ECHO; return(ADD_ASSIGN); }
"-=" { cnt+=yyleng;ECHO; return(SUB_ASSIGN); }
"*=" { cnt+=yyleng;ECHO; return(MUL_ASSIGN); }
"/=" { cnt+=yyleng;ECHO; return(DIV_ASSIGN); }
"%=" { cnt+=yyleng;ECHO; return(MOD_ASSIGN); }
"&=" { cnt+=yyleng;ECHO; return(AND_ASSIGN); }
"^=" { cnt+=yyleng;ECHO; return(XOR_ASSIGN); }
"|=" { cnt+=yyleng;ECHO; return(OR_ASSIGN); }
">>" { cnt+=yyleng;ECHO; return(RIGHT_OP); }
"<<" { cnt+=yyleng;ECHO; return(LEFT_OP); }
"++" { cnt+=yyleng;ECHO; return(INC_OP); }
"--" { cnt+=yyleng;ECHO; return(DEC_OP); }
"->" { cnt+=yyleng;ECHO; return(PTR_OP); }
"&&" { cnt+=yyleng;ECHO; return(AND_OP); }
"||" { cnt+=yyleng;ECHO; return(OR_OP); }
"<=" { cnt+=yyleng;ECHO; return(LE_OP); }
">=" { cnt+=yyleng;ECHO; return(GE_OP); }
"==" { cnt+=yyleng;ECHO; return(EQ_OP); }
"!=" { cnt+=yyleng;ECHO; return(NE_OP); }
";" { cnt+=yyleng;ECHO; return(';'); }
("{"|"<%") { cnt+=yyleng;ECHO; return('{'); }
("}"|"%>") { cnt+=yyleng;ECHO; return('}'); }
"," { cnt+=yyleng;ECHO; return(','); }
":" { cnt+=yyleng;ECHO; return(':'); }
"=" { cnt+=yyleng;ECHO; return('='); }
"(" { cnt+=yyleng;ECHO; return('('); }
")" { cnt+=yyleng;ECHO; return(')'); }
("["|"<:") { cnt+=yyleng;ECHO; return('['); }
("]"|":>") { cnt+=yyleng;ECHO; return(']'); }
"." { cnt+=yyleng;ECHO; return('.'); }
"&" { cnt+=yyleng;ECHO; return('&'); }
"!" { cnt+=yyleng;ECHO; return('!'); }
"~" { cnt+=yyleng;ECHO; return('~'); }
"-" { cnt+=yyleng;ECHO; return('-'); }
"+" { cnt+=yyleng;ECHO; return('+'); }
"*" { cnt+=yyleng;ECHO; return('*'); }
"/" { cnt+=yyleng;ECHO; return('/'); }
"%" { cnt+=yyleng;ECHO; return('%'); }
"<" { cnt+=yyleng;ECHO; return('<'); }
">" { cnt+=yyleng;ECHO; return('>'); }
"^" { cnt+=yyleng;ECHO; return('^'); }
"|" { cnt+=yyleng;ECHO; return('|'); }
"?" { cnt+=yyleng;ECHO; return('?'); }
[ ] {cnt+=yyleng;ECHO;}
[\t\v\f] { cnt+=yyleng; }
[\n] {line++;cnt=1;}
. { /* ignore bad characters */ }
%%
/* End-of-input hook: always returns 1.
   (By lex convention a nonzero result means no further input follows.) */
int yywrap(void)
{
    return 1;
}
/*
 * Discard the rest of a block comment. Invoked from the rule that matches
 * the opening slash-star.
 *
 * Reads characters with input() until a '*' immediately followed by '/' is
 * consumed, or until the input ends. While skipping, a newline increments
 * `line` and resets `cnt` to 1; any other skipped character increments `cnt`.
 * The comment text is discarded, not echoed.
 *
 * End of input is recognised both as 0 and as EOF, since scanner generators
 * differ in what input() returns there. The character is kept in an int so
 * that EOF can never be confused with a real character.
 *
 * Always returns 0; the calling rule ignores the result.
 */
int comment(void)
{
    int c;

    for (;;) {
        /* Skip ahead to the next '*', keeping the position counters current. */
        while ((c = input()) != '*' && c != 0 && c != EOF) {
            if (c == '\n') {
                line++;
                cnt = 1;
            } else {
                cnt++;
            }
        }
        if (c != '*')
            return 0;   /* input ended inside the comment */

        c = input();
        if (c == '/' || c == 0 || c == EOF)
            return 0;   /* comment closed, or input ended right after '*' */

        /* Not the terminator: push the character back so the scan above sees
           it again (it may be a newline or another '*'). */
        unput(c);
    }
}
cparser.yacc source code
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symbol_table.h"
/* State defined outside this file and shared with the parser.
 *   yyin / yyout : input and echo streams; main() points them at files.
 *   line, cnt    : reported as "Pos : line : cnt" in every diagnostic below.
 *   column       : only referenced from commented-out code in yyerror().
 *   tempid       : the name handed to getsym()/putsym(); presumably the most
 *                  recently scanned identifier -- TODO confirm in the scanner.
 */
extern FILE *yyin;
extern FILE *yyout;
extern int column;
extern int line;
extern int cnt;
extern char *yytext,tempid[100];
/* temp : type code set by the type_specifier actions and stored by install().
 * err  : set to 1 by yyerror(); main() prints the success message when it is 0.
 * err1 : set to 1 by install() and type_err(); never read in this file.
 */
int temp,err,err1=0;
/*
 * Declare the identifier currently held in `tempid`, giving it the type code
 * currently held in `temp`.
 *
 * If the name is already in the symbol table this is a redeclaration: the
 * type-code legend and a semantic-error message (with the current line/cnt
 * position) are printed and the program terminates with a failure status so
 * that calling scripts can detect the error.
 *
 * On success the new entry is pushed onto the symbol table, err1 is set to 1
 * (as the code has always done) and 0 is returned. The return value carries
 * no information; grammar actions call this as a statement.
 */
int install(void)
{
    symrec *s = getsym(tempid);

    if (s != 0) {
        printf(" VOID=1 ");
        printf(" CHAR=2 ");
        printf(" INT=3 ");
        printf(" FLOAT=4 ");
        printf(" DOUBLE=4 ");
        printf("\n\nThere is a Semantic error at Pos : %d : %d : %s is already defined as %d\n\n",
               line, cnt, s->name, s->type);
        exit(EXIT_FAILURE);
    }

    /* A null result means the table entry could not be created. */
    if (putsym(tempid, temp) == 0) {
        fprintf(stderr, "out of memory while declaring %s\n", tempid);
        exit(EXIT_FAILURE);
    }

    err1 = 1;
    return 0;
}
/*
 * Look up the identifier currently held in `tempid`.
 *
 * Returns the type code recorded for it in the symbol table. If the name is
 * not in the table, a semantic-error message with the current line/cnt
 * position is printed and the program terminates with a failure status; the
 * function does not return in that case.
 */
int context_check(void)
{
    symrec *s = getsym(tempid);

    if (s == 0) {
        printf("\n\nThere is a Semantic error at Pos : %d : %d : %s is an undeclared identifier\n\n",
               line, cnt, tempid);
        exit(EXIT_FAILURE);
    }
    return s->type;
}
/*
 * Report a type mismatch between two type codes.
 *
 * t1, t2: type codes of the two sides (the caller only invokes this when
 *         they differ). A code of 0 on either side suppresses the report and
 *         the function simply returns 0.
 *
 * When both codes are non-zero the type-code legend and a semantic-error
 * message naming `tempid` and the current line/cnt position are printed,
 * err1 is set to 1, and the program terminates with a failure status.
 */
int type_err(int t1, int t2)
{
    if (!(t1 && t2))
        return 0;

    printf(" VOID=1 ");
    printf(" CHAR=2 ");
    printf(" INT=3 ");
    printf(" FLOAT=4 ");
    printf(" DOUBLE=4 ");
    printf("\n\nThere is a Semantic error at Pos : %d : %d : Type mismatch for %s between %d and %d \n\n",
           line, cnt, tempid, t1, t2);
    err1 = 1;
    exit(EXIT_FAILURE);
}
%}
/* Terminal symbols returned by the scanner. No %union is declared, so every
   semantic value ($$, $1, ...) is a plain int. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME SINGLE
%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
/* Dangling-else resolution: LOWER_THAN_ELSE is a precedence-only name used
   with %prec on the else-less IF rule. It is declared before ELSE, so it has
   lower precedence, and the parser shifts ELSE rather than reducing the
   shorter IF. */
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE
/* Parsing starts at translation_unit, which is defined last in the grammar. */
%start translation_unit
%%
/*
 * Expression grammar.
 *
 * The semantic value of an expression is used as an int type code. An
 * IDENTIFIER gets the code returned by context_check(); the unit productions
 * pass that code upward unchanged. Alternatives without an action fall back
 * on yacc's default ($$ = $1), so for binary operators the result is the
 * left operand's value and the right operand is not inspected.
 * NOTE(review): for CONSTANT and STRING_LITERAL the default action forwards
 * whatever value the scanner left for the token -- confirm what that is.
 */
primary_expression
: IDENTIFIER {$$=context_check();}
| CONSTANT
| STRING_LITERAL
| '(' expression ')' {$$= $2;}
;
postfix_expression
: primary_expression {$$=$1;}
| postfix_expression '[' expression ']'
| postfix_expression '(' ')'
| postfix_expression '(' argument_expression_list ')'
| postfix_expression '.' IDENTIFIER
| postfix_expression PTR_OP IDENTIFIER
| postfix_expression INC_OP
| postfix_expression DEC_OP
;
argument_expression_list
: assignment_expression
| argument_expression_list ',' assignment_expression
;
unary_expression
: postfix_expression {$$=$1;}
| INC_OP unary_expression
| DEC_OP unary_expression
| unary_operator cast_expression
| SIZEOF unary_expression
| SIZEOF '(' type_name ')'
;
unary_operator
: '&'
| '*'
| '+'
| '-'
| '~'
| '!'
;
cast_expression
: unary_expression {$$=$1;}
| '(' type_name ')' cast_expression
;
/* Binary operator levels, from tightest binding (multiplicative) to loosest
   (logical or). Precedence and left associativity are encoded by the rule
   nesting rather than by %left/%right declarations. */
multiplicative_expression
: cast_expression {$$=$1;}
| multiplicative_expression '*' cast_expression
| multiplicative_expression '/' cast_expression
| multiplicative_expression '%' cast_expression
;
additive_expression
: multiplicative_expression {$$=$1;}
| additive_expression '+' multiplicative_expression
| additive_expression '-' multiplicative_expression
;
shift_expression
: additive_expression {$$=$1;}
| shift_expression LEFT_OP additive_expression
| shift_expression RIGHT_OP additive_expression
;
relational_expression
: shift_expression {$$=$1;}
| relational_expression '<' shift_expression
| relational_expression '>' shift_expression
| relational_expression LE_OP shift_expression
| relational_expression GE_OP shift_expression
;
equality_expression
: relational_expression {$$=$1;}
| equality_expression EQ_OP relational_expression
| equality_expression NE_OP relational_expression
;
and_expression
: equality_expression {$$=$1;}
| and_expression '&' equality_expression
;
exclusive_or_expression
: and_expression {$$=$1;}
| exclusive_or_expression '^' and_expression
;
inclusive_or_expression
: exclusive_or_expression {$$=$1;}
| inclusive_or_expression '|' exclusive_or_expression
;
logical_and_expression
: inclusive_or_expression {$$=$1;}
| logical_and_expression AND_OP inclusive_or_expression
;
logical_or_expression
: logical_and_expression {$$=$1;}
| logical_or_expression OR_OP logical_and_expression
;
conditional_expression
: logical_or_expression {$$=$1;}
| logical_or_expression '?' expression ':' conditional_expression
;
/* Assignment is the only place a type check happens: when the codes of the
   target and the assigned expression differ, type_err() is called (it stays
   silent if either code is 0). The action does not assign $$, so the
   assignment's own value is the default, i.e. the target's code. */
assignment_expression
: conditional_expression {$$=$1;}
| unary_expression assignment_operator assignment_expression {if($1!=$3){type_err($1,$3);}}
;
assignment_operator
: '='
| MUL_ASSIGN
| DIV_ASSIGN
| MOD_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
| LEFT_ASSIGN
| RIGHT_ASSIGN
| AND_ASSIGN
| XOR_ASSIGN
| OR_ASSIGN
;
expression
: assignment_expression {$$=$1;}
| expression ',' assignment_expression
;
constant_expression
: conditional_expression
;
/*
 * Declarations and type specifiers.
 *
 * Declaring a name is a two-step side effect: a type_specifier action stores
 * a type code in the global `temp`, and the later declarator action calls
 * install(), which records the name in `tempid` with that code.
 */
declaration
: declaration_specifiers ';'
| declaration_specifiers init_declarator_list ';'
;
declaration_specifiers
: storage_class_specifier
| storage_class_specifier declaration_specifiers
| type_specifier
| type_specifier declaration_specifiers
| type_qualifier
| type_qualifier declaration_specifiers
;
init_declarator_list
: init_declarator
| init_declarator_list ',' init_declarator
;
init_declarator
: declarator
| declarator '=' initializer
;
storage_class_specifier
: TYPEDEF
| EXTERN
| STATIC
| AUTO
| REGISTER
;
/* Type codes written to `temp`: VOID=1, CHAR=2, SHORT/INT/LONG=3,
   FLOAT/DOUBLE=4. SIGNED, UNSIGNED, struct/union, enum and TYPE_NAME have no
   action, so `temp` keeps whatever value it had before. */
type_specifier
: VOID {temp=1;}
| CHAR {temp=2;}
| SHORT {temp=3;}
| INT {temp=3;}
| LONG {temp=3;}
| FLOAT {temp=4;}
| DOUBLE {temp=4;}
| SIGNED
| UNSIGNED
| struct_or_union_specifier
| enum_specifier
| TYPE_NAME
;
/* Both tagged forms call install().
   NOTE(review): install() rejects a name that is already in the table, so a
   second "struct tag" reference to the same tag would presumably be reported
   as a redefinition -- confirm against what the scanner leaves in tempid. */
struct_or_union_specifier
: struct_or_union IDENTIFIER '{' struct_declaration_list '}' {install();}
| struct_or_union '{' struct_declaration_list '}'
| struct_or_union IDENTIFIER {install();}
;
struct_or_union
: STRUCT
| UNION
;
struct_declaration_list
: struct_declaration
| struct_declaration_list struct_declaration
;
struct_declaration
: specifier_qualifier_list struct_declarator_list ';'
;
specifier_qualifier_list
: type_specifier specifier_qualifier_list
| type_specifier
| type_qualifier specifier_qualifier_list
| type_qualifier
;
struct_declarator_list
: struct_declarator
| struct_declarator_list ',' struct_declarator
;
struct_declarator
: declarator
| ':' constant_expression
| declarator ':' constant_expression
;
enum_specifier
: ENUM '{' enumerator_list '}'
| ENUM IDENTIFIER '{' enumerator_list '}'
| ENUM IDENTIFIER
;
enumerator_list
: enumerator
| enumerator_list ',' enumerator
;
/* NOTE(review): a bare enumerator runs context_check(), which terminates the
   program when the name is not already in the symbol table. An enumerator
   introduces a new name, so this looks like it should declare rather than
   look up -- confirm intent. */
enumerator
: IDENTIFIER {context_check();}
| IDENTIFIER '=' constant_expression //{context_check();}
;
type_qualifier
: CONST
| VOLATILE
;
/*
 * Declarators, parameter lists, abstract declarators and initializers.
 */
declarator
: pointer direct_declarator
| direct_declarator
;
/* The IDENTIFIER alternative is where a declared name enters the symbol
   table: install() records the name in `tempid` with the type code in
   `temp`. Array and function suffixes do not alter the recorded code. */
direct_declarator
: IDENTIFIER {install();}
| '(' declarator ')'
| direct_declarator '[' constant_expression ']'
| direct_declarator '[' ']'
| direct_declarator '(' parameter_type_list ')'
| direct_declarator '(' identifier_list ')'
| direct_declarator '(' ')'
;
pointer
: '*'
| '*' type_qualifier_list
| '*' pointer
| '*' type_qualifier_list pointer
;
type_qualifier_list
: type_qualifier
| type_qualifier_list type_qualifier
;
parameter_type_list
: parameter_list
| parameter_list ',' ELLIPSIS
;
parameter_list
: parameter_declaration
| parameter_list ',' parameter_declaration
;
parameter_declaration
: declaration_specifiers declarator
| declaration_specifiers abstract_declarator
| declaration_specifiers
;
/* Old-style (K&R) parameter name list; each name is installed as it is read,
   using whatever type code `temp` holds at that moment. */
identifier_list
: IDENTIFIER {install();}
| identifier_list ',' IDENTIFIER {install();}
;
type_name
: specifier_qualifier_list
| specifier_qualifier_list abstract_declarator
;
abstract_declarator
: pointer
| direct_abstract_declarator
| pointer direct_abstract_declarator
;
direct_abstract_declarator
: '(' abstract_declarator ')'
| '[' ']'
| '[' constant_expression ']'
| direct_abstract_declarator '[' ']'
| direct_abstract_declarator '[' constant_expression ']'
| '(' ')'
| '(' parameter_type_list ')'
| direct_abstract_declarator '(' ')'
| direct_abstract_declarator '(' parameter_type_list ')'
;
/* An initializer that is a plain expression passes that expression's value
   upward; the init_declarator rule has no action, so the value is not
   compared with the declared type. */
initializer
: assignment_expression {$$=$1;}
| '{' initializer_list '}'
| '{' initializer_list ',' '}'
;
initializer_list
: initializer
| initializer_list ',' initializer
;
/*
 * Statement grammar. None of these rules has a semantic action; identifiers
 * inside them are checked only through the expression rules.
 */
statement
: labeled_statement
| compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
;
/* The label name is not looked up: the context_check() call is commented
   out. */
labeled_statement
: IDENTIFIER ':' statement //{context_check();}
| CASE constant_expression ':' statement
| DEFAULT ':' statement
;
/* Within a block all declarations must come before the first statement. */
compound_statement
: '{' '}'
| '{' statement_list '}'
| '{' declaration_list '}'
| '{' declaration_list statement_list '}'
;
declaration_list
: declaration
| declaration_list declaration
;
statement_list
: statement
| statement_list statement
;
expression_statement
: ';'
| expression ';'
;
/* The else-less IF carries %prec LOWER_THAN_ELSE so that, with ELSE declared
   at higher precedence, an ELSE is attached to the nearest IF.
   NOTE(review): the ';' after %prec LOWER_THAN_ELSE appears redundant; yacc's
   input grammar tolerates a ';' before a following '|', but confirm with the
   generator in use. */
selection_statement
: IF '(' expression ')' statement %prec LOWER_THAN_ELSE ;
| IF '(' expression ')' statement ELSE statement
| SWITCH '(' expression ')' statement
;
/* FOR takes two expression_statements (init and condition, each ending in
   ';') followed by an optional increment expression. */
iteration_statement
: WHILE '(' expression ')' statement
| DO statement WHILE '(' expression ')' ';'
| FOR '(' expression_statement expression_statement ')' statement
| FOR '(' expression_statement expression_statement expression ')' statement
;
/* The goto target is not looked up: the context_check() call is commented
   out. */
jump_statement
: GOTO IDENTIFIER ';' //{context_check();}
| CONTINUE ';'
| BREAK ';'
| RETURN ';'
| RETURN expression ';'
;
/*
 * Start symbol: a translation unit is one or more external declarations,
 * each either a function definition or a declaration.
 */
translation_unit
: external_declaration
| translation_unit external_declaration
;
external_declaration
: function_definition
| declaration
;
/* Four shapes are accepted: with or without leading declaration_specifiers,
   and with or without a declaration_list between the declarator and the body
   (the old-style form in which parameter declarations follow the parameter
   name list). */
function_definition
: declaration_specifiers declarator declaration_list compound_statement
| declaration_specifiers declarator compound_statement
| declarator declaration_list compound_statement
| declarator compound_statement
;
%%
yyerror(s)
char *s;
{
fflush(stdout);err=1;
printf("Syntax error at Pos : %d : %d\n",line,cnt);
exit(0);
//printf("\n%*s\n%*s\n", column, "^", column, s);
}
main(argc,argv)
int argc;
char **argv;
{
char *fname;
++argv,--argc;/*skip program name*/
if(argc>0)
{
yyin=fopen(argv[0],"r");
fname=argv[0];
strcat(fname,"_output");
yyout=fopen(fname,"w");
}
else
{
printf("Please give the c filename as an argument.\n");
}
yyparse();
if(err==0)
printf("No Syntax errors found!\n");
fname=argv[0];strcat(fname,"_symbol-table");
FILE *sym_tab=fopen(fname,"w");
fprintf(sym_tab,"Type\tSymbol\n");
symrec *ptr;
for(ptr=sym_table;ptr!=(symrec *)0;ptr=(symrec *)ptr->next)
{
fprintf(sym_tab,"%d\t%s\n",ptr->type,ptr->name);
}
fclose(sym_tab);
}
Symbol table.h source code
#ifndef SYMBOL_TABLE_H
#define SYMBOL_TABLE_H

/*
 * Minimal symbol table: a singly linked list of (name, type code) records
 * with the most recently added record at the head.
 */

#include <stdlib.h>
#include <string.h>

/* Type codes stored in symrec.type. */
#define t_void 1
#define t_char 2
#define t_int 3
#define t_float 4

/* One symbol-table record. */
struct symrec
{
    char *name;             /* heap-allocated copy of the symbol's name */
    int type;               /* type code of the symbol */
    struct symrec *next;    /* next (older) record, or NULL at the end */
};
typedef struct symrec symrec;

/* Head of the list; NULL while the table is empty. */
symrec *sym_table = NULL;

symrec *putsym(const char *sym_name, int sym_type);
symrec *getsym(const char *sym_name);

/*
 * Add a record for sym_name with type code sym_type at the head of the
 * table. The name is copied, so the caller's buffer may be reused.
 *
 * No duplicate check is made; callers look the name up with getsym() first.
 *
 * Returns the new record, or NULL if memory cannot be allocated (the table
 * is left unchanged in that case).
 */
symrec *putsym(const char *sym_name, int sym_type)
{
    symrec *rec = malloc(sizeof *rec);

    if (rec == NULL)
        return NULL;

    rec->name = malloc(strlen(sym_name) + 1);
    if (rec->name == NULL) {
        free(rec);
        return NULL;
    }
    strcpy(rec->name, sym_name);
    rec->type = sym_type;
    rec->next = sym_table;
    sym_table = rec;
    return rec;
}

/*
 * Find the record whose name equals sym_name.
 *
 * The list is searched from the head, so if a name were present more than
 * once the most recently added record would be returned.
 *
 * Returns the record, or NULL if the name is not in the table.
 */
symrec *getsym(const char *sym_name)
{
    symrec *rec;

    for (rec = sym_table; rec != NULL; rec = rec->next) {
        if (strcmp(rec->name, sym_name) == 0)
            return rec;
    }
    return NULL;
}

#endif /* SYMBOL_TABLE_H */
In general terms, when you have an assignment operation, you need to check the left operand to make sure it's an lvalue and issue an error if it's not. This is most commonly done as part of type checking: you keep attributes about each value (e.g., whether it is an lvalue) along with its type, and check that those attributes are correct for each use of the value.
So what you might do is use %union to define a parser value object that can hold this info:
/* Semantic value for expression nonterminals: the expression's type together
   with a flag saying whether the expression is an lvalue. */
%union {
struct {
Type *type;
int is_lvalue;
} valinfo;
}
/* Give the nonterminals used in the assignment rule the valinfo value type. */
%type<valinfo> assignment_expression unary_expression
Then, your rule for assignments would check this along with the type:
/* Assignment: the target must be an lvalue, and the assigned value's type
   must equal the target's type or be implicitly convertible to it. */
assignment_expression:
unary_expression assignment_operator assignment_expression {
if (!$1.is_lvalue)
error("assigning to non-lvalue");
if ($1.type != $3.type && !type_is_implicitly_convertable($3.type, $1.type))
error("type mismatch in assignment");
/* The result has the target's type and is itself not an lvalue. */
$$.type = $1. type;
$$.is_lvalue = 0; }
Note that you need to make sure to set $$ properly in EVERY rule action that might have its value used by some other rule action; your code fails to do this, so likely won't do anything useful as is.