I have the following grammar:
/* Expression: either a single SimpleExpression, or two SimpleExpressions
 * joined by one relational operator token (LTnum, LEnum, EQnum, NEnum,
 * GEnum or GTnum). All six relational alternatives run the same action:
 * MkLeftC($1, $2) and then $$ = MkRightC($3, $2), where $2 is the value of
 * the operator token. */
Expression
: SimpleExpression {$$ = $1;};
/* NOTE(review): the ';' closing the line above comes before the '|'
 * alternatives below. The y.output listing still shows them as rules of
 * Expression, so it appears to be tolerated, but it is presumably unintended. */
| SimpleExpression LTnum SimpleExpression
{ MkLeftC($1, $2); $$ = MkRightC($3, $2); }
| SimpleExpression LEnum SimpleExpression
{ MkLeftC($1, $2); $$ = MkRightC($3, $2); }
| SimpleExpression EQnum SimpleExpression
{ MkLeftC($1, $2); $$ = MkRightC($3, $2); }
| SimpleExpression NEnum SimpleExpression
{ MkLeftC($1, $2); $$ = MkRightC($3, $2); }
| SimpleExpression GEnum SimpleExpression
{ MkLeftC($1, $2); $$ = MkRightC($3, $2); }
| SimpleExpression GTnum SimpleExpression
{ MkLeftC($1, $2); $$ = MkRightC($3, $2); }
;
/* SimpleExpression: a leading PLUSnum or MINUSnum, one Term, then op_terms.
 * The node is built with MakeTree from the op_terms tree ($3) and the Term
 * ($2), tagged AddOp or SubOp according to the leading sign.
 * NOTE(review): both alternatives require the leading sign and an op_terms
 * tail, and op_terms has no empty alternative, so a lone Term is not a
 * SimpleExpression here -- confirm this is intended. */
SimpleExpression
: PLUSnum Term op_terms
{ $$ = MakeTree(AddOp,$3,$2); }
| MINUSnum Term op_terms
{ $$ = MakeTree(SubOp,$3,$2); }
;
/* op_terms: a non-empty, right-recursive sequence of operator/Term pairs,
 * where the operator is PLUSnum, MINUSnum or ORnum.
 * Each pair becomes MakeTree(<op>, rest, $2): `rest` is the tree for the
 * remaining pairs ($3), or NullExp() when this is the last pair. */
op_terms
: PLUSnum Term
{ $$ = MakeTree(AddOp,NullExp(),$2); }
| PLUSnum Term op_terms
{ $$ = MakeTree(AddOp,$3,$2); }
| MINUSnum Term
{ $$ = MakeTree(SubOp,NullExp(),$2); }
| MINUSnum Term op_terms
{ $$ = MakeTree(SubOp,$3,$2); }
| ORnum Term
{ $$ = MakeTree(OrOp,NullExp(),$2); }
| ORnum Term op_terms
{ $$ = MakeTree(OrOp,$3,$2); }
;
I get the following shift-reduce conflicts in the y.output file:
51: shift/reduce conflict (shift 74, reduce 57) on GTnum
51: shift/reduce conflict (shift 75, reduce 57) on NEnum
51: shift/reduce conflict (shift 76, reduce 57) on EQnum
51: shift/reduce conflict (shift 77, reduce 57) on GEnum
51: shift/reduce conflict (shift 78, reduce 57) on LEnum
51: shift/reduce conflict (shift 79, reduce 57) on LTnum
state 51
Expression : SimpleExpression . (57)
Expression : SimpleExpression . LTnum SimpleExpression (58)
Expression : SimpleExpression . LEnum SimpleExpression (59)
Expression : SimpleExpression . EQnum SimpleExpression (60)
Expression : SimpleExpression . NEnum SimpleExpression (61)
Expression : SimpleExpression . GEnum SimpleExpression (62)
Expression : SimpleExpression . GTnum SimpleExpression (63)
I need help with removing these conflicts. What am I doing wrong here? I have tried to set precedence rules but they somehow don't seem to work here. Any ideas?
Strange grammar. It should be of this form:
/* Left-recursive form: the left operand of every relational alternative is
 * Expression itself rather than SimpleExpression.
 * The relational alternatives carry no actions in this sketch. */
Expression
: SimpleExpression {$$ = $1;};
| Expression LTnum SimpleExpression
| Expression LEnum SimpleExpression
| Expression EQnum SimpleExpression
| Expression NEnum SimpleExpression
| Expression GEnum SimpleExpression
| Expression GTnum SimpleExpression
;
i.e. using left-recursion.
Strange tree too. It should be of the general form:
$$ = MkBinaryNode($1,$2,$3);
Related
Below is my yacc code to parse C source code. I am a little new to this and this is an already existing code.
{
%{
#include <stdio.h>
#include <string.h>
#include "Expression.c"
%}
%token Identifier
%token Number
%token '=' '+' '-' '*' '/' ',' ';' '(' ')' '{' '}' '[' ']' '<' '>'
%token INT
%token CHAR
%token FLOAT
%token LONG
%token DOUBLE
%token RETURN
%token IF
%token ELSE
%token EQ /* == */
%token BADTOKEN
%%
/* program: one or more functions. The first function's value is passed
 * through; each further function is joined to the tree built so far with a
 * binaryNode whose label is the empty string.
 * The last alternative uses yacc's `error` token followed by '}' and has no
 * action of its own. */
program
: function
{ $$ = $1; }
| program function
{ $$ = binaryNode("",$1,$2);}
| error '}'
function:
typename Identifier '(' formal.arguments ')' function.body
{ $$ = attachAllChildren($2,$1,$4,$6); }
typename
: INT
{ $$ = leafNode("INT");}
| CHAR
{ $$ = leafNode("CHAR"); }
| DOUBLE
{ $$ = leafNode("DOUBLE"); }
| LONG
{ $$ = leafNode("LONG"); }
| FLOAT
{ $$ = leafNode("FLOAT"); }
formal.arguments
: /* empty */
{ $$ = NULL; }
| formal.argument.list
{ $$ = $1; }
formal.argument.list
: formal.argument
{ $$ = $1; }
| formal.argument.list ',' formal.argument
{ $$ = binaryNode(",", $1, $3); }
formal.argument
: typename Identifier
{ $$ = attachChild($2, $1); }
function.body
: '{' '}'
{ $$ = NULL; }
| '{' statements '}'
{ $$ = $2; }
statements
: statement
{ $$ = $1; }
| statements statement
{ $$ = attachChild($1,$2);}
statement
: declaration
{ $$ = $1; }
| RETURN expression ';' /* return statement */
{ $$ = unaryNode("RETURN", $2); }
| if.statement
{ $$ =$1; }
| term '=' expression ';' /* assignment */
{ $$ = binaryNode("=", $1, $3); }
| expression ';'
{ $$ = $1; }
| '{' statements '}'
{ $$ = $2; }
| ';' /* null statement */
{ $$ = NULL; }
declaration
: typename Identifier ';'
{ $$ = attachChild($2,$1); }
| typename Identifier '[' Number ']' ';' /* array */
{ $$ = attachSiblings($2, $1, $4); }
/* if.statement: IF with an optional ELSE branch, built as a ternaryNode
 * labelled "IF" whose third child is NULL when there is no else part.
 * These two alternatives are the source of the grammar's single shift/reduce
 * conflict (the dangling else): after `statement`, with ELSE as the next
 * token, the parser could reduce the first alternative or shift ELSE.
 * yacc resolves it by shifting, which binds each ELSE to the nearest IF. */
if.statement
: IF '(' expression ')' statement
{ $$ = ternaryNode("IF",$3,$5, NULL); }
| IF '(' expression ')' statement ELSE statement
{ $$ = ternaryNode("IF", $3, $5, $7); }
/* expression: a left-associative chain of additive.expressions joined by the
 * comparison operators EQ (==), '>' and '<'. Each comparison becomes a
 * binaryNode labelled with the operator's own spelling.
 * The EQ node is labelled "==": the original used "=", the same label the
 * assignment statement uses, so the two could not be told apart in the tree. */
expression
: additive.expression
{ $$ = $1; }
| expression EQ additive.expression
{ $$ = binaryNode("==",$1, $3); }
| expression '>' additive.expression
{ $$ = binaryNode(">", $1, $3); }
| expression '<' additive.expression
{ $$ = binaryNode("<", $1, $3); }
additive.expression
: term
{ $$ = $1; }
| additive.expression '+' term
{ $$ = binaryNode("+", $1, $3);}
| additive.expression '-' term
{ $$ = binaryNode("-", $1, $3);}
term
: Identifier
{ $$ = leafNode($1);}
| Number
{ $$ = leafNode($1);}
| Identifier '(' opt.actual.arguments ')' /* function call */
{ $$ = attachChild($1,$3);}
| Identifier '[' expression ']' /* array access */
{ $$ = attachChild($1,$3); }
| '(' expression ')'
{ $$ = $2;}
opt.actual.arguments
: /* empty */
{ $$ = NULL;}
| actual.arguments
{ $$=$1; }
actual.arguments
: expression
{ $$ = $1; }
| actual.arguments ',' expression
{ $$ = binaryNode(",",$1, $3); }
%%
/*
 * yyerror - report a parse error on stderr as "Error: <msg>".
 *
 * msg: message text supplied by the parser.
 * When YYBISON is not defined, the handler also increments yynerrs itself.
 * Returns 0; the value carries no meaning. The explicit return type and
 * return statement replace the old K&R / implicit-int definition.
 */
int yyerror(char *msg)
{
#if !defined(YYBISON)
    extern int yynerrs;

    ++yynerrs;
#endif
    fprintf(stderr, "Error: %s\n", msg);
    return 0;
}
/*
 * main - run the parser once, then print the number of errors it counted
 * (yynerrs) on stderr. Always returns 0.
 * Declared with an explicit int return type; implicit int is not valid C99.
 */
int main(void)
{
    extern int yynerrs;

    yyparse();
    fprintf(stderr, "%d errors.\n", yynerrs);
    return 0;
}
}
On compiling the above code , I get a warning that there is 1 shift/reduce conflict in the code. How can I resolve this?
Use the -v option to yacc, which will produce a y.output file telling you where the conflict is and how it is triggered. Note that a conflict is NOT an error -- you still get a valid parser from yacc -- but that parser may not recognize exactly the language defined by your grammar.
In your case, you get something like:
State 81 conflicts: 1 shift/reduce
:
State 81
28 if.statement: IF '(' expression ')' statement .
29 | IF '(' expression ')' statement . ELSE statement
ELSE shift, and go to state 83
ELSE [reduce using rule 28 (if.statement)]
$default reduce using rule 28 (if.statement)
which tells you that you have the classic dangling else ambiguity, so you can probably just ignore the conflict, as the generated parser will resolve the ambiguity by binding the else to the closest if, which is probably what you want.
I am unfamiliar with Yacc and trying to get an example I found here to work. When I try to compile with yacc -d calc.yacc, I get the following errors.
calc.yacc:42.17-18: $1 of `stat' has no declared type
calc.yacc:96.22-23: $1 of `expr' has no declared type
calc.yacc:105.17-18: $1 of `number' has no declared type
calc.yacc:106.20-21: $1 of `number' has no declared type
calc.yacc:110.29-30: $2 of `number' has no declared type
I tried googling and from what I can tell, the solution has to do with %type, but I'm not sure what to add.
The code is below:
%{
#include <stdio.h>
int regs[26];
int base;
%}
%start list
%union { int a; }
%type <a> expr number
%token DIGIT LETTER
%left '|'
%left '&'
%left '+' '-'
%left '*' '/' '%'
%left UMINUS /*supplies precedence for unary minus */
%% /* beginning of rules section */
list: /*empty */
|
list stat '\n'
|
list error '\n'
{
yyerrok;
}
;
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr
{
regs[$1] = $3;
}
;
expr: '(' expr ')'
{
$$ = $2;
}
|
expr '*' expr
{
$$ = $1 * $3;
}
|
expr '/' expr
{
$$ = $1 / $3;
}
|
expr '%' expr
{
$$ = $1 % $3;
}
|
expr '+' expr
{
$$ = $1 + $3;
}
|
expr '-' expr
{
$$ = $1 - $3;
}
|
expr '&' expr
{
$$ = $1 & $3;
}
|
expr '|' expr
{
$$ = $1 | $3;
}
|
'-' expr %prec UMINUS
{
$$ = -$2;
}
|
LETTER
{
$$ = regs[$1];
}
|
number
;
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
} |
number DIGIT
{
$$ = base * $1 + $2;
}
;
%%
/* Program entry point: run the parser once and return its result as the
 * exit status. Explicit int return type replaces the implicit-int form. */
int main(void)
{
    return yyparse();
}
/*
 * yyerror - print the parser's error message s on stderr, followed by a
 * newline. Returns 0; the value carries no meaning. The old K&R definition
 * had an implicit int return type and no return statement.
 */
int yyerror(char *s)
{
    fprintf(stderr, "%s\n", s);
    return 0;
}
/* yywrap - called by the scanner at end of input; returning 1 tells it that
 * there is no further input. Explicit int return type replaces implicit int. */
int yywrap(void)
{
    return 1;
}
$1, $2, and so on refer to the terms on the right-hand side of a grammar rule. For example in
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr {
regs[$1] = $3;
}
LETTER '=' expr is one of the rules, and in the action inside the braces that follow it, $1 refers to LETTER. regs[$1] = $3; will be made into a C statement, but in order to do that, yacc needs to know what type $1 has. If you add
%type <a> LETTER
after the first %type declaration (or simply list LETTER after expr), the first error will be taken care of. The same goes for DIGIT. (base is an ordinary C variable declared in the prologue, so it needs no %type.) Note that there is nothing that refers to the value of stat (naturally), so there is no need for a %type declaration for stat. Thus in
calc.yacc:105.17-18: $1 of `number' has no declared type
calc.yacc:106.20-21: $1 of `number' has no declared type
calc.yacc:110.29-30: $2 of `number' has no declared type
the first line says that DIGIT (the $1 in number: DIGIT) has no declared type; the second line is the same problem again, for the $1 used in the base = ... statement of that action; and the last line is about the $2 in number DIGIT, which is DIGIT once more. Here is the yacc code they refer to:
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
} |
number DIGIT
{
$$ = base * $1 + $2;
}
;
Finally, without getting into too many details, the statement
regs[$1]=$3;
will be translated by yacc into something close to:
regs[YS[1].<type of LETTER>]=YS[3].<type of expr>;
where YS is a 'magic array' (actually yacc's stack); YS has the type of the declared %union. Thus you can see that to make this into legal C, yacc needs to know which member of the %union <type of LETTER> refers to. This is what the %type declaration is for.
%{
#include<stdio.h>
int regs[26];
int base;
%}
%union { int a; }
%token DIGIT LETTER
%left '|'
%left '&'
%left '+' '-'
%left '*' '/' '%'
%left UMINUS /*supplies precedence for unary minus */
%type <a> stat expr number DIGIT LETTER
%% /* beginning of rules section */
list: list stat '\n'
|
list error '\n'
{
yyerrok;
}
| /*empty */
;
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr
{
regs[$1] = $3;
}
;
expr: '(' expr ')'
{
$$ = $2;
}
|
expr '*' expr
{
$$ = $1 * $3;
}
|
expr '/' expr
{
$$ = $1 / $3;
}
|
expr '%' expr
{
$$ = $1 % $3;
}
|
expr '+' expr
{
$$ = $1 + $3;
}
|
expr '-' expr
{
$$ = $1 - $3;
}
|
expr '&' expr
{
$$ = $1 & $3;
}
|
expr '|' expr
{
$$ = $1 | $3;
}
|
'-' expr %prec UMINUS
{
$$ = -$2;
}
|
LETTER
{
$$ = regs[$1];
}
|
number
;
/* number: accumulates a multi-digit literal one DIGIT at a time.
 * The first DIGIT becomes the initial value and fixes the radix in the
 * global `base`: a leading 0 selects base 8, any other digit base 10.
 * Every further DIGIT is appended as base * value-so-far + digit.
 * NOTE(review): assumes the lexer delivers each DIGIT's numeric value as its
 * semantic value -- the lexer is not shown here. */
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
}
|
number DIGIT
{
$$ = base * $1 + $2;
}
;
%%
/* Program entry point: run the parser once and return its result as the
 * exit status. Explicit int return type replaces the implicit-int form. */
int main(void)
{
    return yyparse();
}
/*
 * yyerror - print the parser's error message s on stderr, followed by a
 * newline. Returns 0; the value carries no meaning. The old K&R definition
 * had an implicit int return type and no return statement.
 */
int yyerror(char *s)
{
    fprintf(stderr, "%s\n", s);
    return 0;
}
/* yywrap - called by the scanner at end of input; returning 1 tells it that
 * there is no further input. Explicit int return type replaces implicit int. */
int yywrap(void)
{
    return 1;
}
The %type directive is required to specify which member of the union is used by which grammar symbol. To use the union member a for a symbol, declare that symbol with the directive.
See More here %type
If I write 1=a in the sample C program, it is not detected as an error. How do I solve this problem? Also, how do I handle global and local scope for variables? Thanks to anyone who can help.
clexer.lex source code
D [0-9]
L [a-zA-Z_]
H [a-fA-F0-9]
E [Ee][+-]?{D}+
FS (f|F|l|L)
IS (u|U|l|L)*
%{
#include <stdio.h>
#include "y.tab.h"
int cnt=1;
int line=1;
char tempid[100];
%}
%%
"/*" {comment();}
"auto" { cnt+=yyleng;ECHO; return(AUTO); }
"break" { cnt+=yyleng;ECHO; return(BREAK); }
"case" { cnt+=yyleng;ECHO; return(CASE); }
"char" { cnt+=yyleng;ECHO; return(CHAR); }
"const" { cnt+=yyleng;ECHO; return(CONST); }
"continue" { cnt+=yyleng;ECHO; return(CONTINUE); }
"default" { cnt+=yyleng;ECHO; return(DEFAULT); }
"do" { cnt+=yyleng;ECHO; return(DO); }
"double" { cnt+=yyleng;ECHO; return(DOUBLE); }
"else" { cnt+=yyleng;ECHO; return(ELSE); }
"enum" { cnt+=yyleng;ECHO; return(ENUM); }
"extern" { cnt+=yyleng;ECHO; return(EXTERN); }
"float" { cnt+=yyleng;ECHO; return(FLOAT); }
"for" { cnt+=yyleng;ECHO; return(FOR); }
"goto" { cnt+=yyleng;ECHO; return(GOTO); }
"if" { cnt+=yyleng;ECHO; return(IF); }
"int" { cnt+=yyleng;ECHO; return(INT); }
"long" { cnt+=yyleng;ECHO; return(LONG); }
"register" { cnt+=yyleng;ECHO; return(REGISTER); }
"return" { cnt+=yyleng;ECHO; return(RETURN); }
"short" { cnt+=yyleng;ECHO; return(SHORT); }
"signed" { cnt+=yyleng;ECHO; return(SIGNED); }
"sizeof" { cnt+=yyleng;ECHO; return(SIZEOF); }
"static" { cnt+=yyleng;ECHO; return(STATIC); }
"struct" { cnt+=yyleng;ECHO; return(STRUCT); }
"switch" { cnt+=yyleng;ECHO; return(SWITCH); }
"typedef" { cnt+=yyleng;ECHO; return(TYPEDEF); }
"union" { cnt+=yyleng;ECHO; return(UNION); }
"unsigned" { cnt+=yyleng;ECHO; return(UNSIGNED); }
"void" { cnt+=yyleng;ECHO; return(VOID); }
"volatile" { cnt+=yyleng;ECHO; return(VOLATILE); }
"while" { cnt+=yyleng;ECHO; return(WHILE); }
(['])+({L}|{D})+([']) { cnt+=yyleng;ECHO; return(SINGLE); }
{L}({L}|{D})* { cnt+=yyleng;ECHO; strcpy(tempid,yytext);return(IDENTIFIER); }
0[xX]{H}+{IS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
0{D}+{IS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{IS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
L?'(\\.|[^\\'])+' { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+{E}{FS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}*"."{D}+({E})?{FS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
{D}+"."{D}*({E})?{FS}? { cnt+=yyleng;ECHO; return(CONSTANT); }
L?\"(\\.|[^\\"])*\" { cnt+=yyleng;ECHO; return(STRING_LITERAL); }
"..." { cnt+=yyleng;ECHO; return(ELLIPSIS); }
">>=" { cnt+=yyleng;ECHO; return(RIGHT_ASSIGN); }
"<<=" { cnt+=yyleng;ECHO; return(LEFT_ASSIGN); }
"+=" { cnt+=yyleng;ECHO; return(ADD_ASSIGN); }
"-=" { cnt+=yyleng;ECHO; return(SUB_ASSIGN); }
"*=" { cnt+=yyleng;ECHO; return(MUL_ASSIGN); }
"/=" { cnt+=yyleng;ECHO; return(DIV_ASSIGN); }
"%=" { cnt+=yyleng;ECHO; return(MOD_ASSIGN); }
"&=" { cnt+=yyleng;ECHO; return(AND_ASSIGN); }
"^=" { cnt+=yyleng;ECHO; return(XOR_ASSIGN); }
"|=" { cnt+=yyleng;ECHO; return(OR_ASSIGN); }
">>" { cnt+=yyleng;ECHO; return(RIGHT_OP); }
"<<" { cnt+=yyleng;ECHO; return(LEFT_OP); }
"++" { cnt+=yyleng;ECHO; return(INC_OP); }
"--" { cnt+=yyleng;ECHO; return(DEC_OP); }
"->" { cnt+=yyleng;ECHO; return(PTR_OP); }
"&&" { cnt+=yyleng;ECHO; return(AND_OP); }
"||" { cnt+=yyleng;ECHO; return(OR_OP); }
"<=" { cnt+=yyleng;ECHO; return(LE_OP); }
">=" { cnt+=yyleng;ECHO; return(GE_OP); }
"==" { cnt+=yyleng;ECHO; return(EQ_OP); }
"!=" { cnt+=yyleng;ECHO; return(NE_OP); }
";" { cnt+=yyleng;ECHO; return(';'); }
("{"|"<%") { cnt+=yyleng;ECHO; return('{'); }
("}"|"%>") { cnt+=yyleng;ECHO; return('}'); }
"," { cnt+=yyleng;ECHO; return(','); }
":" { cnt+=yyleng;ECHO; return(':'); }
"=" { cnt+=yyleng;ECHO; return('='); }
"(" { cnt+=yyleng;ECHO; return('('); }
")" { cnt+=yyleng;ECHO; return(')'); }
("["|"<:") { cnt+=yyleng;ECHO; return('['); }
("]"|":>") { cnt+=yyleng;ECHO; return(']'); }
"." { cnt+=yyleng;ECHO; return('.'); }
"&" { cnt+=yyleng;ECHO; return('&'); }
"!" { cnt+=yyleng;ECHO; return('!'); }
"~" { cnt+=yyleng;ECHO; return('~'); }
"-" { cnt+=yyleng;ECHO; return('-'); }
"+" { cnt+=yyleng;ECHO; return('+'); }
"*" { cnt+=yyleng;ECHO; return('*'); }
"/" { cnt+=yyleng;ECHO; return('/'); }
"%" { cnt+=yyleng;ECHO; return('%'); }
"<" { cnt+=yyleng;ECHO; return('<'); }
">" { cnt+=yyleng;ECHO; return('>'); }
"^" { cnt+=yyleng;ECHO; return('^'); }
"|" { cnt+=yyleng;ECHO; return('|'); }
"?" { cnt+=yyleng;ECHO; return('?'); }
[ ] {cnt+=yyleng;ECHO;}
[\t\v\f] { cnt+=yyleng; }
[\n] {line++;cnt=1;}
. { /* ignore bad characters */ }
%%
/* yywrap - called by the scanner at end of input; returning 1 tells it that
 * there is no further input. Explicit int return type replaces implicit int. */
int yywrap(void)
{
    return 1;
}
/*
 * comment - skip the rest of a block comment.
 *
 * Called from the scanner rule that matches the opening slash-star. Reads
 * characters with input() up to and including the closing star-slash.
 * While scanning for a '*', a newline bumps `line` and resets `cnt` to 1 and
 * any other character bumps `cnt`, exactly as before.
 *
 * End of input ends the scan. The result of input() is kept in an int and
 * compared against both 0 and EOF: the original stored it in a char and only
 * tested for 0, so an unterminated comment never left the loop when input()
 * reported end of file as EOF.
 *
 * Returns 0; the value carries no meaning.
 */
int comment(void)
{
    int c;

    for (;;) {
        /* advance to the next '*' */
        while ((c = input()) != '*') {
            if (c == 0 || c == EOF)
                return 0;
            if (c == '\n') {
                line++;
                cnt = 1;
            } else {
                cnt++;
            }
        }

        /* a '/' right after the '*' closes the comment */
        c = input();
        if (c == '/' || c == 0 || c == EOF)
            return 0;

        /* not the end: push the character back so it is examined again
         * (it may itself be a '*' or a newline) */
        unput(c);
    }
}
cparser.yacc source code
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "symbol_table.h"
extern FILE *yyin;
extern FILE *yyout;
extern int column;
extern int line;
extern int cnt;
extern char *yytext,tempid[100];
int temp,err,err1=0;
install()
{
symrec *s;
s = getsym (tempid);
if (s == 0)
s = putsym (tempid,temp);
else
{
printf(" VOID=1 ");
printf(" CHAR=2 ");
printf(" INT=3 ");
printf(" FLOAT=4 ");
printf(" DOUBLE=4 ");
printf( "\n\nThere is a Semantic error at Pos : %d : %d : %s is already defined as %d\n\n",line,cnt,s->name,s->type );
exit(0);
}
err1=1;
}
int context_check()
{
symrec *s;
s = getsym(tempid);
if (s == 0 )
{printf( "\n\nThere is a Semantic error at Pos : %d : %d : %s is an undeclared identifier\n\n",line,cnt,tempid);exit(0);return 0;}
else
return(s->type);
err1=1;
}
/*
 * type_err - report a type mismatch between type codes t1 and t2.
 *
 * Does nothing unless both codes are non-zero. Otherwise it prints the
 * type-code legend and a "Type mismatch" message naming tempid to stdout,
 * sets err1 and terminates the program with status 1 (the original exited
 * with 0).
 *
 * Returns 0 when no error is reported; explicit int replaces implicit int.
 */
int type_err(int t1,int t2)
{
    if (!(t1 && t2))
        return 0;

    printf(" VOID=1 ");
    printf(" CHAR=2 ");
    printf(" INT=3 ");
    printf(" FLOAT=4 ");
    printf(" DOUBLE=4 ");
    printf( "\n\nThere is a Semantic error at Pos : %d : %d : Type mismatch for %s between %d and %d \n\n",line,cnt,tempid,t1,t2);
    err1 = 1;
    exit(1);
}
%}
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME SINGLE
%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
%nonassoc LOWER_THAN_ELSE
%nonassoc ELSE
%start translation_unit
%%
/* primary_expression: the semantic value is used as a type code.
 * IDENTIFIER takes the code returned by context_check(); a parenthesised
 * expression passes the inner value through.
 * NOTE(review): CONSTANT and STRING_LITERAL have no action, and the visible
 * lexer rules never assign yylval, so their value is presumably 0. Since
 * type_err() only reports when both codes are non-zero, comparisons that
 * involve a constant would then never be reported -- confirm. */
primary_expression
: IDENTIFIER {$$=context_check();}
| CONSTANT
| STRING_LITERAL
| '(' expression ')' {$$= $2;}
;
postfix_expression
: primary_expression {$$=$1;}
| postfix_expression '[' expression ']'
| postfix_expression '(' ')'
| postfix_expression '(' argument_expression_list ')'
| postfix_expression '.' IDENTIFIER
| postfix_expression PTR_OP IDENTIFIER
| postfix_expression INC_OP
| postfix_expression DEC_OP
;
argument_expression_list
: assignment_expression
| argument_expression_list ',' assignment_expression
;
unary_expression
: postfix_expression {$$=$1;}
| INC_OP unary_expression
| DEC_OP unary_expression
| unary_operator cast_expression
| SIZEOF unary_expression
| SIZEOF '(' type_name ')'
;
unary_operator
: '&'
| '*'
| '+'
| '-'
| '~'
| '!'
;
cast_expression
: unary_expression {$$=$1;}
| '(' type_name ')' cast_expression
;
multiplicative_expression
: cast_expression {$$=$1;}
| multiplicative_expression '*' cast_expression
| multiplicative_expression '/' cast_expression
| multiplicative_expression '%' cast_expression
;
additive_expression
: multiplicative_expression {$$=$1;}
| additive_expression '+' multiplicative_expression
| additive_expression '-' multiplicative_expression
;
shift_expression
: additive_expression {$$=$1;}
| shift_expression LEFT_OP additive_expression
| shift_expression RIGHT_OP additive_expression
;
relational_expression
: shift_expression {$$=$1;}
| relational_expression '<' shift_expression
| relational_expression '>' shift_expression
| relational_expression LE_OP shift_expression
| relational_expression GE_OP shift_expression
;
equality_expression
: relational_expression {$$=$1;}
| equality_expression EQ_OP relational_expression
| equality_expression NE_OP relational_expression
;
and_expression
: equality_expression {$$=$1;}
| and_expression '&' equality_expression
;
exclusive_or_expression
: and_expression {$$=$1;}
| exclusive_or_expression '^' and_expression
;
inclusive_or_expression
: exclusive_or_expression {$$=$1;}
| inclusive_or_expression '|' exclusive_or_expression
;
logical_and_expression
: inclusive_or_expression {$$=$1;}
| logical_and_expression AND_OP inclusive_or_expression
;
logical_or_expression
: logical_and_expression {$$=$1;}
| logical_or_expression OR_OP logical_and_expression
;
conditional_expression
: logical_or_expression {$$=$1;}
| logical_or_expression '?' expression ':' conditional_expression
;
/* assignment_expression: either a conditional_expression (value passed
 * through) or an assignment. For an assignment the values of the left and
 * right sides are compared and type_err() is called when they differ.
 * NOTE(review): nothing here checks that the left operand is something that
 * can be assigned to, so input such as 1=a is accepted by this rule.
 * NOTE(review): the assignment action does not assign $$ itself. */
assignment_expression
: conditional_expression {$$=$1;}
| unary_expression assignment_operator assignment_expression {if($1!=$3){type_err($1,$3);}}
;
assignment_operator
: '='
| MUL_ASSIGN
| DIV_ASSIGN
| MOD_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
| LEFT_ASSIGN
| RIGHT_ASSIGN
| AND_ASSIGN
| XOR_ASSIGN
| OR_ASSIGN
;
expression
: assignment_expression {$$=$1;}
| expression ',' assignment_expression
;
constant_expression
: conditional_expression
;
declaration
: declaration_specifiers ';'
| declaration_specifiers init_declarator_list ';'
;
declaration_specifiers
: storage_class_specifier
| storage_class_specifier declaration_specifiers
| type_specifier
| type_specifier declaration_specifiers
| type_qualifier
| type_qualifier declaration_specifiers
;
init_declarator_list
: init_declarator
| init_declarator_list ',' init_declarator
;
init_declarator
: declarator
| declarator '=' initializer
;
storage_class_specifier
: TYPEDEF
| EXTERN
| STATIC
| AUTO
| REGISTER
;
/* type_specifier: records the type code of a basic type in the global `temp`,
 * which install() passes to putsym() when a name is declared.
 * Codes set here: VOID=1, CHAR=2, SHORT/INT/LONG=3, FLOAT/DOUBLE=4.
 * NOTE(review): SIGNED, UNSIGNED, struct/union, enum and TYPE_NAME have no
 * action, so `temp` keeps whatever value it had before. */
type_specifier
: VOID {temp=1;}
| CHAR {temp=2;}
| SHORT {temp=3;}
| INT {temp=3;}
| LONG {temp=3;}
| FLOAT {temp=4;}
| DOUBLE {temp=4;}
| SIGNED
| UNSIGNED
| struct_or_union_specifier
| enum_specifier
| TYPE_NAME
;
/* struct_or_union_specifier: a struct/union with a tag and body, with a body
 * only, or with a tag only. The two tagged forms call install().
 * NOTE(review): install() registers whatever name is in tempid, and the
 * visible lexer overwrites tempid on every IDENTIFIER. In the first
 * alternative the action runs only after the closing '}', so by then tempid
 * presumably holds the last identifier read inside the body rather than the
 * tag -- verify.
 * NOTE(review): in the third alternative install() also runs when an existing
 * tag is merely referenced, which would presumably end in the "already
 * defined" exit -- verify. */
struct_or_union_specifier
: struct_or_union IDENTIFIER '{' struct_declaration_list '}' {install();}
| struct_or_union '{' struct_declaration_list '}'
| struct_or_union IDENTIFIER {install();}
;
struct_or_union
: STRUCT
| UNION
;
struct_declaration_list
: struct_declaration
| struct_declaration_list struct_declaration
;
struct_declaration
: specifier_qualifier_list struct_declarator_list ';'
;
specifier_qualifier_list
: type_specifier specifier_qualifier_list
| type_specifier
| type_qualifier specifier_qualifier_list
| type_qualifier
;
struct_declarator_list
: struct_declarator
| struct_declarator_list ',' struct_declarator
;
struct_declarator
: declarator
| ':' constant_expression
| declarator ':' constant_expression
;
enum_specifier
: ENUM '{' enumerator_list '}'
| ENUM IDENTIFIER '{' enumerator_list '}'
| ENUM IDENTIFIER
;
enumerator_list
: enumerator
| enumerator_list ',' enumerator
;
/* enumerator: one name inside an enum body, optionally with an explicit value.
 * An enumerator introduces a new name, so it is entered into the symbol table
 * with install() using the int type code (3). The original looked the name up
 * with context_check(), which terminates the program for any name that has
 * not been declared yet -- i.e. for every new enumerator.
 * In the second alternative the install is a mid-rule action placed right
 * after the name, before constant_expression can overwrite tempid. */
enumerator
: IDENTIFIER {temp=3; install();}
| IDENTIFIER {temp=3; install();} '=' constant_expression
;
type_qualifier
: CONST
| VOLATILE
;
declarator
: pointer direct_declarator
| direct_declarator
;
/* direct_declarator: the declared name plus any array or function suffixes.
 * The IDENTIFIER alternative calls install(), which adds the name to the
 * symbol table or terminates the program if the name is already there.
 * NOTE(review): the visible symbol table is a single list with no notion of
 * scope, so the same name declared in two different functions or blocks
 * would presumably be reported as "already defined". */
direct_declarator
: IDENTIFIER {install();}
| '(' declarator ')'
| direct_declarator '[' constant_expression ']'
| direct_declarator '[' ']'
| direct_declarator '(' parameter_type_list ')'
| direct_declarator '(' identifier_list ')'
| direct_declarator '(' ')'
;
pointer
: '*'
| '*' type_qualifier_list
| '*' pointer
| '*' type_qualifier_list pointer
;
type_qualifier_list
: type_qualifier
| type_qualifier_list type_qualifier
;
parameter_type_list
: parameter_list
| parameter_list ',' ELLIPSIS
;
parameter_list
: parameter_declaration
| parameter_list ',' parameter_declaration
;
parameter_declaration
: declaration_specifiers declarator
| declaration_specifiers abstract_declarator
| declaration_specifiers
;
identifier_list
: IDENTIFIER {install();}
| identifier_list ',' IDENTIFIER {install();}
;
type_name
: specifier_qualifier_list
| specifier_qualifier_list abstract_declarator
;
abstract_declarator
: pointer
| direct_abstract_declarator
| pointer direct_abstract_declarator
;
direct_abstract_declarator
: '(' abstract_declarator ')'
| '[' ']'
| '[' constant_expression ']'
| direct_abstract_declarator '[' ']'
| direct_abstract_declarator '[' constant_expression ']'
| '(' ')'
| '(' parameter_type_list ')'
| direct_abstract_declarator '(' ')'
| direct_abstract_declarator '(' parameter_type_list ')'
;
initializer
: assignment_expression {$$=$1;}
| '{' initializer_list '}'
| '{' initializer_list ',' '}'
;
initializer_list
: initializer
| initializer_list ',' initializer
;
statement
: labeled_statement
| compound_statement
| expression_statement
| selection_statement
| iteration_statement
| jump_statement
;
labeled_statement
: IDENTIFIER ':' statement //{context_check();}
| CASE constant_expression ':' statement
| DEFAULT ':' statement
;
compound_statement
: '{' '}'
| '{' statement_list '}'
| '{' declaration_list '}'
| '{' declaration_list statement_list '}'
;
declaration_list
: declaration
| declaration_list declaration
;
statement_list
: statement
| statement_list statement
;
expression_statement
: ';'
| expression ';'
;
/* selection_statement: if, if/else and switch.
 * The plain `if` alternative is tagged %prec LOWER_THAN_ELSE. The declarations
 * list %nonassoc LOWER_THAN_ELSE before %nonassoc ELSE, so ELSE has the
 * higher precedence and the dangling-else conflict is settled in favour of
 * shifting ELSE: each else binds to the nearest if.
 * NOTE(review): the ';' after LOWER_THAN_ELSE comes before the remaining
 * alternatives; presumably unintended. */
selection_statement
: IF '(' expression ')' statement %prec LOWER_THAN_ELSE ;
| IF '(' expression ')' statement ELSE statement
| SWITCH '(' expression ')' statement
;
iteration_statement
: WHILE '(' expression ')' statement
| DO statement WHILE '(' expression ')' ';'
| FOR '(' expression_statement expression_statement ')' statement
| FOR '(' expression_statement expression_statement expression ')' statement
;
jump_statement
: GOTO IDENTIFIER ';' //{context_check();}
| CONTINUE ';'
| BREAK ';'
| RETURN ';'
| RETURN expression ';'
;
translation_unit
: external_declaration
| translation_unit external_declaration
;
external_declaration
: function_definition
| declaration
;
function_definition
: declaration_specifiers declarator declaration_list compound_statement
| declaration_specifiers declarator compound_statement
| declarator declaration_list compound_statement
| declarator compound_statement
;
%%
/*
 * yyerror - called by the parser on a syntax error.
 *
 * Flushes stdout, records the failure in `err`, prints the current line and
 * column counters to stdout and terminates the program. The exit status is
 * now 1; the original exited with 0, which made a failed parse look like
 * success. The message text s supplied by the parser is not printed.
 */
int yyerror(char *s)
{
    (void)s;    /* the parser's own message is deliberately not shown */

    fflush(stdout);
    err = 1;
    printf("Syntax error at Pos : %d : %d\n",line,cnt);
    exit(1);
}
/*
 * main - parse the C source file named on the command line.
 *
 * argv[1] is opened as the scanner input (yyin) and "<file>_output" as the
 * scanner output (yyout). After parsing, the symbol table is written out as
 * "Type<TAB>Symbol" lines.
 *
 * Fixes over the original:
 *  - the output names are built in local buffers with snprintf(); the
 *    original strcat()'ed the suffixes onto the argv string itself, writing
 *    past the end of the argument;
 *  - a missing argument now ends the program after the usage message; the
 *    original carried on and later handed a null argv entry to strcat();
 *  - the results of fopen() are checked.
 *
 * NOTE(review): the symbol-table file is still named
 * "<file>_output_symbol-table", which is what the original produced because
 * it appended to the already-suffixed name; "<file>_symbol-table" may have
 * been what was intended.
 *
 * Returns 0 on success, 1 when the argument or one of the files is unusable.
 */
int main(int argc, char **argv)
{
    char out_name[FILENAME_MAX];
    char tab_name[FILENAME_MAX];
    const char *src;
    FILE *sym_tab;
    symrec *ptr;
    int n;

    if (argc < 2) {
        printf("Please give the c filename as an argument.\n");
        return 1;
    }
    src = argv[1];

    n = snprintf(out_name, sizeof out_name, "%s_output", src);
    if (n < 0 || (size_t)n >= sizeof out_name) {
        fprintf(stderr, "%s: file name too long\n", src);
        return 1;
    }
    n = snprintf(tab_name, sizeof tab_name, "%s_symbol-table", out_name);
    if (n < 0 || (size_t)n >= sizeof tab_name) {
        fprintf(stderr, "%s: file name too long\n", src);
        return 1;
    }

    yyin = fopen(src, "r");
    if (yyin == NULL) {
        perror(src);
        return 1;
    }
    yyout = fopen(out_name, "w");
    if (yyout == NULL) {
        perror(out_name);
        return 1;
    }

    yyparse();
    if (err == 0)
        printf("No Syntax errors found!\n");

    sym_tab = fopen(tab_name, "w");
    if (sym_tab == NULL) {
        perror(tab_name);
        return 1;
    }
    fprintf(sym_tab,"Type\tSymbol\n");
    for (ptr = sym_table; ptr != (symrec *)0; ptr = (symrec *)ptr->next)
        fprintf(sym_tab,"%d\t%s\n",ptr->type,ptr->name);
    fclose(sym_tab);
    return 0;
}
Symbol table.h source code
#ifndef SYMBOL_TABLE_H
#define SYMBOL_TABLE_H

/* malloc/free and strlen/strcpy/strcmp are used below */
#include <stdlib.h>
#include <string.h>

/* Type codes. They mirror the values the parser assigns to its `temp`
 * variable; the macros themselves are not referenced in the visible code. */
#define t_void 1
#define t_char 2
#define t_int 3
#define t_float 4

/* One symbol table entry: a singly linked list node. */
struct symrec
{
    char *name;             /* heap copy of the symbol's name */
    int type;               /* type code passed to putsym() */
    struct symrec *next;    /* next (older) entry, or 0 at the end */
};
typedef struct symrec symrec;

/* Head of the symbol list; 0 while the table is empty. */
symrec *sym_table = (symrec *)0;

symrec *putsym(const char *sym_name, int sym_type);
symrec *getsym(const char *sym_name);

/*
 * putsym - add a new entry at the head of the symbol list.
 *
 * sym_name is copied, so the caller keeps ownership of its buffer.
 * No check for an existing entry of the same name is made here.
 * Returns the new entry, or 0 if memory could not be allocated (the list is
 * left unchanged in that case; the original dereferenced the failed
 * allocation).
 */
symrec *putsym(const char *sym_name, int sym_type)
{
    symrec *node = malloc(sizeof *node);

    if (node == 0)
        return 0;

    node->name = malloc(strlen(sym_name) + 1);
    if (node->name == 0) {
        free(node);
        return 0;
    }
    strcpy(node->name, sym_name);

    node->type = sym_type;
    node->next = sym_table;
    sym_table = node;
    return node;
}

/*
 * getsym - find the most recently added entry whose name equals sym_name.
 * Returns the entry, or 0 if there is none.
 */
symrec *getsym(const char *sym_name)
{
    symrec *node;

    for (node = sym_table; node != 0; node = node->next) {
        if (strcmp(node->name, sym_name) == 0)
            return node;
    }
    return 0;
}

#endif /* SYMBOL_TABLE_H */
In general terms, when you have an assignment operation, you need to check the left operand to make sure it's an lvalue, and issue an error if it's not. This is most commonly done as part of typechecking -- you keep attributes about values (e.g., whether it is an lvalue or not) along with the type, and check that those attributes are correct for each use of a value.
So what you might do is use %union to define a parser value object that can hold this info:
%union {
struct {
Type *type;
int is_lvalue;
} valinfo;
}
%type<valinfo> assignment_expression unary_expression
Then, your rule for assignments would check this along with the type:
assignment_expression:
unary_expression assignment_operator assignment_expression {
if (!$1.is_lvalue)
error("assigning to non-lvalue");
if ($1.type != $3.type && !type_is_implicitly_convertable($3.type, $1.type))
error("type mismatch in assignment");
$$.type = $1. type;
$$.is_lvalue = 0; }
Note that you need to make sure to set $$ properly in EVERY rule action that might have its value used by some other rule action; your code fails to do this, so likely won't do anything useful as is.
I made a program that is supposed to recognize a simple grammar. When I input what I think is supposed to be a valid statement, I get an error. Specifically, if I type
int a;
int b;
it doesn't work. After I type int a; the program echoes ; for some reason. Then when I type int b; I get syntax error.
The lex file:
%{
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "y.tab.h"
%}
/* Named definitions: each line is a NAME followed by the pattern it stands
 * for; the rules section refers to them as {NAME}.
 * NOTE(review): the keyword patterns are written in upper case, so {else},
 * {if}, {return}, {void} and {while} match the input text ELSE, IF, RETURN,
 * VOID and WHILE; only {int} also accepts the lower-case spelling.
 * NOTE(review): id and num end in a star, so both can match the empty
 * string. */
else ELSE
if IF
int INT|int
return RETURN
void VOID
while WHILE
id [a-zA-Z]*
num [0-9]*
lte <=
gte >=
equal ==
notequal !=
%%
{else} { return ELSE; }
{if} { return IF; }
{int} { return INT; }
{return} { return RETURN; }
{void} { return VOID; }
{while} { return WHILE; }
{id} { return ID; }
{num} { return NUM; }
{lte} { return LTE; }
{gte} { return GTE; }
{equal} { return EQUAL; }
{notequal} { return NOTEQUAL; }
%%
The yacc file:
/* C-Minus BNF Grammar */
%token ELSE
%token IF
%token INT
%token RETURN
%token VOID
%token WHILE
%token ID
%token NUM
%token LTE
%token GTE
%token EQUAL
%token NOTEQUAL
%%
program : declaration_list ;
declaration_list : declaration_list declaration | declaration ;
declaration : var_declaration | fun_declaration ;
var_declaration : type_specifier ID ';'
| type_specifier ID '[' NUM ']' ';' ;
type_specifier : INT | VOID ;
fun_declaration : type_specifier ID '(' params ')' compound_stmt ;
params : param_list | VOID ;
param_list : param_list ',' param
| param ;
param : type_specifier ID | type_specifier ID '[' ']' ;
compound_stmt : '{' local_declarations statement_list '}' ;
local_declarations : local_declarations var_declaration
| /* empty */ ;
statement_list : statement_list statement
| /* empty */ ;
statement : expression_stmt
| compound_stmt
| selection_stmt
| iteration_stmt
| return_stmt ;
expression_stmt : expression ';'
| ';' ;
selection_stmt : IF '(' expression ')' statement
| IF '(' expression ')' statement ELSE statement ;
iteration_stmt : WHILE '(' expression ')' statement ;
return_stmt : RETURN ';' | RETURN expression ';' ;
expression : var '=' expression | simple_expression ;
var : ID | ID '[' expression ']' ;
simple_expression : additive_expression relop additive_expression
| additive_expression ;
relop : LTE | '<' | '>' | GTE | EQUAL | NOTEQUAL ;
additive_expression : additive_expression addop term | term ;
addop : '+' | '-' ;
term : term mulop factor | factor ;
mulop : '*' | '/' ;
factor : '(' expression ')' | var | call | NUM ;
call : ID '(' args ')' ;
args : arg_list | /* empty */ ;
arg_list : arg_list ',' expression | expression ;
You need to add the semicolon as a token in your lexer specification as well: the grammar uses ';', but no lex rule returns it, so the scanner's default rule just echoes the character and the parser never sees it. As an FYI, search online for existing lex/yacc files for the C programming language; there are a few of them, and there are plenty of tutorials on this too. flex and bison are not exactly forgiving about errors in the specification, so you really need to understand the elements of how they work. Look for Jack Crenshaw's famous tutorial on how to build a compiler.
Lex:
id [a-zA-Z]*
num [0-9]*
Both patterns can match the empty string; use '+' instead of '*'.
When I run yacc -d parser.y on the following file I get the following errors:
parser.y:23.3-24.4: warning: unused value: $4
15 rules never reduced
parser.y: warning: 7 useless nonterminals and 15 useless rules
parser.y:16.1-14: fatal error: start symbol statement_list does not derive any sentence
make: *** [y.tab.c] Error 1
I'm particularly concerned about how to get rid of the fatal error.
%{
#include "parser.h"
#include <string.h>
%}
%union {
double dval;
struct symtab *symp;
}
%token <symp> NAME
%token <dval> NUMBER
%type <dval> expression
%type <dval> term
%type <dval> factor
%%
statement_list: statement '\n'
| statement_list statement '\n'
;
statement: NAME '=' expression { $1->value = $3; }
| expression { printf("= %g\n", $1); }
;
expression: expression '+' term { $$ = $1 + $3; }
| expression '-' term { $$ = $1 - $3; }
term
;
term: term '*' factor { $$ = $1 * $3; }
| term '/' factor { if($3 == 0.0)
yyerror("divide by zero");
else
$$ = $1 / $3;
}
| factor
;
factor: '(' expression ')' { $$ = $2; }
| '-' factor { $$ = -$2; }
| NUMBER
| NAME { $$ = $1->value; }
;
%%
/*
 * symlook - find the symbol table entry named s, creating it if necessary.
 *
 * Scans symtab from the start. An entry whose name equals s is returned as
 * is; the first entry that has no name yet is claimed for s (the name is
 * copied with strdup) and returned. If all NSYMS entries are taken by other
 * names, or the copy of the name cannot be allocated, the problem is reported
 * through yyerror() and the program exits with status 1.
 *
 * Changes from the original: the unused local `p` is gone, and a failed
 * strdup() is no longer returned as a nameless entry.
 */
struct symtab *symlook(char *s) {
    struct symtab *sp;

    for (sp = symtab; sp < &symtab[NSYMS]; sp++) {
        if (!sp->name) {
            /* free slot: claim it for s */
            sp->name = strdup(s);
            if (!sp->name) {
                yyerror("out of memory");
                exit(1);
            }
            return sp;
        }
        if (!strcmp(sp->name, s))
            return sp;    /* already present */
    }
    yyerror("Too many symbols");
    exit(1); /* cannot continue */
} /* symlook */
/*
 * yyerror - print "yyerror: <s>" and a newline on stdout.
 * Returns 0; the value carries no meaning. The original had an implicit int
 * return type and no return statement.
 */
int yyerror(char *s)
{
    printf( "yyerror: %s\n", s);
    return 0;
}
All those warnings and errors are caused by the missing | before term in your expression rule. The hint is the unused $4 in a rule that plainly should have only three symbols. That problem cascades into all the others.
Change:
expression: expression '+' term { $$ = $1 + $3; }
| expression '-' term { $$ = $1 - $3; }
term
;
into:
expression: expression '+' term { $$ = $1 + $3; }
| expression '-' term { $$ = $1 - $3; }
| term
;
and try again.
You forgot the | (or) here:
expression: expression '+' term { $$ = $1 + $3; }
| expression '-' term { $$ = $1 - $3; }
term
;
The last alternative should be | term {} followed by the closing ;