Compiling Yacc code - yacc

Below is my yacc code to parse C source code. I am a little new to this and this is an already existing code.
{
%{
#include <stdio.h>
#include <string.h>
#include "Expression.c"
%}
%token Identifier
%token Number
%token '=' '+' '-' '*' '/' ',' ';' '(' ')' '{' '}' '[' ']' '<' '>'
%token INT
%token CHAR
%token FLOAT
%token LONG
%token DOUBLE
%token RETURN
%token IF
%token ELSE
%token EQ /* == */
%token BADTOKEN
%%
program
: function
{ $$ = $1; }
| program function
{ $$ = binaryNode("",$1,$2);}
| error '}'
function:
typename Identifier '(' formal.arguments ')' function.body
{ $$ = attachAllChildren($2,$1,$4,$6); }
typename
: INT
{ $$ = leafNode("INT");}
| CHAR
{ $$ = leafNode("CHAR"); }
| DOUBLE
{ $$ = leafNode("DOUBLE"); }
| LONG
{ $$ = leafNode("LONG"); }
| FLOAT
{ $$ = leafNode("FLOAT"); }
formal.arguments
: /* empty */
{ $$ = NULL; }
| formal.argument.list
{ $$ = $1; }
formal.argument.list
: formal.argument
{ $$ = $1; }
| formal.argument.list ',' formal.argument
{ $$ = binaryNode(",", $1, $3); }
formal.argument
: typename Identifier
{ $$ = attachChild($2, $1); }
function.body
: '{' '}'
{ $$ = NULL; }
| '{' statements '}'
{ $$ = $2; }
statements
: statement
{ $$ = $1; }
| statements statement
{ $$ = attachChild($1,$2);}
statement
: declaration
{ $$ = $1; }
| RETURN expression ';' /* return statement */
{ $$ = unaryNode("RETURN", $2); }
| if.statement
{ $$ =$1; }
| term '=' expression ';' /* assignment */
{ $$ = binaryNode("=", $1, $3); }
| expression ';'
{ $$ = $1; }
| '{' statements '}'
{ $$ = $2; }
| ';' /* null statement */
{ $$ = NULL; }
declaration
: typename Identifier ';'
{ $$ = attachChild($2,$1); }
| typename Identifier '[' Number ']' ';' /* array */
{ $$ = attachSiblings($2, $1, $4); }
if.statement
: IF '(' expression ')' statement
{ $$ = ternaryNode("IF",$3,$5, NULL); }
| IF '(' expression ')' statement ELSE statement
{ $$ = ternaryNode("IF", $3, $5, $7); }
expression
: additive.expression
{ $$ = $1; }
| expression EQ additive.expression
{ $$ = binaryNode("=",$1, $3); }
| expression '>' additive.expression
{ $$ = binaryNode(">", $1, $3); }
| expression '<' additive.expression
{ $$ = binaryNode("<", $1, $3); }
additive.expression
: term
{ $$ = $1; }
| additive.expression '+' term
{ $$ = binaryNode("+", $1, $3);}
| additive.expression '-' term
{ $$ = binaryNode("-", $1, $3);}
term
: Identifier
{ $$ = leafNode($1);}
| Number
{ $$ = leafNode($1);}
| Identifier '(' opt.actual.arguments ')' /* function call */
{ $$ = attachChild($1,$3);}
| Identifier '[' expression ']' /* array access */
{ $$ = attachChild($1,$3); }
| '(' expression ')'
{ $$ = $2;}
opt.actual.arguments
: /* empty */
{ $$ = NULL;}
| actual.arguments
{ $$=$1; }
actual.arguments
: expression
{ $$ = $1; }
| actual.arguments ',' expression
{ $$ = binaryNode(",",$1, $3); }
%%
yyerror(msg)
char* msg;
{
#if !defined(YYBISON)
extern int yynerrs;
++yynerrs;
#endif
fprintf(stderr, "Error: %s\n",msg);
}
main()
{
extern int yynerrs;
yyparse();
fprintf(stderr, "%d errors.\n", yynerrs);
return 0;
}
}
On compiling the above code , I get a warning that there is 1 shift/reduce conflict in the code. How can I resolve this?

Use the -v option to yacc, which will produce a y.output file telling you where the conflict is and how its is triggered. Note that a conflict is NOT an error -- you still get a valid parser from yacc -- but that parser may not recognize exactly the language defined by your grammar.
In your case, you get something like:
State 81 conflicts: 1 shift/reduce
:
State 81
28 if.statement: IF '(' expression ')' statement .
29 | IF '(' expression ')' statement . ELSE statement
ELSE shift, and go to state 83
ELSE [reduce using rule 28 (if.statement)]
$default reduce using rule 28 (if.statement)
which tells you that you have the classic dangling else ambiguity, so you can probably just ignore the conflict, as the generated parser will resolve the ambiguity by binding the else to the closest if, which is probably what you want.

Related

Using yacc to output boolean expressions

I am trying to learn this language for a college class and our teacher gave us a prompt to try. Basically we are to take a boolean expression and output if that expression is true or false. The input will be in the format of:
true and (false or true) or false.
I have talked with my professor about many solutions and he is wanting the class to make tokens for AND OR NOT TRUE FALSE. He also wants us to use the logical operators in the yacc file instead of the tokens, IE ||, &&, !.
test.l
%{
#include "y.tab.h"
%}
AND [Aa][Nn][Dd]
OR [Oo][Rr]
NOT [Nn][Oo][Tt]
op '&' | '|' | "!"
%%
[a-zA-Z] {return ALPHA;}
[\t]+ ;
[\n] {return '\n';}
{AND} { return (AND); }
{OR} { return (OR); }
{NOT} { return (NOT); }
[Tt][Rr][Uu][Ee] { yylval = 1;
return (boolean); }
[Ff][Aa][Ll][Ss][Ee] { yylval = 0;
return (boolean); }
. {();}
%%
test.y
%{
#include<stdio.h>
#include<stdlib.h>
int yylex();
%}
%token ALPHA AND OR NOT TRUE FALSE boolean
%left "&" "|"
%right '!'
%%
program: bexpr '\n' {if ($1 >= 1)
{
printf("TRUE\n");
exit(0);
}
else{
printf("FALSE\n");
exit(0);
}
|
;
bexpr: bexpr "|""|" bterm { $$ = $1 || $3; }
| bterm { $$ = $1; }
;
bterm: bterm "&""&" bfactor { $$ = $1 && $3; }
| bfactor { $$ = $1; }
;
bfactor: '!' bfactor { $$ = ! $2; }
| '(' bexpr ')' { $$ = $2; }
| TRUE { $$ = $1; }
| FALSE {$$ = $1; }
| boolean { $$ = $1; }
;
%%
int main()
{
printf("Enter your truth statement\n");
yyparse();
return 0;
}
If i were to put in true and false, would expect false. However, I get syntax error. If I only put in true, the output is correct, same for false. Basically if I put anything other than one term, the program throws an error.

y.tab.c: undefined reference to yylex

I am trying to run an example I found online of a calculator. But I have this error showing every time I run my gcc command. Here are the commands that I run:
flex -l calc3.l
yacc -vd calc3.y
gcc y.tab.c -lm -ll
-> at this point I got this error message:
/tmp/ccPOq58f.o : In function 'yyparse':
y.tab.c: undefined reference to 'yylex'
collect2: error: ld returned 1 exit status
Here is my code:
calc3.l
%{
#include <stdlib.h>
#include "calc3.h"
#include "y.tab.h"
void yyerror(char *);
%}
%%
[a-z] {
yylval.sIndex = *yytext - 'a';
return VARIABLE;
}
0 {
yylval.iValue = atoi(yytext);
return INTEGER;
}
[1-9][0-9]* {
yylval.iValue = atoi(yytext);
return INTEGER;
}
[-()<>=+*/;{}.] {
return *yytext;
}
">=" return GE;
"<=" return LE;
"==" return EQ;
"!=" return NE;
"while" return WHILE;
"if" return IF;
"else" return ELSE;
"print" return PRINT;
[ \t\n]+ ; /* ignore whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
here is calc3.h
typedef enum { typeCon, typeId, typeOpr } nodeEnum;
/* constants */
typedef struct {
int value; /* value of constant */
} conNodeType;
/* identifiers */
typedef struct {
int i; /* subscript to sym array */
} idNodeType;
/* operators */
typedef struct {
int oper; /* operator */
int nops; /* number of operands */
struct nodeTypeTag **op; /* operands */
} oprNodeType;
typedef struct nodeTypeTag {
nodeEnum type; /* type of node */
union {
conNodeType con; /* constants */
idNodeType id; /* identifiers */
oprNodeType opr; /* operators */
};
} nodeType;
extern int sym[26];
and here is calc3.y
%{
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "calc3.h"
/* prototypes */
nodeType *opr(int oper, int nops, ...);
nodeType *id(int i);
nodeType *con(int value);
void freeNode(nodeType *p);
int ex(nodeType *p);
int yylex(void);
void yyerror(char *s);
int sym[26]; /* symbol table */
%}
%union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
};
%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT
%nonassoc IFX
%nonassoc ELSE
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%type <nPtr> stmt expr stmt_list
%%
program:
function { exit(0); }
;
function:
function stmt { ex($2); freeNode($2); }
| /* NULL */
;
stmt:
';' { $$ = opr(';', 2, NULL, NULL); }
| expr ';' { $$ = $1; }
| PRINT expr ';' { $$ = opr(PRINT, 1, $2); }
| VARIABLE '=' expr ';' { $$ = opr('=', 2, id($1), $3); }
| WHILE '(' expr ')' stmt { $$ = opr(WHILE, 2, $3, $5); }
| IF '(' expr ')' stmt %prec IFX { $$ = opr(IF, 2, $3, $5); }
| IF '(' expr ')' stmt ELSE stmt { $$ = opr(IF, 3, $3, $5, $7); }
| '{' stmt_list '}' { $$ = $2; }
;
stmt_list:
stmt { $$ = $1; }
| stmt_list stmt { $$ = opr(';', 2, $1, $2); }
;
expr:
INTEGER { $$ = con($1); }
| VARIABLE { $$ = id($1); }
| '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
| expr '+' expr { $$ = opr('+', 2, $1, $3); }
| expr '-' expr { $$ = opr('-', 2, $1, $3); }
| expr '*' expr { $$ = opr('*', 2, $1, $3); }
| expr '/' expr { $$ = opr('/', 2, $1, $3); }
| expr '<' expr { $$ = opr('<', 2, $1, $3); }
| expr '>' expr { $$ = opr('>', 2, $1, $3); }
| expr GE expr { $$ = opr(GE, 2, $1, $3); }
| expr LE expr { $$ = opr(LE, 2, $1, $3); }
| expr NE expr { $$ = opr(NE, 2, $1, $3); }
| expr EQ expr { $$ = opr(EQ, 2, $1, $3); }
| '(' expr ')' { $$ = $2; }
;
%%
nodeType *con(int value) {
nodeType *p;
/* allocate node */
if ((p = malloc(sizeof(nodeType))) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeCon;
p->con.value = value;
return p;
}
nodeType *id(int i) {
nodeType *p;
/* allocate node */
if ((p = malloc(sizeof(nodeType))) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeId;
p->id.i = i;
return p;
}
nodeType *opr(int oper, int nops, ...) {
va_list ap;
nodeType *p;
int i;
/* allocate node */
if ((p = malloc(sizeof(nodeType))) == NULL)
yyerror("out of memory");
if ((p->opr.op = malloc(nops * sizeof(nodeType *))) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeOpr;
p->opr.oper = oper;
p->opr.nops = nops;
va_start(ap, nops);
for (i = 0; i < nops; i++)
p->opr.op[i] = va_arg(ap, nodeType*);
va_end(ap);
return p;
}
void freeNode(nodeType *p) {
int i;
if (!p) return;
if (p->type == typeOpr) {
for (i = 0; i < p->opr.nops; i++)
freeNode(p->opr.op[i]);
free (p->opr.op);
}
free (p);
}
void yyerror(char *s) {
fprintf(stdout, "%s\n", s);
}
int main(void) {
yyparse();
return 0;
}
If you just use
flex calc3.l
then flex produces a scanner called lex.yy.c. (I removed the -l option which was used in the original question. -l causes flex to be more compatible with certain aspects of the original lex utility, and it has no use except for compiling ancient lex scanners.)
Similarly, if you just use
yacc -vd calc3.y
the bison will produce files called y.tab.c and y.tab.h. And
gcc y.tab.c -lm -ll
will produce a file called a.out.
None of that is a good idea. It's far better to give the files meaningful names, based on the input filenames. All three of these tools understand a -o command-line flag which specifies the output name file.
So you could do this:
flex calc3.l
yacc -vd calc3.y
gcc lex.yy.c y.tab.c -lm -ll
But I'd recommend something like this:
flex -o calc3.lex.c calc3.l
bison -o calc3.tab.c -vd calc3.y
gcc -o calc3 calc3.lex.c calc3.tab.c -lm -ll
When you do this, you'll need to change the #include "y.tab.h" to #include "calc3.tab.h". (Note that if you invoke bison as bison rather than as yacc, it will automatically produce output files with names based on the grammar file. But it doesn't hurt to be explicit.)
Even better if you put it in a Makefile, or at least a script file.

$1 of [...] has no declared type

I am unfamiliar with Yacc and trying to get an example I found here to work. When I try to compile with yacc -d calc.yacc, I get the following errors.
calc.yacc:42.17-18: $1 of `stat' has no declared type
calc.yacc:96.22-23: $1 of `expr' has no declared type
calc.yacc:105.17-18: $1 of `number' has no declared type
calc.yacc:106.20-21: $1 of `number' has no declared type
calc.yacc:110.29-30: $2 of `number' has no declared type
I tried googling and from what I can tell, the solution has to do with %type, but I'm not sure what to add.
The code is below:
%{
#include <stdio.h>
int regs[26];
int base;
%}
%start list
%union { int a; }
%type <a> expr number
%token DIGIT LETTER
%left '|'
%left '&'
%left '+' '-'
%left '*' '/' '%'
%left UMINUS /*supplies precedence for unary minus */
%% /* beginning of rules section */
list: /*empty */
|
list stat '\n'
|
list error '\n'
{
yyerrok;
}
;
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr
{
regs[$1] = $3;
}
;
expr: '(' expr ')'
{
$$ = $2;
}
|
expr '*' expr
{
$$ = $1 * $3;
}
|
expr '/' expr
{
$$ = $1 / $3;
}
|
expr '%' expr
{
$$ = $1 % $3;
}
|
expr '+' expr
{
$$ = $1 + $3;
}
|
expr '-' expr
{
$$ = $1 - $3;
}
|
expr '&' expr
{
$$ = $1 & $3;
}
|
expr '|' expr
{
$$ = $1 | $3;
}
|
'-' expr %prec UMINUS
{
$$ = -$2;
}
|
LETTER
{
$$ = regs[$1];
}
|
number
;
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
} |
number DIGIT
{
$$ = base * $1 + $2;
}
;
%%
main()
{
return(yyparse());
}
yyerror(s)
char *s;
{
fprintf(stderr, "%s\n",s);
}
yywrap()
{
return(1);
}
$1, $2, and so on refer to the terms on the right-hand side of a grammar rule. For example in
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr {
regs[$1] = $3;
}
LETTER '=' expr is one of the rules and in the following parentheses $1 refers to LETTER. regs[$1] = $3; will be made into a C statement but in order to do that, yacc needs to know what type $1 has. If you add
%type <a> LETTER
after the first %type declaration (or simply list LETTER after expr) the first error will be taken care of. Same goes for DIGIT and base. Note that there is nothing that refers to the value of stat (naturally) so there is no need for a %type declaration for stat. Thus in
calc.yacc:105.17-18: $1 of `number' has no declared type
calc.yacc:106.20-21: $1 of `number' has no declared type
calc.yacc:110.29-30: $2 of `number' has no declared type
the first line implies that DIGIT has an unknown type, the second line refers to the same problem with number; finally the last line reminds you to declare the type for base. Here is the yacc code it is referring to:
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
} |
number DIGIT
{
$$ = base * $1 + $2;
}
;
Finally, without getting into too many details, the statement
regs[$1]=$3;
will be translated by yacc into something close to:
regs[YS[1].<type of LETTER>]=YS[3].<type of expr>;
where YS is a 'magic array' (actually yacc's stack); YS has the type of the declared %union. Thus you can see that to make this into legal C, yacc needs to know which member of the %union <type of LETTER> refers to. This is what the %type declaration is for.
%{
#include<stdio.h>
int regs[26];
int base;
%}
%union { int a; }
%token DIGIT LETTER
%left '|'
%left '&'
%left '+' '-'
%left '*' '/' '%'
%left UMINUS /*supplies precedence for unary minus */
%type <a> stat expr number DIGIT LETTER
%% /* beginning of rules section */
list: list stat '\n'
|
list error '\n'
{
yyerrok;
}
| /*empty */
;
stat: expr
{
printf("%d\n",$1);
}
|
LETTER '=' expr
{
regs[$1] = $3;
}
;
expr: '(' expr ')'
{
$$ = $2;
}
|
expr '*' expr
{
$$ = $1 * $3;
}
|
expr '/' expr
{
$$ = $1 / $3;
}
|
expr '%' expr
{
$$ = $1 % $3;
}
|
expr '+' expr
{
$$ = $1 + $3;
}
|
expr '-' expr
{
$$ = $1 - $3;
}
|
expr '&' expr
{
$$ = $1 & $3;
}
|
expr '|' expr
{
$$ = $1 | $3;
}
|
'-' expr %prec UMINUS
{
$$ = -$2;
}
|
LETTER
{
$$ = regs[$1];
}
|
number
;
number: DIGIT
{
$$ = $1;
base = ($1==0) ? 8 : 10;
}
|
number DIGIT
{
$$ = base * $1 + $2;
}
;
%%
main()
{
return(yyparse());
}
yyerror(s)
char *s;
{
fprintf(stderr, "%s\n",s);
}
yywrap()
{
return(1);
}
It is required to use %type directive to specify which members of union is used in which expressions.In order to use union member, a, we should use the aforementioned directive.
See More here %type

What does return 0 do in the code and why is the yywrap function written without a body?

I AM USING BISON AND FLEX.
What does return 0 do in case of the kcalc.l file that I have posted?
And I am not getting the use of yywrap without a body (i mean not literally but an empty body).The code is of a calculator without any variable managing and basic operations that can be done like addition subtraction multiplication division and handling of unary minus operator. I have been studying through the lex and yacc specifications but did not get any answer for the query I asked .
Kcal.y
%{
#include <stdio.h>
%}
%token Number
%left '-' '+'
%left '*' '/'
%nonassoc UMINUS
%%
statement: expression
{ printf(" result = %d\n", $1);} ;
expression: expression '+' expression
{ $$ = $1 + $3;
printf("Recognised'+'expression\n");
}
| expression '-' expression
{ $$ = $1 - $3;
printf("Recognised '-' expression\n");
}
| expression '*' expression
{ $$ = $1 * $3;
printf("Recognised '*' expression\n");
}
| expression '/' expression
{ if ($3 == 0)
printf ("divide by zero\n");
else
$$ = $1 / $3;
printf("Recognised '/' expression\n");
}
| '-' expression %prec UMINUS
{
$$ = - $2;
printf("Recognised paranthesized expression\n");
}
| '(' expression ')'
{
$$ = $2;
printf("Recognised paranthesized expression");
}
| Number { $$ = $1;
printf("Recognised a no.\n");
}
;
%%
int main(void)
{
return yyparse();
}
int yyerror (char *msg)
{
return fprintf(stderr,"Yacc :%s", msg);
}
yywrap()
{
}
 
kcalc.l
%{
#include "y.tab.h"
extern int yylval;
%}
%%
[0-9]+ { yylval = atoi(yytext);
printf("accepted the number : %d\n", yylval);
return Number; }
[ \t] { printf("skipped whitespace \n");}
\n { printf("reached end of line\n");
**return 0;**
}
. { printf("found other data \" %s\n", yytext);
return yytext[0];
}
%%
The return 0 notifies the end-of-input to the parser, so apparently the expression should be contained on a single line. The empty body of yywrap is just wrong. If you use -Wall with the gcc compiler it will give two warnings for yywrap:
kcal.y:54: warning: return type defaults to ‘int’
kcal.y:55: warning: control reaches end of non-void function
The first one because no result type for the function is specified (K&R style C), so it is assumed it should return an int. The second warning because it lacks a return statement for such an int.
Since a newline terminates the input, the chances of yywrap ever being called are slim. But it will be called if the input does not contain a newline. If by sheer accident the (more or less random) return value of yywrap were to be interpreted as 0 the tokenizer would end up in an infinite loop of repeatedly calling yywrap.

What's wrong with this yacc file?

When I run yacc -d parser.y on the following file I get the following errors:
parser.y:23.3-24.4: warning: unused value: $4
15 rules never reduced
parser.y: warning: 7 useless nonterminals and 15 useless rules
parser.y:16.1-14: fatal error: start symbol statement_list does not derive any sentence
make: *** [y.tab.c] Error 1
I'm particularly concerned about how to get rid of the fatal error.
%{
#include "parser.h"
#include <string.h>
%}
%union {
double dval;
struct symtab *symp;
}
%token <symp> NAME
%token <dval> NUMBER
%type <dval> expression
%type <dval> term
%type <dval> factor
%%
statement_list: statement '\n'
| statement_list statement '\n'
;
statement: NAME '=' expression { $1->value = $3; }
| expression { printf("= %g\n", $1); }
;
expression: expression '+' term { $$ = $1 + $3; }
| expression '-' term { $$ = $1 - $3; }
term
;
term: term '*' factor { $$ = $1 * $3; }
| term '/' factor { if($3 == 0.0)
yyerror("divide by zero");
else
$$ = $1 / $3;
}
| factor
;
factor: '(' expression ')' { $$ = $2; }
| '-' factor { $$ = -$2; }
| NUMBER
| NAME { $$ = $1->value; }
;
%%
/* look up a symbol table entry, add if not present */
struct symtab *symlook(char *s) {
char *p;
struct symtab *sp;
for(sp = symtab; sp < &symtab[NSYMS]; sp++) {
/* is it already here? */
if(sp->name && !strcmp(sp->name, s))
return sp;
if(!sp->name) { /* is it free */
sp->name = strdup(s);
return sp;
}
/* otherwise continue to next */
}
yyerror("Too many symbols");
exit(1); /* cannot continue */
} /* symlook */
yyerror(char *s)
{
printf( "yyerror: %s\n", s);
}
All those warnings and errors are caused by the missing | before term in your expression rule. The hint is the unused $4 in a snippet that's plainly should only have 3 arguments. That problem cascades into all the others.
Change:
expression: expression '+' term { $$ = $1 + $3; }
| expression '-' term { $$ = $1 - $3; }
term
;
into:
expression: expression '+' term { $$ = $1 + $3; }
| expression '-' term { $$ = $1 - $3; }
| term
;
and try again.
you forget the or | here
expression: expression '+' term { $$ = $1 + $3; }
| expression '-' term { $$ = $1 - $3; }
term
;
the last rule should be |term {};