lex and yacc : a simple calculator with syntax error - yacc

a simple calculator support only + - * / and integer. I use GNU/Linux.
hoc1.l:
%{
#include "y.tab.h"
extern int yylval;
%}
%%
[ \t] { ; }
[0-9]+ { sscanf(yytext, "%d", &yylval); printf("\nget %d\n", yylval); return NUMBER; }
\n {return 0;}
%%
int yywrap(void) {
return 1;
}
hoc1.y
%{
#include<stdio.h>
#define YYSTYPE int
%}
%token NUMBER
%left '+' '-'
%left '*' '/'
%%
list:
| list '\n'
| list expr '\n' {printf("\t%d\n",$2);}
;
expr: NUMBER { $$ = $1; }
| expr '+' expr {$$ = $1+$3;}
| expr '-' expr {$$ = $1-$3;}
| expr '*' expr {$$ = $1*$3;}
| expr '/' expr {$$ = $1/$3;}
;
%%
int main(void)
{
yyparse();
return 0;
}
int yyerror(char *s) {
fprintf(stderr, "*%s*\n", s);
return 0;
}
runtime-error:
% ./hoc
8+9
get 8
+
get 9
*syntax error*
why and how to sovle it, thx!

You forgot to include your operators in your lex file, and you should return nonzero on a successful token read: returning 0 intuitively means there was no match by yylex. Remove the line in your lex file handling the newline character and replace it with the following:
[-+*/\n] { return *yytext; }
. { yyerror("unrecognized character"); return 0; }
Now it should work. Returning *yytext allows your yacc grammar to parse an expression successfully, e.g. if you get a '+', return it to allow the grammar to parse properly.

Related

Yacc %define parse.error verbose generates error

When I try to get more out of my "syntax error", I seem to use the way described on so many websites, but all seem to create their own errors, for some reason.
I was getting standard "syntax error" on line 5 of the Input file... so I wanted to add better error handling so I can see what exactly is the issue. But
%define parse.error verbose
However, it gives me this;
error: %define variable 'parse.error' is not used
Below are my files, as long as you keep it constructive, feel free to comment on more then just the error parts, any help is welcome :)
(As long as the errors get fixed as well :P )
Thanks in advance!
lex file;
%option nounput yylineno
%{
#include "yaccTest.tab.h"
void InvalidToken();
void extern yyerror (char *s);
%}
whitespace [ \t\r\v\f]
linefeed \n
%%
";" {return SEMICOLON;}
"=" {return EQ;}
"+" {return PLUS;}
"-" {return MINUS;}
"*" {return MULTIPLY;}
"/" {return DEVIDE;}
"(" {return BO;}
")" {return BC;}
"^" {return POWER;}
"print" {return PRINT;}
[a-zA-Z][a-zA-Z0-9]* {yylval.charValue = yytext[0]; return IDENTIFIER;}
[0-9]+ {yylval.intValue = atoi(yytext); return NUMBER;}
{whitespace} {;}
. {InvalidToken();}
%%
void yyerror(char *s) {
fprintf(stderr, "\nERROR ON LINE %d : \n %s\n", yylineno, s);
exit(0);
}
void InvalidToken(){
printf("ERROR ON LINE %d : \n Invalid Token %s\n", yylineno,yytext);
exit(0);
}
int yywrap (void) {return 1;}
yacc file;
%{
#include <stdio.h>
#include <stdlib.h>
int getVariableValue(char varID);
extern int yylineno;
int varIDs[52] = {0};
int varValues[52] = {0};
%}
%define parse.lac full
%define parse.error verbose
%union YYSTYPE {int intValue; char charValue;}
%token COLON SEMICOLON ST SE EQ GE GT PLUS MINUS MULTIPLY DEVIDE BO BC CBO CBC POWER LOOP PRINT
%token <intValue> NUMBER
%token <charValue> IDENTIFIER CHAR
%type <charValue> declaration expression
%type <intValue> numval
%right EQ
%left PLUS MINUS
%left MULTIPLY DEVIDE
%left POWER
%%
declaration : IDENTIFIER EQ expression
| declaration IDENTIFIER EQ expression
;
expression : numval SEMICOLON
| PRINT BO numval BC SEMICOLON {printf("Printing");}
;
numval : NUMBER {$$ = $1;}
| NUMBER PLUS NUMBER {$$ = $1 + $3;}
| NUMBER MINUS NUMBER {$$ = $1 - $3;}
| NUMBER MULTIPLY NUMBER {$$ = $1 * $3;}
| NUMBER DEVIDE NUMBER {$$ = $1 / $3;}
| NUMBER POWER NUMBER {int i;int j = $1;for(i = 1; i < $3; i++){j=j*$1;};$$ = j;}
;
%%
int getVariableValue(char varID) {
int i, j, localTemp;
for (i=0;i<((sizeof(varIDs)/sizeof(varIDs[0])));i++) {
if (varID == varIDs[i]) {
localTemp = varValues[i];
}
}
return localTemp;
}
int setVariableValue(char varID, int varValue) {
int i, varPresent = 0;
for (i=0;i<((sizeof(varIDs)/sizeof(varIDs[0])));i++) {
if (varID == varIDs[i]) {
varValues[i] = varValue;
varPresent = 1;
}
}
if (varPresent == 0) {
for (i=0;i<((sizeof(varIDs)/sizeof(varIDs[0])));i++) {
if (&(varIDs[i]) == NULL) {
if (&(varValues[i]) == NULL) {
varIDs[i] = varID;
varValues[i] = varValue;
}
else {
missingVarIDError(varID, varValue);
}
}
else {
notEnoughStorageError(varID, varValue);
}
}
}
}
int missingVarIDError(char *id, int val){
printf("\nERROR ON LINE %d : \nIdentifier '%s' not found, but assigned location DOES have a value; %s",yylineno,id,val);
exit(0);
}
int notEnoughStorageError(char *id, int val){
printf("\nERROR ON LINE %d : \nIdentifier '%s' did not fit in StorageArray, '%3' not stored!",yylineno,id,val);
exit(0);
}
int main (void) {
return yyparse ( );
return 0;
}
Input file;
x=4;
y=2+6;
X=2;
z=5;
print(4);

Accept both integers and floats in a bison grammar

This question is attached to this post https://stackoverflow.com/questions/42848197/bison-flex-cannot-print-out-result?noredirect=1#comment72805876_42848197
this time I try to make my calculator program accepts both integers and floats numbers.
Thank you.
Here is my code
Flex:
%{
#include <stdio.h>
#include "f1.tab.h"
%}
integer [1-9][0-9]*|0
float [0-9]+\.[0-9]+
%%
{integer} { yylval.ival = atoi(yytext); return INT; }
{float} { yylval.fval = atof(yytext); return FLOAT; }
. { return yytext[0]; }
%%
Bison :
%{
#include <stdio.h>
%}
%union {
int ival;
float fval;
}
%token <ival> INT
%token <fval> FLOAT
%type <fval> exp
%type <fval> fac
%type <fval> f
%%
input: line
| input line
;
line: exp ';' { printf("%d\n", $1); };
exp: fac { $$ = $1; }
| exp '+' fac { $$ = $1 + $3; }
| exp '-' fac { $$ = $1 - $3; }
;
fac: f
| fac '*' f { $$ = $1 * $3; }
| fac '/' f { $$ = $1 / $3; }
;
f: INT | FLOAT;
%%
main(int argc, char **argv) {
yyparse();
}
yyerror(char *s) {
fprintf(stderr, "error: %s\n", s);
}
Bison tells you exactly what the problem is:
parser.y:32.4-6: warning: type clash on default action: <fval> != <ival> [-Wother]
f: INT | FLOAT;
^^^
The default action for the rule f: INT copies an ivar to an fvar without any sort of conversion (basically, copying via union). To fix it, you need to insert a conversion:
f: INT { $$ = (double)$1; }

lex and yacc to parse trignometric expression

I have the following code for lex and yacc. I am getting kind of extra values in the printed statement can anyone tell. whats wrong with the code?
Lex code:
%{
#include <stdio.h>
#include "y.tab.h"
%}
%%
[ \t] ;
[+-] { yylval=yytext; return Sym;}
(s|c|t)..x { yylval=yytext; return Str;}
[a-zA-Z]+ { printf("Invalid");}
%%
int yywrap()
{
return 1;
}
yacc code:
%{
#include<stdio.h>
%}
%start exps
%token Sym Str
%%
exps: exps exp
| exp
;
exp : Str Sym Str {printf("%s",$1); printf("%s",$2); printf("%s",$3);}
;
%%
int main (void)
{
while(1){
return yyparse();
}
}
yyerror(char *err) {
fprintf(stderr, "%s\n",err);
}
Input:
sinx+cosx
output:
sinx+cosx+cosxcosx
look at the output of the code!!!
yytext is a pointer into flex's internal scanning buffer, so its contents will be modified when the next token is read. If you want to return it to the parser, you need to make a copy:
[+-] { yylval=strdup(yytext); return Sym;}
(s|c|t)..x { yylval=strdup(yytext); return Str;}
Where symbols are a single character, it might make more sense to return that character directly in the scanner:
[-+] { return *yytext; }
in which case, your yacc rules should use the character directly in '-single quotes:
exp : Str '+' Str {printf("%s + %s",$1, $3); free($1); free($3); }
| Str '-' Str {printf("%s - %s",$1, $3); free($1); free($3); }

y.tab.c: undefined reference to yylex

I am trying to run an example I found online of a calculator. But I have this error showing every time I run my gcc command. Here are the commands that I run:
flex -l calc3.l
yacc -vd calc3.y
gcc y.tab.c -lm -ll
-> at this point I got this error message:
/tmp/ccPOq58f.o : In function 'yyparse':
y.tab.c: undefined reference to 'yylex'
collect2: error: ld returned 1 exit status
Here is my code:
calc3.l
%{
#include <stdlib.h>
#include "calc3.h"
#include "y.tab.h"
void yyerror(char *);
%}
%%
[a-z] {
yylval.sIndex = *yytext - 'a';
return VARIABLE;
}
0 {
yylval.iValue = atoi(yytext);
return INTEGER;
}
[1-9][0-9]* {
yylval.iValue = atoi(yytext);
return INTEGER;
}
[-()<>=+*/;{}.] {
return *yytext;
}
">=" return GE;
"<=" return LE;
"==" return EQ;
"!=" return NE;
"while" return WHILE;
"if" return IF;
"else" return ELSE;
"print" return PRINT;
[ \t\n]+ ; /* ignore whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
here is calc3.h
typedef enum { typeCon, typeId, typeOpr } nodeEnum;
/* constants */
typedef struct {
int value; /* value of constant */
} conNodeType;
/* identifiers */
typedef struct {
int i; /* subscript to sym array */
} idNodeType;
/* operators */
typedef struct {
int oper; /* operator */
int nops; /* number of operands */
struct nodeTypeTag **op; /* operands */
} oprNodeType;
typedef struct nodeTypeTag {
nodeEnum type; /* type of node */
union {
conNodeType con; /* constants */
idNodeType id; /* identifiers */
oprNodeType opr; /* operators */
};
} nodeType;
extern int sym[26];
and here is calc3.y
%{
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "calc3.h"
/* prototypes */
nodeType *opr(int oper, int nops, ...);
nodeType *id(int i);
nodeType *con(int value);
void freeNode(nodeType *p);
int ex(nodeType *p);
int yylex(void);
void yyerror(char *s);
int sym[26]; /* symbol table */
%}
%union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
};
%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT
%nonassoc IFX
%nonassoc ELSE
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%type <nPtr> stmt expr stmt_list
%%
program:
function { exit(0); }
;
function:
function stmt { ex($2); freeNode($2); }
| /* NULL */
;
stmt:
';' { $$ = opr(';', 2, NULL, NULL); }
| expr ';' { $$ = $1; }
| PRINT expr ';' { $$ = opr(PRINT, 1, $2); }
| VARIABLE '=' expr ';' { $$ = opr('=', 2, id($1), $3); }
| WHILE '(' expr ')' stmt { $$ = opr(WHILE, 2, $3, $5); }
| IF '(' expr ')' stmt %prec IFX { $$ = opr(IF, 2, $3, $5); }
| IF '(' expr ')' stmt ELSE stmt { $$ = opr(IF, 3, $3, $5, $7); }
| '{' stmt_list '}' { $$ = $2; }
;
stmt_list:
stmt { $$ = $1; }
| stmt_list stmt { $$ = opr(';', 2, $1, $2); }
;
expr:
INTEGER { $$ = con($1); }
| VARIABLE { $$ = id($1); }
| '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
| expr '+' expr { $$ = opr('+', 2, $1, $3); }
| expr '-' expr { $$ = opr('-', 2, $1, $3); }
| expr '*' expr { $$ = opr('*', 2, $1, $3); }
| expr '/' expr { $$ = opr('/', 2, $1, $3); }
| expr '<' expr { $$ = opr('<', 2, $1, $3); }
| expr '>' expr { $$ = opr('>', 2, $1, $3); }
| expr GE expr { $$ = opr(GE, 2, $1, $3); }
| expr LE expr { $$ = opr(LE, 2, $1, $3); }
| expr NE expr { $$ = opr(NE, 2, $1, $3); }
| expr EQ expr { $$ = opr(EQ, 2, $1, $3); }
| '(' expr ')' { $$ = $2; }
;
%%
nodeType *con(int value) {
nodeType *p;
/* allocate node */
if ((p = malloc(sizeof(nodeType))) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeCon;
p->con.value = value;
return p;
}
nodeType *id(int i) {
nodeType *p;
/* allocate node */
if ((p = malloc(sizeof(nodeType))) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeId;
p->id.i = i;
return p;
}
nodeType *opr(int oper, int nops, ...) {
va_list ap;
nodeType *p;
int i;
/* allocate node */
if ((p = malloc(sizeof(nodeType))) == NULL)
yyerror("out of memory");
if ((p->opr.op = malloc(nops * sizeof(nodeType *))) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeOpr;
p->opr.oper = oper;
p->opr.nops = nops;
va_start(ap, nops);
for (i = 0; i < nops; i++)
p->opr.op[i] = va_arg(ap, nodeType*);
va_end(ap);
return p;
}
void freeNode(nodeType *p) {
int i;
if (!p) return;
if (p->type == typeOpr) {
for (i = 0; i < p->opr.nops; i++)
freeNode(p->opr.op[i]);
free (p->opr.op);
}
free (p);
}
void yyerror(char *s) {
fprintf(stdout, "%s\n", s);
}
int main(void) {
yyparse();
return 0;
}
If you just use
flex calc3.l
then flex produces a scanner called lex.yy.c. (I removed the -l option which was used in the original question. -l causes flex to be more compatible with certain aspects of the original lex utility, and it has no use except for compiling ancient lex scanners.)
Similarly, if you just use
yacc -vd calc3.y
the bison will produce files called y.tab.c and y.tab.h. And
gcc y.tab.c -lm -ll
will produce a file called a.out.
None of that is a good idea. It's far better to give the files meaningful names, based on the input filenames. All three of these tools understand a -o command-line flag which specifies the output name file.
So you could do this:
flex calc3.l
yacc -vd calc3.y
gcc lex.yy.c y.tab.c -lm -ll
But I'd recommend something like this:
flex -o calc3.lex.c calc3.l
bison -o calc3.tab.c -vd calc3.y
gcc -o calc3 calc3.lex.c calc3.tab.c -lm -ll
When you do this, you'll need to change the #include "y.tab.h" to #include "calc3.tab.h". (Note that if you invoke bison as bison rather than as yacc, it will automatically produce output files with names based on the grammar file. But it doesn't hurt to be explicit.)
Even better if you put it in a Makefile, or at least a script file.

What does return 0 do in the code and why is the yywrap function written without a body?

I AM USING BISON AND FLEX.
What does return 0 do in case of the kcalc.l file that I have posted?
And I am not getting the use of yywrap without a body (i mean not literally but an empty body).The code is of a calculator without any variable managing and basic operations that can be done like addition subtraction multiplication division and handling of unary minus operator. I have been studying through the lex and yacc specifications but did not get any answer for the query I asked .
Kcal.y
%{
#include <stdio.h>
%}
%token Number
%left '-' '+'
%left '*' '/'
%nonassoc UMINUS
%%
statement: expression
{ printf(" result = %d\n", $1);} ;
expression: expression '+' expression
{ $$ = $1 + $3;
printf("Recognised'+'expression\n");
}
| expression '-' expression
{ $$ = $1 - $3;
printf("Recognised '-' expression\n");
}
| expression '*' expression
{ $$ = $1 * $3;
printf("Recognised '*' expression\n");
}
| expression '/' expression
{ if ($3 == 0)
printf ("divide by zero\n");
else
$$ = $1 / $3;
printf("Recognised '/' expression\n");
}
| '-' expression %prec UMINUS
{
$$ = - $2;
printf("Recognised paranthesized expression\n");
}
| '(' expression ')'
{
$$ = $2;
printf("Recognised paranthesized expression");
}
| Number { $$ = $1;
printf("Recognised a no.\n");
}
;
%%
int main(void)
{
return yyparse();
}
int yyerror (char *msg)
{
return fprintf(stderr,"Yacc :%s", msg);
}
yywrap()
{
}
 
kcalc.l
%{
#include "y.tab.h"
extern int yylval;
%}
%%
[0-9]+ { yylval = atoi(yytext);
printf("accepted the number : %d\n", yylval);
return Number; }
[ \t] { printf("skipped whitespace \n");}
\n { printf("reached end of line\n");
**return 0;**
}
. { printf("found other data \" %s\n", yytext);
return yytext[0];
}
%%
The return 0 notifies the end-of-input to the parser, so apparently the expression should be contained on a single line. The empty body of yywrap is just wrong. If you use -Wall with the gcc compiler it will give two warnings for yywrap:
kcal.y:54: warning: return type defaults to ‘int’
kcal.y:55: warning: control reaches end of non-void function
The first one because no result type for the function is specified (K&R style C), so it is assumed it should return an int. The second warning because it lacks a return statement for such an int.
Since a newline terminates the input, the chances of yywrap ever being called are slim. But it will be called if the input does not contain a newline. If by sheer accident the (more or less random) return value of yywrap were to be interpreted as 0 the tokenizer would end up in an infinite loop of repeatedly calling yywrap.