How to build a parser using lex/yacc for definition of variables in c - yacc

Hi, I am new to lex/yacc and I am practicing by implementing a parser for the definition of a set of variables, say int x, y, z; double a, b; char c.
I need to assign constant values to the variables, handle character constants as well, and print all variables with their types and values.
I found some code online and edited it, but I always run into errors.
Can anyone help me?
lex code(calc.l)
%{
#include "y.tab.h"
%}
%%
"print" {return print;}
"exit" {return exit_command;}
[a-zA-Z] {yylval.id = yytext[0]; return identifier;}
[0-9]+ {yylval.num = atoi(yytext); return number;}
[a-zA-Z] {yylval.const = yytext[0]; return character;}
[0-9]*\.[0-9]+ {yylval.num = atoi(yytext); return decimal;}
[ \t\n] ;
[-+=;] {return yytext[0];}
. {ECHO; yyerror ("unexpected character");}
%%
/* End-of-input hook for the scanner: always reports that no further input follows. */
int yywrap (void)
{
    return 1;
}
yacc code(calc.y)
%{
void yyerror (char *s);
#include <stdio.h> /* C declarations used in actions */
#include <stdlib.h>
int symbols[52];
int symbolVal(char symbol);
void updateSymbolVal(char symbol, int val);
void updateSymbolValD(char symbol, double dval);
void updateSymbolValC(char symbol, char cval);
%}
%union {int num; char id;double deci;char const;} /* Yacc definitions */
%start line
%token print
%token exit_command
%token <num> number
%token <id> identifier
%token <deci> decimal
%token <const> character
%type <num> line exp term
%type <id> assignment
%%
/* descriptions of expected inputs corresponding actions (in C) */
line : assignment ';' {;}
| exit_command ';' {exit(EXIT_SUCCESS);}
| print exp ';' {printf("Printing %d\n", $2);}
| line assignment ';' {;}
| line print exp ';' {printf("Printing %d\n", $3);}
| line exit_command ';' {exit(EXIT_SUCCESS);}
;
assignment : identifier '=' exp { updateSymbolVal($1,$3); }
;
exp : term {$$ = $1;}
;
term : number {$$ = $1;}
| character {$$ = $1;}
| decimal {$$ = $1;}
| identifier {$$ = symbolVal($1);}
;
%% /* C code */
/*
 * Maps a single-letter variable name to its slot in the symbol table.
 * Upper-case letters get token - 'A' (0..25 with an ASCII-style contiguous
 * alphabet) and lower-case letters get token - 'a' + 26 (26..51).
 * Any other character yields -1.
 */
int computeSymbolIndex(char token)
{
    /* The <ctype.h> classifiers are only defined for values representable
       as unsigned char (or EOF), so convert before calling them. */
    unsigned char c = (unsigned char)token;

    if (islower(c)) {
        return c - 'a' + 26;
    }
    if (isupper(c)) {
        return c - 'A';
    }
    return -1;
}
/*
 * Returns the value stored for a given symbol, or 0 when the symbol is
 * not a letter and therefore has no slot in the table.
 */
int symbolVal(char symbol)
{
    int bucket = computeSymbolIndex(symbol);

    /* computeSymbolIndex() yields -1 for non-letters; never index with it. */
    if (bucket < 0) {
        return 0;
    }
    return symbols[bucket];
}
/*
 * Updates the value of a given integer symbol.
 * A symbol that is not a letter has no slot in the table and is ignored.
 */
void updateSymbolVal(char symbol, int val)
{
    int bucket = computeSymbolIndex(symbol);

    /* computeSymbolIndex() yields -1 for non-letters; never index with it. */
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = val;
}
/*
 * Updates the value of a given double symbol.
 * The name matches the updateSymbolValD prototype in the declarations
 * section: C has no overloading, so a second function called
 * updateSymbolVal would clash with the integer version.
 * The table stores ints, so the fractional part of dval is discarded.
 * A symbol that is not a letter has no slot in the table and is ignored.
 */
void updateSymbolValD(char symbol, double dval)
{
    int bucket = computeSymbolIndex(symbol);

    /* computeSymbolIndex() yields -1 for non-letters; never index with it. */
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = (int)dval;
}
/*
 * Updates the value of a given character symbol.
 * The name matches the updateSymbolValC prototype in the declarations
 * section: C has no overloading, so a second function called
 * updateSymbolVal would clash with the integer version.
 * A symbol that is not a letter has no slot in the table and is ignored.
 */
void updateSymbolValC(char symbol, char cval)
{
    int bucket = computeSymbolIndex(symbol);

    /* computeSymbolIndex() yields -1 for non-letters; never index with it. */
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = cval;
}
/* Program entry point: clears every slot of the symbol table, then runs the parser
   and uses its result as the exit status. */
int main (void) {
    int slot = 0;

    /* init symbol table */
    while (slot < 52) {
        symbols[slot] = 0;
        slot++;
    }
    return yyparse ( );
}
/* Error callback: writes the message s, followed by a newline, to stderr. */
void yyerror (char *s)
{
    fprintf (stderr, "%s\n", s);
}
It always gives me the warning ""calc.l", line 9: warning, rule cannot be matched", which refers to this line of code: "[a-zA-Z] {yylval.const = yytext[0]; return character;}"
I have changed my lex and yacc code to the versions below and have run into another error. I am hoping someone can help me understand the error and get the code running. I have searched online for the meaning of the error, but I still do not understand it.
**Lex codes**
calc.l
%{
#include "y.tab.h"
%}
%%
"print" {return print;}
"exit" {return exit_command;}
[a-zA-Z] {yylval.id = yytext[0]; return identifier;}
[0-9]+ {yylval.num = atoi(yytext); return number;}
[\'.\'] {yylval.const = yytext[0]; return character;}
[0-9]*\.[0-9]+ {yylval.deci = atof(yytext); return decimal;}
[ \t\n] ;
[=;] {return yytext[0];}
. {ECHO; yyerror ("unexpected character");}
%%
/* End-of-input hook for the scanner: always reports that no further input follows. */
int yywrap (void)
{
    return 1;
}
**yacc codes**
**calc.y**
%{
void yyerror (char *s);
#include <stdio.h> /* C declarations used in actions */
#include <stdlib.h>
int symbols[52];
int symbolVal(char symbol);
void updateSymbolVal(char symbol, int val);
void updateSymbolValD(char symbol, double dval);
void updateSymbolValC(char symbol, char cval);
%}
%union {int num; char id;double deci;char const;} /* Yacc definitions */
%start line
%token print
%token exit_command
%token <num> number
%token <id> identifier
%token <deci> decimal
%token <const> character
%type <id> assignment
%type <id> charact_assign
%type <id> double_assign
%type <num> exp
%type <const> char_con
%type <deci> real_val
%%
/* descriptions of expected inputs corresponding actions (in C) */
line : assignment ';' {;}
| charact_assign ';' {;}
| double_assign ';' {;}
| exit_command ';' {exit(EXIT_SUCCESS);}
| print assignment ';' {printf("Printing %d\n", $2);}
| line assignment ';' {;}
| line charact_assign ';' {;}
| line double_assign ';' {;}
| line print assignment ';' {printf("Printing %d\n", $3);}
| line exit_command ';' {exit(EXIT_SUCCESS);}
;
assignment : identifier '=' exp { updateSymbolVal($1,$3); }
;
exp : number {$$ = $1;}
;
charact_assign: identifier '=' char_con { updateSymbolVal($1,$3); }
;
char_con : character {$$ = $1;}
;
double_assign: identifier '=' real_val { updateSymbolVal($1,$3); }
;
real_val : decimal {$$ = $1;}
;
%% /* C code */
/*
 * Maps a single-letter variable name to its slot in the symbol table.
 * Upper-case letters get token - 'A' (0..25 with an ASCII-style contiguous
 * alphabet) and lower-case letters get token - 'a' + 26 (26..51).
 * Any other character yields -1.
 */
int computeSymbolIndex(char token)
{
    /* The <ctype.h> classifiers are only defined for values representable
       as unsigned char (or EOF), so convert before calling them. */
    unsigned char c = (unsigned char)token;

    if (islower(c)) {
        return c - 'a' + 26;
    }
    if (isupper(c)) {
        return c - 'A';
    }
    return -1;
}
/*
 * Returns the value stored for a given symbol, or 0 when the symbol is
 * not a letter and therefore has no slot in the table.
 */
int symbolVal(char symbol)
{
    int bucket = computeSymbolIndex(symbol);

    /* computeSymbolIndex() yields -1 for non-letters; never index with it. */
    if (bucket < 0) {
        return 0;
    }
    return symbols[bucket];
}
/*
 * Updates the value of a given integer symbol.
 * A symbol that is not a letter has no slot in the table and is ignored.
 */
void updateSymbolVal(char symbol, int val)
{
    int bucket = computeSymbolIndex(symbol);

    /* computeSymbolIndex() yields -1 for non-letters; never index with it. */
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = val;
}
/*
 * Updates the value of a given double symbol.
 * The name matches the updateSymbolValD prototype in the declarations
 * section: C has no overloading, so a second function called
 * updateSymbolVal would clash with the integer version.
 * The table stores ints, so the fractional part of dval is discarded.
 * A symbol that is not a letter has no slot in the table and is ignored.
 */
void updateSymbolValD(char symbol, double dval)
{
    int bucket = computeSymbolIndex(symbol);

    /* computeSymbolIndex() yields -1 for non-letters; never index with it. */
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = (int)dval;
}
/*
 * Updates the value of a given character symbol.
 * The name matches the updateSymbolValC prototype in the declarations
 * section: C has no overloading, so a second function called
 * updateSymbolVal would clash with the integer version.
 * A symbol that is not a letter has no slot in the table and is ignored.
 */
void updateSymbolValC(char symbol, char cval)
{
    int bucket = computeSymbolIndex(symbol);

    /* computeSymbolIndex() yields -1 for non-letters; never index with it. */
    if (bucket < 0) {
        return;
    }
    symbols[bucket] = cval;
}
/* Program entry point: clears every slot of the symbol table, then runs the parser
   and uses its result as the exit status. */
int main (void) {
    int slot = 0;

    /* init symbol table */
    while (slot < 52) {
        symbols[slot] = 0;
        slot++;
    }
    return yyparse ( );
}
/* Error callback: writes the message s, followed by a newline, to stderr. */
void yyerror (char *s)
{
    fprintf (stderr, "%s\n", s);
}
**`Below are the errors i am running into:`**
"calc.y", line 12: unrecognized '%' directive
"calc.y", line 14: unrecognized '%' directive
"calc.y", line 15: unrecognized '%' directive
"calc.y", line 16: unrecognized '%' directive
"calc.y", line 17: unrecognized '%' directive
"calc.y", line 18: unrecognized '%' directive
"calc.y", line 19: unrecognized '%' directive
"calc.y", line 20: unrecognized '%' directive
"calc.y", line 21: unrecognized '%' directive
"calc.y", line 22: unrecognized '%' directive
"calc.y", line 23: unrecognized '%' directive
"calc.y", line 24: unrecognized '%' directive
"calc.y", line 25: unrecognized '%' directive
"calc.y", line 30: unrecognized rule
"calc.y", line 30: unrecognized rule
"calc.y", line 30: unrecognized rule
"calc.y", line 34: unrecognized rule
"calc.y", line 35: unrecognized rule
"calc.y", line 36: unrecognized rule
"calc.y", line 37: unrecognized rule
"calc.y", line 38: unrecognized rule
"calc.y", line 39: unrecognized rule
"calc.y", line 40: unrecognized rule
"calc.y", line 41: unrecognized rule
"calc.y", line 42: unrecognized rule

You have the same pattern twice:
[a-zA-Z] {yylval.id = yytext[0]; return identifier;}
[0-9]+ {yylval.num = atoi(yytext); return number;}
[a-zA-Z] {yylval.const = yytext[0]; return character;}
So, anything that matches [a-zA-Z] will be matched by the first one, and nothing will ever match the second one.
In case it wasn't clear: a lex/flex lexer prefers the longest match, and when several rules match the same length of input, the rule listed first is the one that's used (later rules that match the same text are never considered). This is why (for example) you always put the . pattern last (assuming you use it, but you usually do)--since it matches any single character except a newline, a single-character pattern placed after it can never match (newline aside).

Related

Yacc %define parse.error verbose generates error

When I try to get more out of my "syntax error", I use the approach described on many websites, but for some reason each variant produces its own error.
I was getting the standard "syntax error" on line 5 of the input file, so I wanted to add better error handling to see exactly what the issue is. So I added:
%define parse.error verbose
However, it gives me this;
error: %define variable 'parse.error' is not used
Below are my files. As long as you keep it constructive, feel free to comment on more than just the error parts; any help is welcome :)
(As long as the errors get fixed as well :P )
Thanks in advance!
lex file;
%option nounput yylineno
%{
#include "yaccTest.tab.h"
void InvalidToken();
void extern yyerror (char *s);
%}
whitespace [ \t\r\v\f]
linefeed \n
%%
";" {return SEMICOLON;}
"=" {return EQ;}
"+" {return PLUS;}
"-" {return MINUS;}
"*" {return MULTIPLY;}
"/" {return DEVIDE;}
"(" {return BO;}
")" {return BC;}
"^" {return POWER;}
"print" {return PRINT;}
[a-zA-Z][a-zA-Z0-9]* {yylval.charValue = yytext[0]; return IDENTIFIER;}
[0-9]+ {yylval.intValue = atoi(yytext); return NUMBER;}
{whitespace} {;}
. {InvalidToken();}
%%
/*
 * Error callback: prints the message s together with the current input
 * line number (yylineno) on stderr and terminates the program.
 */
void yyerror(char *s) {
    fprintf(stderr, "\nERROR ON LINE %d : \n %s\n", yylineno, s);
    /* This is an error path, so do not report success to the caller's shell. */
    exit(EXIT_FAILURE);
}
void InvalidToken(){
printf("ERROR ON LINE %d : \n Invalid Token %s\n", yylineno,yytext);
exit(0);
}
/* End-of-input hook for the scanner: always reports that no further input follows. */
int yywrap (void)
{
    return 1;
}
yacc file;
%{
#include <stdio.h>
#include <stdlib.h>
int getVariableValue(char varID);
extern int yylineno;
int varIDs[52] = {0};
int varValues[52] = {0};
%}
%define parse.lac full
%define parse.error verbose
%union YYSTYPE {int intValue; char charValue;}
%token COLON SEMICOLON ST SE EQ GE GT PLUS MINUS MULTIPLY DEVIDE BO BC CBO CBC POWER LOOP PRINT
%token <intValue> NUMBER
%token <charValue> IDENTIFIER CHAR
%type <charValue> declaration expression
%type <intValue> numval
%right EQ
%left PLUS MINUS
%left MULTIPLY DEVIDE
%left POWER
%%
declaration : IDENTIFIER EQ expression
| declaration IDENTIFIER EQ expression
;
expression : numval SEMICOLON
| PRINT BO numval BC SEMICOLON {printf("Printing");}
;
numval : NUMBER {$$ = $1;}
| NUMBER PLUS NUMBER {$$ = $1 + $3;}
| NUMBER MINUS NUMBER {$$ = $1 - $3;}
| NUMBER MULTIPLY NUMBER {$$ = $1 * $3;}
| NUMBER DEVIDE NUMBER {$$ = $1 / $3;}
| NUMBER POWER NUMBER {int i;int j = $1;for(i = 1; i < $3; i++){j=j*$1;};$$ = j;}
;
%%
/*
 * Looks up the value stored for varID.
 * Scans every entry of varIDs and returns the value paired with the last
 * entry equal to varID, or 0 when no entry matches.
 */
int getVariableValue(char varID) {
    int localTemp = 0;   /* well-defined result when nothing matches */
    size_t i;

    for (i = 0; i < sizeof(varIDs) / sizeof(varIDs[0]); i++) {
        if (varID == varIDs[i]) {
            localTemp = varValues[i];
        }
    }
    return localTemp;
}
/*
 * Stores varValue under varID.
 *
 * Every existing entry whose ID equals varID is overwritten. If there is
 * none, the first unused slot (ID 0, the value the tables are initialised
 * with) is claimed for the new variable.
 *
 * Returns 0 on success. If an unused slot unexpectedly already holds a
 * value, missingVarIDError() is called; if no unused slot is left,
 * notEnoughStorageError() is called. Both helpers terminate the program.
 */
int setVariableValue(char varID, int varValue) {
    const size_t capacity = sizeof(varIDs) / sizeof(varIDs[0]);
    /* The error helpers take the identifier as a NUL-terminated string. */
    char name[2] = { varID, '\0' };
    int varPresent = 0;
    size_t i;

    for (i = 0; i < capacity; i++) {
        if (varID == varIDs[i]) {
            varValues[i] = varValue;
            varPresent = 1;
        }
    }
    if (varPresent) {
        return 0;
    }

    for (i = 0; i < capacity; i++) {
        if (varIDs[i] != 0) {
            continue;   /* slot already belongs to another variable */
        }
        if (varValues[i] != 0) {
            /* An unnamed slot should not carry a value. */
            missingVarIDError(name, varValue);
        }
        varIDs[i] = varID;
        varValues[i] = varValue;
        return 0;
    }

    notEnoughStorageError(name, varValue);
    return -1;   /* not reached: the helper above exits */
}
/*
 * Reports that a slot without an identifier already holds a value, then
 * terminates the program.
 *   id  - identifier being stored, as a NUL-terminated string
 *   val - value that was being stored
 * Never returns to the caller.
 */
int missingVarIDError(char *id, int val){
    /* val is an int, so it must be printed with %d. */
    printf("\nERROR ON LINE %d : \nIdentifier '%s' not found, but assigned location DOES have a value; %d",yylineno,id,val);
    /* This is an error path, so do not report success to the caller's shell. */
    exit(EXIT_FAILURE);
}
/*
 * Reports that no storage slot is left for a new identifier, then
 * terminates the program.
 *   id  - identifier being stored, as a NUL-terminated string
 *   val - value that could not be stored
 * Never returns to the caller.
 */
int notEnoughStorageError(char *id, int val){
    /* The value placeholder must be a valid conversion (%d) so that val is printed. */
    printf("\nERROR ON LINE %d : \nIdentifier '%s' did not fit in StorageArray, '%d' not stored!",yylineno,id,val);
    /* This is an error path, so do not report success to the caller's shell. */
    exit(EXIT_FAILURE);
}
/* Program entry point: runs the parser and uses its result as the exit status. */
int main (void) {
    int status = yyparse ( );

    return status;
}
Input file;
x=4;
y=2+6;
X=2;
z=5;
print(4);

lex and yacc to parse trigonometric expression

I have the following code for lex and yacc. I am getting extra values in the printed output. Can anyone tell me what's wrong with the code?
Lex code:
%{
#include <stdio.h>
#include "y.tab.h"
%}
%%
[ \t] ;
[+-] { yylval=yytext; return Sym;}
(s|c|t)..x { yylval=yytext; return Str;}
[a-zA-Z]+ { printf("Invalid");}
%%
/* End-of-input hook for the scanner: always reports that no further input follows. */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
yacc code:
%{
#include<stdio.h>
%}
%start exps
%token Sym Str
%%
exps: exps exp
| exp
;
exp : Str Sym Str {printf("%s",$1); printf("%s",$2); printf("%s",$3);}
;
%%
/* Program entry point: runs the parser once and uses its result as the exit status. */
int main (void)
{
    int status = yyparse();

    return status;
}
/*
 * Error callback: writes the message err, followed by a newline, to stderr.
 * The return type is spelled out (implicit int is not valid since C99)
 * and a definite value, 0, is returned.
 */
int yyerror(char *err) {
    fprintf(stderr, "%s\n",err);
    return 0;
}
Input:
sinx+cosx
output:
sinx+cosx+cosxcosx
look at the output of the code!!!
yytext is a pointer into flex's internal scanning buffer, so its contents will be modified when the next token is read. If you want to return it to the parser, you need to make a copy:
[+-] { yylval=strdup(yytext); return Sym;}
(s|c|t)..x { yylval=strdup(yytext); return Str;}
Where symbols are a single character, it might make more sense to return that character directly in the scanner:
[-+] { return *yytext; }
in which case, your yacc rules should use the character directly, in single quotes:
exp : Str '+' Str {printf("%s + %s",$1, $3); free($1); free($3); }
| Str '-' Str {printf("%s - %s",$1, $3); free($1); free($3); }

lex and yacc : a simple calculator with syntax error

A simple calculator that supports only + - * / and integers. I use GNU/Linux.
hoc1.l:
%{
#include "y.tab.h"
extern int yylval;
%}
%%
[ \t] { ; }
[0-9]+ { sscanf(yytext, "%d", &yylval); printf("\nget %d\n", yylval); return NUMBER; }
\n {return 0;}
%%
/* End-of-input hook for the scanner: always reports that no further input follows. */
int yywrap(void)
{
    const int no_more_input = 1;

    return no_more_input;
}
hoc1.y
%{
#include<stdio.h>
#define YYSTYPE int
%}
%token NUMBER
%left '+' '-'
%left '*' '/'
%%
list:
| list '\n'
| list expr '\n' {printf("\t%d\n",$2);}
;
expr: NUMBER { $$ = $1; }
| expr '+' expr {$$ = $1+$3;}
| expr '-' expr {$$ = $1-$3;}
| expr '*' expr {$$ = $1*$3;}
| expr '/' expr {$$ = $1/$3;}
;
%%
/* Program entry point: runs the parser, ignores its result and always exits with status 0. */
int main(void)
{
    (void)yyparse();

    return 0;
}
/* Error callback: writes the message s between asterisks on stderr; always returns 0. */
int yyerror(char *s)
{
    const int result = 0;

    fprintf(stderr, "*%s*\n", s);
    return result;
}
runtime-error:
% ./hoc
8+9
get 8
+
get 9
*syntax error*
Why does this happen, and how do I solve it? Thanks!
You forgot to include your operators in your lex file, and you should return a nonzero value on a successful token read: returning 0 from yylex tells the parser that the end of the input has been reached. Remove the line in your lex file handling the newline character and replace it with the following:
[-+*/\n] { return *yytext; }
. { yyerror("unrecognized character"); return 0; }
Now it should work. Returning *yytext allows your yacc grammar to parse an expression successfully, e.g. if you get a '+', return it to allow the grammar to parse properly.

What does return 0 do in the code and why is the yywrap function written without a body?

I AM USING BISON AND FLEX.
What does return 0 do in case of the kcalc.l file that I have posted?
And I do not understand the use of yywrap without a body (I mean not literally without one, but with an empty body). The code is for a calculator without any variable management; it supports basic operations such as addition, subtraction, multiplication and division, and it handles the unary minus operator. I have been studying the lex and yacc specifications but did not find an answer to my question.
Kcal.y
%{
#include <stdio.h>
%}
%token Number
%left '-' '+'
%left '*' '/'
%nonassoc UMINUS
%%
statement: expression
{ printf(" result = %d\n", $1);} ;
expression: expression '+' expression
{ $$ = $1 + $3;
printf("Recognised'+'expression\n");
}
| expression '-' expression
{ $$ = $1 - $3;
printf("Recognised '-' expression\n");
}
| expression '*' expression
{ $$ = $1 * $3;
printf("Recognised '*' expression\n");
}
| expression '/' expression
{ if ($3 == 0)
printf ("divide by zero\n");
else
$$ = $1 / $3;
printf("Recognised '/' expression\n");
}
| '-' expression %prec UMINUS
{
$$ = - $2;
printf("Recognised paranthesized expression\n");
}
| '(' expression ')'
{
$$ = $2;
printf("Recognised paranthesized expression");
}
| Number { $$ = $1;
printf("Recognised a no.\n");
}
;
%%
/* Program entry point: runs the parser and uses its result as the exit status. */
int main(void)
{
    int status = yyparse();

    return status;
}
/* Error callback: writes "Yacc :" followed by msg to stderr and returns whatever fprintf returned. */
int yyerror (char *msg)
{
    int written = fprintf(stderr,"Yacc :%s", msg);

    return written;
}
/* NOTE(review): declared without a return type and with an empty body,
   so no value is ever returned to the caller. */
yywrap()
{
}
 
kcalc.l
%{
#include "y.tab.h"
extern int yylval;
%}
%%
[0-9]+ { yylval = atoi(yytext);
printf("accepted the number : %d\n", yylval);
return Number; }
[ \t] { printf("skipped whitespace \n");}
\n { printf("reached end of line\n");
**return 0;**
}
. { printf("found other data \" %s\n", yytext);
return yytext[0];
}
%%
The return 0 notifies the end-of-input to the parser, so apparently the expression should be contained on a single line. The empty body of yywrap is just wrong. If you use -Wall with the gcc compiler it will give two warnings for yywrap:
kcal.y:54: warning: return type defaults to ‘int’
kcal.y:55: warning: control reaches end of non-void function
The first one because no result type for the function is specified (K&R style C), so it is assumed it should return an int. The second warning because it lacks a return statement for such an int.
Since a newline terminates the input, the chances of yywrap ever being called are slim. But it will be called if the input does not contain a newline. If by sheer accident the (more or less random) return value of yywrap were to be interpreted as 0 the tokenizer would end up in an infinite loop of repeatedly calling yywrap.

Syntax error in Bison after one token is processed

I am trying to come up to speed on Flex and Bison. I can parse one token with a very simple "language" but it fails on the second, even though the token is legitimate.
test.l:
%{
#include <stdio.h>
#include "test.hpp"
%}
%%
[0-9]+ {printf("Number entered\n"); return INTEGER_NUMBER;}
[a-zA-Z]+ {printf("plain text entered: '%s'\n",yytext); return PLAIN_TEXT;}
[ \t] ;
. ;
%%
test.y
%{
#include <stdio.h>
extern "C" {
int yyparse(void);
int yylex(void);
int yywrap() { return 1; }
extern int yylineno;
extern char* yytext;
extern int yylval;
}
/* #define YYSTYPE char * */
void yyerror(const char *message)
{
fprintf(stderr, "%d: error: '%s' at '%s', yylval=%u\n", yylineno, message, yytext, yylval);
}
/* Program entry point: runs the parser once. The return type is spelled out
   because C++ does not allow a declaration of main without one. */
int main()
{
    yyparse();
    return 0;
}
%}
%token PLAIN_TEXT INTEGER_NUMBER
%%
test : text | number;
text : PLAIN_TEXT
{
/*printf("plain text\n");*/
};
number : INTEGER_NUMBER
{
/*printf("number\n");*/
};
%%
Results:
$ ./test
cat
plain text entered: 'cat'
dog
plain text entered: 'dog'
1: error: 'syntax error' at 'dog', yylval=0
$ ./test
34
Number entered
34
Number entered
1: error: 'syntax error' at '34', yylval=0
Why am I getting this syntax error?
Your test.y seems to lack a grammar rule for the case where several tests follow one another.
So, how about adding a rule like the following?
%%
tests : test | tests test; /* added */
test : text | number;
...