Calculator in lex and yacc - yacc

I am trying to create a calculator using lex and yacc. However, I cannot understand how to give operator precedence to this program, and I could not find any information about it. Which code do I need to add to my project so that it calculates correctly?
Yacc file is:
%{
/* Prologue: C headers and forward declarations needed by the generated parser. */
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
int yylex();
void yyerror(const char *s);
%}
/* INTEGER is the only named token; operators, ';' and newline are used as
   single-character literal tokens in the rules below. */
%token INTEGER
/* Two left-associative operator groups; the '*' '/' group is declared before
   the '+' '-' group. '%' and '^' appear in the rules but in neither group. */
%left '*' '/'
%left '+' '-'
%%
/* A program is one or more lines. */
program:
program line | line
/* A line is either an expression followed by ';' (its value is printed as a
   decimal integer) or a bare newline. */
line:
expr ';' { printf("%d\n",$1); } ; | '\n'
/* Every binary operator takes an expr on its left and a term on its right.
   The '^' alternative yields its left operand unchanged. */
expr:
expr '+' term { $$ = $1 + $3; }
| expr '-' term { $$ = $1 - $3; }
| expr '*' term { $$ = $1 * $3; }
| expr '/' term { $$ = $1 / $3; }
| expr '%' term { $$ = $1 % $3; }
| expr '^' term { $$ = $1 ; }
| term { $$ = $1; }
/* A term is a single INTEGER token. */
term:
INTEGER { $$ = $1; }
%%
/* Error callback used by the parser: writes the message and a newline to stderr. */
void yyerror(const char *s)
{
    fprintf(stderr, "%s\n", s);
}
/* Entry point: runs the parser once and always exits with status 0. */
int main(void)
{
    /*yydebug=1;*/
    yyparse();
    return 0;
}
Lex file is:
%{
/* Scanner prologue. The rules after the first %% do the following:
   - runs of blanks and tabs are skipped;
   - runs of digits are converted with atoi(), stored in yylval and returned
     as INTEGER;
   - the operator characters, newline and ';' are returned as their own
     character codes;
   - any other character is reported through yyerror(). */
#include <stdlib.h>
#include <stdio.h>
void yyerror(char*);
/* Carries the numeric value of an INTEGER token to the parser. */
extern int yylval;
/* Presumably the parser-generated header that defines INTEGER -- confirm. */
#include "calc.tab.h"
#include<time.h>
%}
%%
[ \t]+ ; //skip whitespace
[0-9]+ {yylval = atoi(yytext); return INTEGER;}
[-+*/%^] {return *yytext;}
\n {return *yytext;}
; {return *yytext;}
. {char msg[25]; sprintf(msg,"%s <%s>","invalid character",yytext); yyerror(msg);}

%left '*' '/'
%left '+' '-'
Precedence declarations are specified in the order from lowest precedence to highest. So in the above code you give * and / the lowest precedence level and + and - the highest. That's the opposite order of what you want, so you'll need to switch the order of these two lines. You'll also want to add the operators % and ^, which are currently part of your grammar, but not your precedence annotations.
With those changes, you'll now have specified the precedence you want, but it won't take effect yet. Why not? Because precedence annotations are used to resolve ambiguities, but your grammar isn't actually ambiguous.
The way you've written the grammar, with only the left operand of all operators being expr and the right operand being term, there's only one way to derive an expression like 2+4*2, namely by deriving 2+4 from expr and 2 from term (because deriving 4*2 from term would be impossible since term can only match a single number). So your grammar treats all operators as left-associative and having the same precedence and your precedence annotations aren't considered at all.
In order for the precedence annotations to be considered, you'll have to change your grammar, so that both operands of the operators are expr (e.g. expr '+' expr instead of expr '+' term). Written like that an expression like 2+4*2 could either be derived by deriving 2+4 from expr as the left operand and 2 from expr as the right operand or 2 as the left and 4*2 as the right and this ambiguity will be resolved using your precedence annotations.

Related

Yacc parser not detecting my language well

I am new to yacc and I am trying to define some rules for my language.
I have written a grammar "well" and it runs and executes without an error but for some reason, it doesn't do what it is supposed to do.
mylex.l
%{
/* Scanner prologue: the token codes returned by the rules come from the
   included parser header. */
#include <stdio.h>
#include "myyacc.tab.h"
/* NOTE(review): not referenced anywhere in this listing; the conventional
   semantic-value variable is named yylval -- confirm the intended name. */
extern int yyval;
%}
/* KEEP TRACK OF LINE NUMBER*/
%option yylineno
/* Named patterns used by the rules below. */
uppercase [A-Z]
lowercase [a-z]
/* NOTE(review): inside [...] the braces are ordinary characters, so this is
   a class of '{', '}' and the letters of the two names, not the union of the
   two named patterns. */
alpha [{uppercase}{lowercase}]
digit [0-9]
/* NOTE(review): same caveat as for alpha -- names are not expanded inside a
   character class. */
alphanum [{alpha}{digit}]
/* NOTE(review): "uppercase" is written without braces here, so it reads as
   literal text rather than a reference to the pattern above -- confirm. */
id uppercase({alphanum}|_)*
int_literal [0-9]+
float_literal [0-9]+\.[0-9]+
/* A double-quoted run of characters containing no double quote. */
string_literal \"[^\"]*\"
/* Text starting with "##" and ending with a later "##"; '.' does not match
   a newline, so the whole comment must sit on one line. */
comment (##)(.)*(##)
/* Rules: each keyword, operator and bracket returns its own token code;
   literals and identifiers return INT_LITERAL, FLOAT_LITERAL, STRING_LITERAL
   or ID; comments are discarded. Both "end" and "END" return END. */
%%
"int" {return INT;}
"float" {return FLOAT;}
"boolean" {return BOOLEAN;}
"if" {return IF;}
"else" {return ELSE;}
"end" {return END;}
"true" {return TRUE;}
"false" {return FALSE;}
"read" {return READ;}
"print" {return PRINT;}
"while" {return WHILE;}
"START" {return START;}
"END" {return END;}
"+" {return ADD;}
"-" {return SUB;}
"*" {return MUL;}
"/" {return DIV;}
"&&" {return LOG_AND;}
"||" {return LOG_OR;}
"!" {return LOG_NOT;}
"==" {return EQ;}
"<>" {return NEQ;}
"<" {return LT;}
"<=" {return LEQ;}
">" {return GT;}
">=" {return GEQ;}
"=" {return ASSIGN;}
"(" {return LPAREN;}
")" {return RPAREN;}
"{" {return LBRACE;}
"}" {return RBRACE;}
{int_literal} {return INT_LITERAL;}
{float_literal} {return FLOAT_LITERAL;}
{string_literal} {return STRING_LITERAL;}
{id} {return ID;}
{comment} { ; }
%%
/* End-of-input hook for the scanner: always answers 1, the conventional
   "no more input" reply. */
int yywrap() {
    const int no_more_input = 1;

    return no_more_input;
}
myyacc.y
%{
/* Parser prologue: declarations of the scanner-side objects used by main()
   and yyerror() further down. */
#include <stdio.h>
#include <stdlib.h>
extern int yylineno;
extern FILE* yyin;
extern int yyerror (char* msg);
extern char * yytext;
%}
/* definitions section start */
/* Keyword tokens, then literal/identifier tokens. */
%token INT FLOAT BOOLEAN IF ELSE END TRUE FALSE READ PRINT WHILE START
%token INT_LITERAL FLOAT_LITERAL STRING_LITERAL ID ERROR
/* Operator associativity and precedence, in the order written.
   NOTE(review): yacc gives later lines higher precedence, so as listed
   MUL/DIV bind less tightly than ADD/SUB and LOG_OR binds tightest --
   confirm this ordering is intended. */
%right ASSIGN
%right LOG_NOT
%left MUL DIV
%left ADD SUB
%left LPAREN RPAREN
%left LBRACE RBRACE
%left LT LEQ GT GEQ
%left EQ NEQ
%left LOG_AND
%left LOG_OR
%start program
/* definitions section end */
%%
/* rules section start */
/* A program is START, one or more statements, then END.
   NOTE(review): the printf call in this action has no ';' before the
   closing brace as shown here -- verify against the real file. */
program : START statements END {printf("No syntax errors detected")};
statements : statements statement
| statement
;
statement : dec_stmt
| assignment_stmt
| print_stmt
| read_stmt
| condition_stmt
| while_stmt
;
/* Declaration: a type keyword followed by an identifier. */
dec_stmt : type ID
;
type : INT
| FLOAT
| BOOLEAN
;
assignment_stmt : ID ASSIGN expression
;
/* An expression is a single exp or one comparison between two exps. */
expression : exp EQ exp
| exp NEQ exp
| exp LT exp
| exp LEQ exp
| exp GT exp
| exp GEQ exp
| exp
;
/* Arithmetic and logical operators share one ambiguous nonterminal, so
   grouping is decided by the precedence declarations above. */
exp : exp MUL exp
| exp DIV exp
| exp ADD exp
| exp SUB exp
| exp LOG_AND exp
| exp LOG_OR exp
| LOG_NOT exp
| LPAREN exp RPAREN
| INT_LITERAL
| FLOAT_LITERAL
| ID
| TRUE
| FALSE
;
/* print takes exactly one identifier or one string literal. */
print_stmt : PRINT LPAREN ID RPAREN
| PRINT LPAREN STRING_LITERAL RPAREN
;
read_stmt : ID ASSIGN READ LPAREN RPAREN
;
/* Each braced body below holds exactly one statement; the if forms are
   closed by END. */
condition_stmt : IF LPAREN expression RPAREN LBRACE statement RBRACE END
| IF LPAREN expression RPAREN LBRACE statement RBRACE ELSE LBRACE statement RBRACE END
;
/* The loop body is likewise a single statement. */
while_stmt : WHILE LPAREN expression RPAREN LBRACE statement RBRACE
;
/* rules section end */
%%
/* auxiliary routines start */
/* Entry point: opens the file named by the first command-line argument as
   the scanner input, runs the parser, and prints whether yyparse() reported
   success (a zero return) or failure. Always returns 0. */
int main(int argc, char *argv[])
{
// don't change this part
/* NOTE(review): argc is never checked and the fopen() result is not tested
   before it is used and later handed to fclose() -- confirm callers always
   supply a readable file. */
yyin = fopen(argv[1], "r" );
if(!yyparse())
printf("\nParsing complete\n");
else
printf("\nParsing failed\n");
fclose(yyin);
return 0;
}
/*
 * Error callback for the parser: prints the current line number, the
 * message and the current token text to stdout, then terminates the
 * process with exit status 1, so control never returns to the caller.
 */
int yyerror (char* msg)
{
    const int failure_status = 1;

    printf("Line %d: %s near %s\n", yylineno, msg, yytext);
    exit(failure_status);
}
/* auxiliary routines end */
Test case
START
int X12
float ABC1
DDe = 7
while(QNn >0) ## this a Comment ##
{ RLk9999 = ACc - 2
CCC = true
}
if ( ACc ==5){ print ( " Inside IF inside Loop " ) } end }
print ( " Hello .. " )
END
Output
Line 3: syntax error near 12
It also gets the line number wrong.
I've been trying to see what I'm doing wrong for some time now and I'd really appreciate a second set of eyes.
You cannot use macros inside character classes. Inside a character class, pattern operators lose their special meaning, so when you write
alphanum [{alpha}{digit}]
you are defining a character class containing {, }, and the letters adghilpt. That doesn't match the 12 in X12.
Anyway, flex already has predefined sets of characters which you can include in your character classes:
* [:lower:] a-z
* [:upper:] A-Z
* [:alpha:] [:lower:][:upper:]
* [:digit:] 0-9
* [:alnum:] [:alpha:][:digit:]
Note that these can only be used inside a character class. So you could write your id pattern as
id [[:upper:]][[:alnum:]_]*
without the need for any other macros.
Please see the flex pattern documentation for more details.
In addition to @rici's answer, I've also noticed that the while_stmt rule in my yacc file accepts only one statement in its body.

How to fix implicit declaration of function 'yyerror' problem

Hi I am making a program that does simple arithmetic operations using Lex and yacc, but I am having a problem with a specific error.
ex1.y
%{
#include <stdio.h>
/* Variable storage: 26 slots, indexed by the semantic value of a VARIABLE
   token. */
int sym[26];
%}
%token INTEGER VARIABLE
/* Both groups are left-associative; the '*' '/' '%' group is declared later
   and therefore binds tighter than '+' '-'. '%' has no rule of its own in
   the grammar below. */
%left '+' '-'
%left '*' '/' '%'
%%
/* A program is a possibly empty sequence of newline-terminated statements. */
program:
program statement '\n'
|
;
/* A statement either prints the value of an expression or stores it in the
   slot of the named variable. */
statement:
expr {printf("%d\n", $1);}
| VARIABLE '=' expr {sym[$1] = $3;}
;
/* Integer arithmetic over literals, variables and parenthesised
   sub-expressions. The INTEGER alternative has no action, so it relies on
   the default $$ = $1. */
expr:
INTEGER
| VARIABLE { $$ = sym[$1];}
| expr '+' expr { $$ = $1 + $3;}
| expr '-' expr { $$ = $1 - $3;}
| expr '*' expr { $$ = $1 * $3;}
| expr '/' expr { $$ = $1 / $3;}
| '(' expr ')' { $$ = $2;}
;
%%
/* Entry point: runs the parser and uses its result as the exit status. */
main() {
    int status = yyparse();

    return status;
}
/* Error callback: writes the message and a newline to stderr and reports 0
   back to the caller. */
int yyerror(char *s){
    const int status = 0;

    fprintf(stderr, "%s\n", s);
    return status;
}
ex1.l
%{
/* Scanner prologue. The rules below map a single lower-case letter to
   VARIABLE (value: its offset from 'a'), a run of digits to INTEGER (value
   via atoi), and return the listed operator characters and newline as their
   own character codes. Blanks and tabs are skipped; anything else is passed
   to yyerror().
   NOTE(review): yyerror() is called in the rules but no declaration for it
   is visible in this prologue. */
#include <stdlib.h>
#include "y.tab.h"
%}
%%
/* variables */
[a-z] {
yylval = *yytext -'a';
return VARIABLE;
}
/* integers */
[0-9]+ {
yylval = atoi(yytext);
return INTEGER;
}
/* operators */
[-+()=/*\n] { return *yytext;}
/* skip whitespace */
[ \t] ;
/* anything else is an error */
. yyerror("invalid character");
%%
/* End-of-input hook called by the scanner; the constant 1 is the
   conventional "stop, nothing more to read" answer. */
int yywrap (void){
    enum { STOP_SCANNING = 1 };

    return STOP_SCANNING;
}
When I execute the instructions below
$ bison -d -y ex1.y
$ lex ex1.l
$ gcc lex.yy.c y.tab.c -o ex1
The following error occurs:
ex1.l: In function ‘yylex’:
ex1.l:28:1: warning: implicit declaration of function ‘yyerror’; did you mean ‘perror’? [-Wimplicit-function-declaration]
28 |
| ^
| perror
y.tab.c: In function ‘yyparse’:
y.tab.c:1227:16: warning: implicit declaration of function ‘yylex’ [-Wimplicit-function-declaration]
1227 | yychar = yylex ();
| ^~~~~
y.tab.c:1402:7: warning: implicit declaration of function ‘yyerror’; did you mean ‘yyerrok’? [-Wimplicit-function-declaration]
1402 | yyerror (YY_("syntax error"));
| ^~~~~~~
| yyerrok
I don't know what is wrong with my code. I would appreciate it if you could tell me how to fix the above error.
The version of bison you are using requires you to declare prototypes for yylex() and yyerror. These should go right after the #include <stdio.h> at the top of the file:
int yylex(void);
int yyerror(char* s);
I would use int yyerror(const char* s) as the prototype for yyerror, because it is more accurate, but if you do that you'll have to make the same change in the definition.
You use yyerror in your lex file, so you will have to add its declaration in that file as well.
main() hasn't been a valid prototype any time this century. Return types are required in function declarations, including main(). So I guess you are basing your code on a very old template. There are better starting points in the examples in the bison manual.
(And don't expect it to be easy to work with parser generators if you have no experience with C.)

How to Read Multiple Lines of input file for arithmetic yacc program?

I am new to compilers and am learning to make a calculator that reads multiple equations (one equation per line) from a .txt file. I am running into a segmentation fault.
YACC Code :
%{
#include <stdio.h>
#include <string.h>
#define YYSTYPE int /* the attribute type for Yacc's stack */
extern int yylval; /* defined by lex, holds attrib of cur token */
extern char yytext[]; /* defined by lex and holds most recent token */
extern FILE * yyin; /* defined by lex; lex reads from this file */
%}
/* NUM is the only named token; operators and parentheses are literal
   character tokens. */
%token NUM
%%
/* The input is one or more Lines. */
Begin : Line
| Begin Line
;
/* NOTE(review): YYSTYPE is int, yet this action hands the value to a %s
   conversion. */
Line : Calc {printf("%s",$$); }
;
/* Prints the value of a complete expression. */
Calc : Expr {printf("Result = %d\n",$1);}
/* Every binary operator takes a Fact on the left and a whole Expr on the
   right, so all four operators group to the right with no difference in
   precedence. A leading '-' negates the Expr that follows. */
Expr : Fact '+' Expr { $$ = $1 + $3; }
| Fact '-' Expr { $$ = $1 - $3; }
| Fact '*' Expr { $$ = $1 * $3; }
| Fact '/' Expr { $$ = $1 / $3; }
| Fact { $$ = $1; }
| '-' Expr { $$ = -$2; }
;
/* A Fact is a parenthesised expression or a number. */
Fact : '(' Expr ')' { $$ = $2; }
| Id { $$ = $1; }
;
/* Takes the number's value from the global yylval rather than from $1. */
Id : NUM { $$ = yylval; }
;
%%
void yyerror(char *mesg); /* this one is required by YACC */

/*
 * Entry point. Expects exactly one argument: the path of the input file.
 * Points the scanner's yyin at that file and runs the parser once.
 * Prints a message and exits with status 1 on a usage error or when the
 * file cannot be opened; otherwise returns 0 after parsing.
 */
int main(int argc, char* *argv){
    if(argc != 2) {printf("usage: calc filename \n"); exit(1);}
    if( !(yyin = fopen(argv[1],"r")) ){
        printf("cannot open file\n");exit(1);
    }
    yyparse();
    return 0;
}
/* Error callback: prints the message to stdout with a "Bad Expression"
   prefix and terminates the process with status 1. */
void yyerror(char *mesg)
{
    const int failure_status = 1;

    printf("Bad Expression : %s\n", mesg);
    /* Parsing is not resumed: the first error ends the run. */
    exit(failure_status);
}
LEX Code :
%{
/* Scanner prologue. NOTE(review): the rules call atoi(), but <stdlib.h> is
   not among the headers included here. */
#include <stdio.h>
#include "y.tab.h"
int yylval; /*declared extern by yacc code. used to pass info to yacc*/
%}
/* Named patterns. "letter" is defined but not used by any rule below. */
letter [A-Za-z]
digit [0-9]
/* NOTE(review): the '*' lets this pattern match an empty string as well as
   a run of digits. */
num ({digit})*
op "+"|"*"|"("|")"|"/"|"-"
/* Whitespace includes the newline, so line ends are discarded along with
   blanks and tabs. */
ws [ \t\n]
other .
/* Rules: whitespace is dropped; a number sets yylval and returns NUM; an
   operator returns its own character; any other character is reported on
   stdout and returned to the parser as '?'. */
%%
{ws} { /* note, no return */ }
{num} { yylval = atoi(yytext); return NUM;}
{op} { return yytext[0];}
{other} { printf("bad%cbad%d\n",*yytext,*yytext); return '?'; }
%%
/* c functions called in the matching section could go here */
I am trying to print the expression along with result.
Thanks In Advance.
In your parser, you have:
Line : Calc {printf("%s",$$); }
Now $$ is the semantic value which the rule is computing, and you haven't assigned anything to it. So it would not be unreasonable to assume that it is undefined, which would be bad, but in fact it does have a value because of the default rule $$ = $1;. All the same, it would be much more readable to write
printf("%s", $1);
But that's not correct, is it? After all, you have
#define YYSTYPE int
so all semantic types are integers. But you're telling printf that $1 is a string (%s). printf will believe you, so it will go ahead and try to dereference the int as though it were a char*, with predictable results (i.e., a segfault).
You are probably using a compiler which is clever enough to notice the fact that you are trying to print an int with a %s format code. But either you haven't asked the compiler to help you or you are ignoring its advice.
Always compile with warnings enabled. If you are using gcc or clang, that means putting -Wall in the command line. (If you are using some other compiler, find out how to produce warnings. It will be documented.) And then read the warnings and fix them before trying to run the program.
There are several other errors and/or questionable practices in your code. Your grammar is inaccurate (why do you use fact as the left-hand operand of every operator?), and despite your comment, your lexical scanner ignores newline characters, so there is no way the parser can know whether expressions are one per line, two per line, or spread over multiple lines; that will make it hard to use the calculator as a command-line tool.
There is no need to define the lex macro digit; (f)lex recognizes the Posix character class [[:digit:]] (and others, documented here) automatically. Nor is it particularly useful to define the macro num. Overuse of lex macros makes your program harder to read; it is usually better to just write the patterns out in place:
[[:digit:]]+ { yylval = atoi(yytext); return NUM; }
which would be more readable and less work both for you and for anyone reading your code. (If your professor or tutor disagrees, I'd be happy to discuss the matter with them directly.)

yacc associativity of nonterminal symbols?

Say I have a grammar like this:
expr : expr '+' expr { $$ = operation('+', $1, $3); }
| expr '-' expr { $$ = operation('-', $1, $3); }
| expr '*' expr { $$ = operation('*', $1, $3); }
| expr '/' expr { $$ = operation('/', $1, $3); }
| num
;
Where each of those operators has a precedence attached and is marked as left associative.
Then I want to refactor my grammar such that:
op : '+' | '-' | '*' | '/' ;
expr : expr op expr { $$ = operation($2, $1, $3); }
| num
;
How does yacc (if even at all) determine the associativity and precedence of op in this case? Will it trace its way through all the possible precedences/associativities of +, -, * and / when evaluating op, or does defining an associativity for nonterminal symbols make no sense?
AFAIK, with precedence order for nonterminals, it uses the precedence of the rightmost terminal symbol, but I can't find any documentation on the associativity rules themselves for nonterminals.
The "normal" way to do this (as far as I'm aware) is to define a different expr type for each operator, that way you get very explicit control over what's happening.
Python's grammar is a good example of this: http://docs.python.org/reference/grammar.html.

Why am I getting conflicts: 1 shift/reduce

I'm new to bison and I'm getting a "conflicts: 1 shift/reduce" error. Can anyone shed some light on this?
Here's the y file.
test.y:
%{
/* Parser prologue: enables verbose error messages and debug support, and
   declares the helpers defined after the grammar. */
#include <stdio.h>
#include <string.h>
#define YYERROR_VERBOSE
#define YYDEBUG 1
void yyerror(const char *str);
int yywrap();
%}
/* Semantic values are either an int or a C string. */
%union
{
int integer;
char *string;
}
/* Variable names and string literals carry a string; number literals carry
   an int. */
%token <string> VAR_LOCAL
%token <integer> LIT_NUMBER
%token <string> LIT_STRING
%token WS_LINEBRK
/* The SYMB_EQL declaration is commented out; the stmt rule below uses the
   string literal "=" instead. */
//%token SYMB_EQL
%token SYMB_PLUS
%token SYMB_MINUS
%token SYMB_MUL
%token SYMB_DIV
%%
/*
// Sample input
num = 10
str = "this is a string"
*/
/* Start symbol: empty, a lone literal, a lone variable, or previously
   accepted inputs followed by a statement and a line break. */
inputs: /* empty token */
| literal
| variable
| inputs stmt WS_LINEBRK
;
/* Assignment of an expression to a variable. */
stmt: variable "=" exps
;
/* An expression is exactly two operands joined by one operator. */
exps: variable op literal
| variable op variable
| literal op literal
| literal op variable
;
op: SYMB_PLUS | SYMB_MINUS | SYMB_MUL | SYMB_DIV ;
/* The three leaf rules below print what they recognise. */
variable: VAR_LOCAL
{
printf("variable: %s\n", $1);
}
;
literal:
number | string
;
string: LIT_STRING
{
printf("word: %s\n", $1);
}
;
number: LIT_NUMBER
{
printf("number: %d\n", $1);
}
;
%%
/* Error callback: writes the message to stderr with an "error: " prefix. */
void yyerror(const char *str)
{
    FILE *sink = stderr;

    fprintf(sink, "error: %s\n", str);
}
/* End-of-input hook for the scanner: unconditionally reports 1, the
   conventional signal that no further input follows. */
int yywrap()
{
    int finished = 1;

    return finished;
}
/*
 * Entry point: runs the parser once and returns 0.
 * The return type is spelled out because implicit int has not been valid
 * C since C99.
 */
int main(void)
{
    yyparse();
    return 0;
}
Here's the lex file
test.l:
%{
/* Scanner prologue. The rules below return VAR_LOCAL for names that start
   with a lower-case letter, LIT_NUMBER for digit runs, LIT_STRING for
   double-quoted text, one token per operator character, and WS_LINEBRK for
   a newline (after echoing it); blanks and tabs are ignored.
   NOTE(review): the rules call strdup(), but <string.h> is not included
   here, and they return SYMB_EQL -- verify that the included header
   defines it. */
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
/* Initialised to 0; no rule in this listing updates it. */
int line_no = 0;
%}
%%
[a-z][a-zA-Z0-9]* {
// local variable
yylval.string=strdup(yytext);
return VAR_LOCAL;
}
[0-9]+ {
//number literal
yylval.integer=atoi(yytext);
return LIT_NUMBER;
}
= return SYMB_EQL;
\+ return SYMB_PLUS;
\- return SYMB_MINUS;
\* return SYMB_MUL;
\/ return SYMB_DIV;
\"[-+\!\.a-zA-Z0-9' ]+\" {
// word literal
yylval.string=strdup(yytext);
return LIT_STRING;
}
\n {
// line break
printf("\n");
return WS_LINEBRK;
}
[ \t]+ /* ignore whitespace */;
%%
bison --report=all test.y (or simply bison -v test.y) will write a file test.output with a detailed description of the generated state machine that allows you to see what's going on - such as the state where the shift/reduce conflict occurs.
In your case, the problem is in the start state (corresponding to your start nonterminal, inputs). Say the first token is VAR_LOCAL. There's two things your parser could do:
It could match the variable case.
It could also match the inputs stmt WS_LINEBRK case: inputs matches the empty string (first line), and stmt matches variable "=" exps.
With the one token of lookahead that bison parsers use, there's no way to tell. You need to change your grammar to get rid of this case.
To fix the grammar, as Fabian has suggested, move the variable and literal to the end of exps from inputs
inputs:
| variable
| literal
exps:
...
| variable
| literal
That allows x= y,x="aliteral" syntax.
To allow for empty input lines, change the /* empty token */ rule to WS_LINEBRK:
inputs: WS_LINEBRK
| stmt WS_LINEBRK
| inputs stmt WS_LINEBRK
;
On another note, the scanner still returns SYMB_EQL, but the parser no longer declares it (it's commented out), so something needs to be done in order to compile. One option is to uncomment the %token definition and use SYMB_EQL instead of the literal "=" in the parser .y file.