%{
#include<stdio.h>
#include<stdlib.h>
/* Variable store, indexed by the semantic value of a LETTER token.
   NOTE(review): nothing here bounds-checks that index against 30 — confirm
   the scanner only ever produces values in 0..29. */
int regs[30];
%}
/* No %union or YYSTYPE is declared, so semantic values use the default int type. */
%token NUMBER LETTER
/* Precedence levels are listed lowest first: PLUS/MINUS bind less tightly
   than MULT/DIV, and all four operators are left-associative. */
%left PLUS MINUS
%left MULT DIV
%%
/* A program is zero or more statements, with no separator token between them. */
prog: prog st | ; //when I remove this line the error goes
/* A statement either prints an expression's value or stores it in a variable and prints that. */
st : E {printf("ans %d", $1);}| LETTER '=' E {regs[$1] = $3; printf("variable contains %d",regs[$1]);};
E : E PLUS E{$$ = $1 + $3;} //addition
| E MINUS E{$$ = $1 - $3 ;} //subtraction
| MINUS E{$$ = -$2;} /* unary minus; with no %prec it takes the precedence of MINUS */
| E MULT E{$$ = $1 * $3 ;}
| E DIV E { if($3)$$= $1 / $3; else yyerror("Divide by 0");} /* a zero divisor is reported through yyerror instead of dividing */
/*|LBRACE E RBRACE{$$= $2;}
| RBRACE E LBRACE{yyerror("Wrong expression");} */
| NUMBER {$$ = $1;}
| LETTER {$$ = regs[$1];} /* read the variable's stored value */
;
%%
/* Entry point: show the prompt, run the generated parser once, and exit with status 0. */
int main(void)
{
    fputs("Enter Expression: ", stdout);
    (void)yyparse();
    return 0;
}
/*
 * Error callback used by the parser and by the grammar actions.
 *
 * Writes msg, followed by a newline, to stderr and terminates the program
 * with a failure status so that callers (shells, scripts) can tell that
 * parsing did not succeed. This function never returns.
 */
int yyerror(char *msg)
{
    fprintf(stderr, "%s\n", msg);
    exit(EXIT_FAILURE);
}
I am not able to resolve the shift/reduce conflicts that this grammar produces. I also get a segmentation fault when I run it after making some edits. I am using yacc and lex.
The two shift-reduce conflicts are the result of the fact that you don't require any explicit separator between statements. Because of that, a = b - 3 could be interpreted as one statement or as two (a = b; - 3). The second interpretation may not seem very natural to you but it is easily derived by the grammar.
In addition, your use of unary minus leads to an incorrect parse of -2/3 as -(2/3) instead of (-2)/3. (You may or may not find this serious, since it has few semantic consequences with these particular operators.) This particular issue and its correct resolution are discussed in the bison manual, and in many other internet resources.
Both of these explanations are made a bit more visible if you use the -v command line option to bison to produce a description of the parser. See Understanding your parser (again, in the bison manual).
Related
I am trying to create a calculator using lex and yacc. However, I cannot understand how to give the operators precedence in this program, and I could not find any information about it. What code do I need to add to my project so that it calculates correctly?
Yacc file is:
%{
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
/* Scanner entry point; the lex file for this calculator provides it. */
int yylex();
/* Error callback invoked by the generated parser. */
void yyerror(const char *s);
%}
%token INTEGER
/* Precedence levels are listed lowest first: as written, '*' and '/' get the
   LOWEST level and '+' and '-' the highest. '%' and '^' have no declared level. */
%left '*' '/'
%left '+' '-'
%%
/* The input is one or more lines. */
program:
program line | line
/* A line is an expression terminated by ';' (its value is printed) or a bare newline.
   NOTE(review): the ';' between the action and '|' looks stray — confirm the
   parser generator accepts it. */
line:
expr ';' { printf("%d\n",$1); } ; | '\n'
/* Every operator takes an expr on the left and a single term on the right, so
   the grammar is unambiguous: all operators group left-to-right at one level
   and the %left declarations above are never consulted. */
expr:
expr '+' term { $$ = $1 + $3; }
| expr '-' term { $$ = $1 - $3; }
| expr '*' term { $$ = $1 * $3; }
| expr '/' term { $$ = $1 / $3; }
| expr '%' term { $$ = $1 % $3; }
| expr '^' term { $$ = $1 ; } /* NOTE(review): the right operand is ignored; no exponentiation is performed */
| term { $$ = $1; }
/* A term is a single integer literal. */
term:
INTEGER { $$ = $1; }
%%
/* Error callback for the parser: writes the message and a newline to stderr, then returns. */
void yyerror(const char *s)
{
    fputs(s, stderr);
    fputc('\n', stderr);
}
/* Entry point: run the generated parser once and exit with status 0. */
int main(void)
{
    /*yydebug=1;*/
    (void)yyparse();
    return 0;
}
Lex file is:
%{
#include <stdlib.h>
#include <stdio.h>
/* Must agree with the definition in the yacc file, which takes a const char *. */
void yyerror(const char *s);
extern int yylval;
#include "calc.tab.h"
#include<time.h>
%}
%%
[ \t]+ ; //skip whitespace
[0-9]+ {yylval = atoi(yytext); return INTEGER;}
[-+*/%^] {return *yytext;}
\n {return *yytext;}
; {return *yytext;}
. {char msg[25]; /* bounded write: the buffer can never be overrun */ snprintf(msg, sizeof msg, "%s <%s>","invalid character",yytext); yyerror(msg);}
%left '*' '/'
%left '+' '-'
Precedence declarations are specified in the order from lowest precedence to highest. So in the above code you give * and / the lowest precedence level and + and - the highest. That's the opposite order of what you want, so you'll need to switch the order of these two lines. You'll also want to add the operators % and ^, which are currently part of your grammar, but not your precedence annotations.
With those changes, you'll now have specified the precedence you want, but it won't take effect yet. Why not? Because precedence annotations are used to resolve ambiguities, but your grammar isn't actually ambiguous.
The way you've written the grammar, with only the left operand of all operators being expr and the right operand being term, there's only one way to derive an expression like 2+4*2, namely by deriving 2+4 from expr and 2 from term (because deriving 4*2 from term would be impossible since term can only match a single number). So your grammar treats all operators as left-associative and having the same precedence and your precedence annotations aren't considered at all.
In order for the precedence annotations to be considered, you'll have to change your grammar, so that both operands of the operators are expr (e.g. expr '+' expr instead of expr '+' term). Written like that an expression like 2+4*2 could either be derived by deriving 2+4 from expr as the left operand and 2 from expr as the right operand or 2 as the left and 4*2 as the right and this ambiguity will be resolved using your precedence annotations.
I am new to compilers and am learning to make a calculator that reads multi-line input (one equation per line) from a .txt file. I am running into a segmentation fault.
YACC Code :
%{
#include <stdio.h>
#include <string.h>
#define YYSTYPE int /* the attribute type for Yacc's stack */
extern int yylval; /* defined by lex, holds attrib of cur token */
extern char yytext[]; /* defined by lex and holds most recent token */
extern FILE * yyin; /* defined by lex; lex reads from this file */
%}
%token NUM
%%
/* The input is one or more Lines. */
Begin : Line
| Begin Line
;
/* NOTE(review): $$ here is an int (YYSTYPE is int, and by default $$ = $1),
   yet it is passed to printf with %s, which expects a char pointer. */
Line : Calc {printf("%s",$$); }
;
/* Prints the value of a complete expression. */
Calc : Expr {printf("Result = %d\n",$1);}
/* The left operand of every binary operator is a Fact and the right operand is
   a full Expr, so all four operators share one level and group to the right. */
Expr : Fact '+' Expr { $$ = $1 + $3; }
| Fact '-' Expr { $$ = $1 - $3; }
| Fact '*' Expr { $$ = $1 * $3; }
| Fact '/' Expr { $$ = $1 / $3; }
| Fact { $$ = $1; }
| '-' Expr { $$ = -$2; }
;
/* A Fact is a parenthesised expression or a number. */
Fact : '(' Expr ')' { $$ = $2; }
| Id { $$ = $1; }
;
/* The value is read from the global yylval rather than from $1. */
Id : NUM { $$ = yylval; }
;
%%
void yyerror(char *mesg); /* this one is required by YACC */
/*
 * Entry point: expects exactly one argument, the name of the file to parse.
 *
 * Opens that file as the scanner's input (yyin), runs the parser over it and
 * closes the file again. Returns 1 when the argument count is wrong or the
 * file cannot be opened, and 0 once yyparse() has returned.
 */
int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("useage: calc filename \n");
        return 1;
    }

    yyin = fopen(argv[1], "r");
    if (!yyin) {
        printf("cannot open file\n");
        return 1;
    }

    yyparse();
    fclose(yyin);
    return 0;
}
/* Error callback: prints the parser's message on stdout and exits with status 1. */
void yyerror(char *mesg)
{
    fputs("Bad Expression : ", stdout);
    puts(mesg); /* puts supplies the trailing newline */
    exit(1); /* stop after the first error */
}
LEX Code :
%{
#include <stdio.h>
#include "y.tab.h"
int yylval; /* declared extern by the yacc code; carries the current token's value to the parser */
%}
/* Named patterns used by the rules below; `letter` is not used by any rule. */
letter [A-Za-z]
digit [0-9]
/* NOTE(review): `*` allows this pattern to match the empty string — `+` may be what was intended. */
num ({digit})*
op "+"|"*"|"("|")"|"/"|"-"
ws [ \t\n]
other .
%%
{ws} { /* whitespace, including newline, is discarded: no token is returned */ }
{num} { /* pass the number's value to the parser through yylval */ yylval = atoi(yytext); return NUM;}
{op} { /* operators and parentheses are returned as their own character code */ return yytext[0];}
{other} { /* report the character and its numeric code, then hand '?' to the parser */ printf("bad%cbad%d\n",*yytext,*yytext); return '?'; }
%%
/* C functions called from the rule actions above could be defined here */
I am trying to print the expression along with result.
Thanks In Advance.
In your parser, you have:
Line : Calc {printf("%s",$$); }
Now $$ is the semantic value which the rule is computing, and you haven't assigned anything to it. So it would not be unreasonable to assume that it is undefined, which would be bad, but in fact it does have a value because of the default rule $$ = $1;. All the same, it would be much more readable to write
printf("%s", $1);
But that's not correct, is it? After all, you have
#define YYSTYPE int
so all semantic types are integers. But you're telling printf that $1 is a string (%s). printf will believe you, so it will go ahead and try to dereference the int as though it were a char*, with predictable results (i.e., a segfault).
You are probably using a compiler which is clever enough to notice the fact that you are trying to print an int with a %s format code. But either you haven't asked the compiler to help you or you are ignoring its advice.
Always compile with warnings enabled. If you are using gcc or clang, that means putting -Wall in the command line. (If you are using some other compiler, find out how to produce warnings. It will be documented.) And then read the warnings and fix them before trying to run the program.
There are several other errors and/or questionable practices in your code. Your grammar is inaccurate (why do you use fact as the left-hand operand of every operator?), and despite your comment, your lexical scanner ignores newline characters, so there is no way the parser can know whether expressions are one per line, two per line, or spread over multiple lines; that will make it hard to use the calculator as a command-line tool.
There is no need to define the lex macro digit; (f)lex recognizes the Posix character class [[:digit:]] (and others, documented here) automatically. Nor is it particularly useful to define the macro num. Overuse of lex macros makes your program harder to read; it is usually better to just write the patterns out in place:
[[:digit:]]+ { yylval = atoi(yytext); return NUM; }
which would be more readable and less work both for you and for anyone reading your code. (If your professor or tutor disagrees, I'd be happy to discuss the matter with them directly.)
The yacc code:
%{
#include<stdio.h>
#include<string.h>
/* NOTE(review): gencode() is called by the actions below but is not declared
   in this file, so the compiler sees no prototype for it here. */
%}
/* Every semantic value is a char pointer. */
%union {
char* dval;
}
%token <dval> NUM VAR
%type <dval> E P
/* Precedence, lowest first: '+' '-' below '*' '/'; all left-associative. */
%left '+' '-'
%left '*' '/'
%%
/* Prints the string produced for the whole expression. */
statement : P {printf("\nt = %s\n \n",$1);}
;
P: E
;
/* NOTE(review): each action copies INTO $$ with strcpy, but nothing visible
   here points $$ at allocated storage first — confirm where that buffer
   is supposed to come from. */
E : E '+' E {strcpy($$,gencode($1,"+",$3));}
| E '-' E {strcpy($$,gencode($1,"-",$3));}
| E '*' E {strcpy($$,gencode($1,"*",$3));}
| E '/' E {strcpy($$,gencode($1,"/",$3));}
| '(' E ')' {strcpy($$,$2);}
| NUM {strcpy($$,$1);}
| VAR {strcpy($$,$1);}
;
%%
**The lex code:**
%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include"y.tab.h"
/* Counter appended to the temporary's name by gencode(); incremented on every call. */
int n=0;
/* Prefix of every temporary name generated by gencode(). */
char *ch="t";
%}
%%
[0-9]+ { /* NOTE(review): copies into yylval.dval, which nothing visible here has pointed at a buffer */ strcpy(yylval.dval,yytext); return NUM;}
[a-z]+ { /* same copy as for numbers, for lower-case identifiers */ strcpy(yylval.dval,yytext); return VAR;}
\n { /* a newline ends the input: token 0 tells the parser there is nothing more */ return 0;}
. { /* any other character is returned as its own character code */ return yytext[0];}
%%
/* Error callback: prints a newline followed by the parser's message on stdout, then returns. */
void yyerror(char* str)
{
    putchar('\n');
    fputs(str, stdout);
}
/*
 * Emit one three-address instruction and return the name of its result.
 *
 * Builds a fresh temporary name from the global prefix `ch` and the global
 * counter `n` (for example "t0", "t1", ...), prints
 * "<temp> = <first> <op> <second>" on stdout, and increments the counter.
 *
 * first, second: operand names; op: operator text. None of them is modified.
 * Returns a newly malloc'ed string holding the temporary's name; the caller
 * owns it and is responsible for freeing it. Exits with a failure status if
 * the name cannot be built or memory cannot be allocated.
 */
char* gencode(char *first,char *op,char *second)
{
    char *t;
    int len;

    /* Measure first, so the buffer fits the name however large the counter grows. */
    len = snprintf(NULL, 0, "%s%d", ch, n);
    if (len < 0) {
        fprintf(stderr, "gencode: cannot format temporary name\n");
        exit(EXIT_FAILURE);
    }

    t = malloc((size_t)len + 1);
    if (t == NULL) {
        fprintf(stderr, "gencode: out of memory\n");
        exit(EXIT_FAILURE);
    }
    snprintf(t, (size_t)len + 1, "%s%d", ch, n);

    printf("\n%s = %s %s %s\n", t, first, op, second);
    n++;
    return t;
}
/* Entry point: run the generated parser once and exit with status 0. */
int main(void)
{
    yyparse();
    return 0;
}
For some reason gcc outputs the warning: passing argument 2 of ‘strcpy’ makes pointer from integer without a cast [enabled by default].
However, the second argument of strcpy is a call to the function gencode, which returns a char* and not an integer.
You need to declare gencode in the beginning of the yacc file. Otherwise, the compiler sees it as undefined and assumes it returns an int.
You need to declare gencode in the .y file, either in a header you #include or directly in the .y file.
You have a much bigger problem, in that you declare dval (the type used for most rules) as a char *, but then you never initialize it to anything before writing to it with strcpy, so your code will crash (or worse) when strcpy tries to write to random addresses. You can fix this by replacing the strcpy calls in the lex file with yylval.dval = strdup(yytext) and the calls in the yacc file with plain assignments to $$ (so $$ = gencode(...), or whatever).
I have this yacc file
/* Request detailed syntax-error messages from the generated parser. */
%error-verbose
/* Token kinds delivered by the scanner. */
%token END
%token ID
%token INT
%token IF
%token ELSE
%token WHILE
%token FOR
%token BREAK
%token CONTINUE
%token RETURN
%token SEM
%token LPAR
%token RPAR
%token PLUS
%token MINUS
%token MULT
%token DIV
%token MOD
%token GT
%token LT
%token GTE /* >= */
%token LTE /* <= */
%token EQUAL /* == */
%token NEQUAL /* != */
%token AND
%token OR
%token EQ
%token COM
%token PRINT
%token READ
%token FLOAT
%token LABR
%token RABR
%token NUM
%token STR
/*
 * Precedence table, lowest precedence first.
 * PE, ME, TE, DE, RE, PP and MM have no %token line above; they are
 * introduced by the precedence declarations themselves.
 */
%right EQ PE ME TE DE RE
%left OR
%left AND
%left EQUAL NEQUAL
%left LT GT GTE LTE
%left PLUS MINUS
%left MULT DIV MOD
%right PP MM
%{
#include<stdio.h>
extern char *yyname;
extern char *yytext;
extern int yylineno;
/* Error callback: writes "<input name>:<line number>:<message>" and a newline to stderr. */
void yyerror(char const *msg)
{
    FILE *const sink = stderr;

    fprintf(sink, "%s:%d:%s\n", yyname, yylineno, msg);
}
%}
%%
/* Start symbol: a program consists of definitions only. */
program
: definitions
;
/* One or more definitions. */
definitions
: definition
| definitions definition
;
/* NOTE(review): the first alternative is empty, so a definition may match
   nothing at all; a list of such items is what makes `definitions` able to
   derive itself. */
definition:
| declaration
;
/* NOTE(review): no other rule in this grammar refers to `declarations`. */
declarations
: /* null */
| declarations declaration
;
/* INT followed by a comma-separated list of identifiers and a semicolon. */
declaration
: INT declarator_list SEM
;
declarator_list
: ID
| declarator_list COM ID
;
/* NOTE(review): nothing reachable from `program` refers to `statements`, so
   this rule and the statement/expression rules below are never used. */
statements
: /* null */
| statements statement
;
statement
: expression SEM
| SEM /* null statement */
| if_prefix statement
| if_prefix statement ELSE statement
| loop_prefix statement
;
/* The parenthesised condition that opens an if statement. */
if_prefix
: IF LPAR expression RPAR
;
/* The parenthesised condition that opens a while loop. */
loop_prefix
: WHILE LPAR expression RPAR
;
/* One or more `binary` expressions separated by COM. */
expression
: binary
| expression COM binary
;
/* Operands, calls, binary operators and assignments. Both operands of every
   operator are `binary`, so the precedence table in the declarations section
   decides how they group. */
binary
: ID
| LPAR expression RPAR
| ID LPAR optional_argument_list RPAR
| binary PLUS binary
| binary MINUS binary
| binary MULT binary
| binary DIV binary
| binary MOD binary
| binary GT binary
| binary LT binary
| binary GTE binary
| binary LTE binary
| binary EQUAL binary
| binary NEQUAL binary
| binary AND binary
| binary OR binary
| ID EQ binary
| ID PE binary
| ID ME binary
| ID TE binary
| ID DE binary
| ID RE binary
;
/* The argument list of a call; may be empty. */
optional_argument_list
: /* no actual arguments */
| argument_list
;
argument_list
: binary
| argument_list COM binary
;
%%
#include <stdlib.h>
extern FILE *yyin;
/*
 * Entry point: expects exactly one argument, the name of the file to parse.
 *
 * Records the name in yyname (used by yyerror for its messages), opens the
 * file as the scanner's input and runs the parser over it.
 *
 * Returns EXIT_SUCCESS when the input parses cleanly, and EXIT_FAILURE when
 * the argument count is wrong, the file cannot be opened, or parsing fails.
 */
int main(int argc, char **argv)
{
    int status;

    if (argc != 2) {
        fprintf(stderr, "%s: Wrong arguments\n", argv[0]);
        return EXIT_FAILURE;
    }
    yyname = argv[1];
    if ((yyin = fopen(yyname, "r")) == NULL) {
        fprintf(stderr, "%s: %s: Invalid file\n", argv[0], argv[1]);
        return EXIT_FAILURE;
    }

    /* yyparse() returns 0 on success and non-zero on failure. */
    status = yyparse();
    fclose(yyin);
    return (status == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}
when the input is
int x;
everything works fine, but when the input starts with something other than INT,
let's say FOR, it throws an error:
unexpected FOR expecting INT or $end
so it's actually reading only the first rule from the set of rules.
Besides, bison keeps printing warnings about useless nonterminals and terminals when it is run on this file.
What is wrong with this yacc file?
The trouble is that the rules:
program
: definitions
;
definitions
: definition
| definitions definition
;
definition:
| declaration
;
declarations
: /* null */
| declarations declaration
;
declaration
: INT declarator_list SEM
;
only allow declarations through; nothing allows statements as part of a program. Your FOR is not a declaration, so the grammar rejects it.
The 'useless non-terminals' warning is trying to tell you:
You have goofed big time; there is a bug in your grammar. You have tried to write rules for some production, but you never let it be recognized, so there was no point in adding it.
Or thereabouts...
Maybe you need:
program
: definitions statements
;
Or maybe you need to allow functions as a definition too, and then the FOR statement will be part of the body of a function.
Asking my LL oracle about your amended grammar:
Out of 15 non-terminals, 14 are reachable, 1 are unreachable:
'declarations'
Circular symbols:
definitions
definitions
The complaint about circular symbols means that 'definitions' can derive itself. For example, 'definitions' can produce 'definitions definition', but 'definition' is nullable, so 'definitions' can produce just itself, kinduva infinite loop few parser generators care to deal with in any sensible way. Looking at it another way, you've defined 'definitions' to be a list of nullable symbols, so how many epsilons would you like to match? How about infinity? :-)
This is a drawback of the yacc/bison style of trying to produce some parser even if there are problems in the grammar; quite convenient if you know exactly what you're doing, but quite confusing otherwise.
But, to the narrow point of what to do about the grammar circularity that's giving you a very unuseful (but by gum compilable!) parser. How about you allow 'definitions' be nullable but not 'definition'? IOW:
definitions : | definitions definition ;
definition : declaration ;
Try to not stack nullability on top of nullability. So when you later change to:
definition : declarations ;
Don't make 'declarations' nullable (that's already handled by 'definitions' being nullable). Instead, change it to:
declarations : declaration | declarations declaration ;
That should get you past the immediate problem and onto some new ones :-)
I am trying to learn lex and yacc.
I am struggling to understand how to do the grammar rules.
My file has already been defined like:
fd 3x00
bk 100
setc 100
int xy3 fd 10 rt 90
rt
My output with the printf and printing to a file went something like this:
Keyword: fd
Illegal: 3x00
Keyword: bk
Keyword: setc
Number: 100
Keyword: int
Id: xy3
Keyword: fd
Number: 10
Keyword: rt
Number: 90
Here is my lex file - I'm only going to show part of it to keep this post as small as possible
fd {return FD; }
[0-9]+[a-z]+[0-9]+ { /* illegal entry such as 3x00: matched and discarded */ }
[\r\t\n]+ { /* skip carriage returns, tabs and newlines */ }
bk {return BK;}
setc {return SETC;}
[-+]?[0-9]+ { /* token values reach the parser through yylval */ yylval.ival = atoi(yytext); return NUMBER;}
int {fprintf(yyout, "%s\n", yytext);}
xy3 {fprintf(yyout, "%s\n", yytext);}
fd[0-9]+ {fprintf(yyout, "%s\n", yytext);}
%%
Here is my yacc file. It is not complete, since I don't know how to finish it.
%{
#include <ctype.h>
#include <stdio.h>
%}
/* Token kinds the grammar can refer to. */
%token NUMBER
%token ID
%token FD
%token BK
%token SETC
%token KEYWORD
%%
/* Rules section: no grammar rules have been written yet. */
%%
/* Entry point: run the generated parser once and return its result (0 on success). */
int main(void)
{
    return yyparse();
}
I am not sure how I would write the grammar rules for these.
Can I make up my own names for the expressions?
Can anyone help me with one example so I can see how to finish it?
The rules should be like this:
/* A statement is a command keyword followed by one argument.
   NOTE(review): $1 is printed as a string, so the scanner has to store the
   keyword's text in yylval before returning the token — confirm it does. */
statement: command arg {printf("Keyword: %s\n", $1);};
/* Every keyword token the scanner returns is accepted as a command. */
command: KEYWORD {$$ = $1;}
|FD {$$ = $1;}
|BK {$$ = $1;}
|SETC {$$ = $1;};
/* NUMBER carries an int (the scanner stores atoi(yytext) in the ival member), so it is printed with %d. */
arg: NUMBER {printf("Number: %d\n", $1);}
|ID {printf("Id: %s\n", $1);};
That means you should define the syntactical rules in this way. Separate alternative definitions with |, and write the desired actions in a { } block for each rule. Finish each rule with a ;. When you refer to the tokens, use $n where n is the position of the token in the rule. The rule header can be referred to using $$.