YACC Rules not reduced - yacc

This is my code calc.y. I keep getting the error:
yacc: 1 rule never reduced
yacc: 3 reduce/reduce conflicts
not really sure what this means
I've done some research elsewhere, but I am now lost. I'm guessing the rules being referred to are program and statement, but even so... what does it mean for a rule to be (or never be) reduced?
%{
#include <stdio.h>
FILE *outfile;
int yyline = 1;
int yycolumn = 1;
%}
%union{
int nw;
struct{
int v;
char s[1000];
}attr;
}
%token SEMInumber
%token LPARENnumber
%token <nw> ICONSTnumber
%token BEGINnumber
%token PROGRAMnumber
%token MINUSnumber
%token TIMESnumber
%token <nw> VARnumber
%token INTnumber
%token EOFnumber
%token COMMAnumber
%token RPARENnumber
%token <nw>IDnumber
%token ENDnumber
%token ISnumber
%token PLUSnumber
%token DIVnumber
%token PRINTnumber
%token EQnumber
%type <attr> exp
%type <attr> term
%type <attr> factor
%%
program: PROGRAMnumber IDnumber ISnumber compstate
;
compstate: BEGINnumber {print_header();} statement ENDnumber{print_end();}
| BEGINnumber {print_header();} statement SEMInumber statement ENDnumber{print_end();}
;
statement: IDnumber EQnumber exp
| PRINTnumber exp
| declaration
;
declaration: VARnumber IDnumber
| VARnumber IDnumber COMMAnumber IDnumber
;
exp: term {$$.v = $1.v; strcpy($$.s, $1.s);}
| exp PLUSnumber term {$$.v = $1.v + $3.v; sprintf($$.s, "(%s) + (%s)", $1. s, $3.s);}
| exp MINUSnumber term {$$.v = $1.v - $3.v; sprintf($$.s, "(%s) - (%s)", $1. s, $3.s);}
;
term: factor {$$.v = $1.v; strcpy($$.s, $1.s);}
| term TIMESnumber factor {$$.v = $1.v * $3.v; sprintf($$.s, "(%s) * (%s)", $1.s, $3.s);}
| term DIVnumber factor {$$.v = $1.v / $3.v; sprintf($$.s, "(%s) / (%s)", $1.s, $3.s);}
;
factor: ICONSTnumber {$$.v = $1; sprintf($$.s, "%d", $1);}
| IDnumber {$$.v = $1.v; strcpy($$.s, $1.s);}
| LPARENnumber exp RPARENnumber {$$.v = $2.v; strcpy($$.s, $2.s);}
;
%%
/*
 * Run the parser once and print the verdict on stdout:
 * "accept" when yyparse() returns zero, "reject" otherwise.
 */
int main()
{
    int parse_status = yyparse();

    fputs(parse_status == 0 ? "accept\n" : "reject\n", stdout);
}
/* Hook called by the mid-rule action that follows BEGINnumber in compstate; the body is empty. */
void print_header() {}
/* Hook called by the final action of compstate, after ENDnumber; the body is empty. */
void print_end(){}
void yyerror(const char *str)
{
printf("yyerror: %s at line %d\n", str, yyline);
}

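After the parser shifts the BEGINnumber token in compstate, yacc has created a separate hidden empty rule for each of the two mid-rule actions {print_header();}, and it cannot tell which of them to reduce, which results in a reduce/reduce (R/R) conflict. You can replace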
compstate: BEGINnumber {print_header();} statement ENDnumber{print_end();}
| BEGINnumber {print_header();} statement SEMInumber statement
ENDnumber{print_end();}
;
with, for example
begin_number:
BEGINnumber { print_header(); }
compstate: begin_number statement ENDnumber{print_end();}
| begin_number statement SEMInumber statement
ENDnumber{print_end();}
;
to solve the conflict.

Informative Messages
%s: %d rules never reduced
Some rules are never used, either because they weren't used in the grammar or because they
were on the losing end of shift/reduce or reduce/reduce conflicts. Either change the
grammar to use the rules or remove them.

Related

Yacc parser not detecting my language well

I am new to yacc and I am trying to define some rules for my language.
I have written a grammar "well" and it runs and executes without an error but for some reason, it doesn't do what it is supposed to do.
mylex.l
%{
#include <stdio.h>
#include "myyacc.tab.h"
extern int yyval;
%}
/* KEEP TRACK OF LINE NUMBER*/
%option yylineno
uppercase [A-Z]
lowercase [a-z]
alpha [{uppercase}{lowercase}]
digit [0-9]
alphanum [{alpha}{digit}]
id uppercase({alphanum}|_)*
int_literal [0-9]+
float_literal [0-9]+\.[0-9]+
string_literal \"[^\"]*\"
comment (##)(.)*(##)
%%
"int" {return INT;}
"float" {return FLOAT;}
"boolean" {return BOOLEAN;}
"if" {return IF;}
"else" {return ELSE;}
"end" {return END;}
"true" {return TRUE;}
"false" {return FALSE;}
"read" {return READ;}
"print" {return PRINT;}
"while" {return WHILE;}
"START" {return START;}
"END" {return END;}
"+" {return ADD;}
"-" {return SUB;}
"*" {return MUL;}
"/" {return DIV;}
"&&" {return LOG_AND;}
"||" {return LOG_OR;}
"!" {return LOG_NOT;}
"==" {return EQ;}
"<>" {return NEQ;}
"<" {return LT;}
"<=" {return LEQ;}
">" {return GT;}
">=" {return GEQ;}
"=" {return ASSIGN;}
"(" {return LPAREN;}
")" {return RPAREN;}
"{" {return LBRACE;}
"}" {return RBRACE;}
{int_literal} {return INT_LITERAL;}
{float_literal} {return FLOAT_LITERAL;}
{string_literal} {return STRING_LITERAL;}
{id} {return ID;}
{comment} { ; }
%%
/*
 * End-of-input hook for the scanner.
 * Always returns 1; per flex's yywrap contract a non-zero result means
 * there is no further input to switch to.
 */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
myyacc.y
%{
#include <stdio.h>
#include <stdlib.h>
extern int yylineno;
extern FILE* yyin;
extern int yyerror (char* msg);
extern char * yytext;
%}
/* definitions section start */
%token INT FLOAT BOOLEAN IF ELSE END TRUE FALSE READ PRINT WHILE START
%token INT_LITERAL FLOAT_LITERAL STRING_LITERAL ID ERROR
%right ASSIGN
%right LOG_NOT
%left MUL DIV
%left ADD SUB
%left LPAREN RPAREN
%left LBRACE RBRACE
%left LT LEQ GT GEQ
%left EQ NEQ
%left LOG_AND
%left LOG_OR
%start program
/* definitions section end */
%%
/* rules section start */
/* Start rule: START, one or more statements, then END. The action is a
   complete C statement (terminated by ';') so it compiles with Bison
   versions that do not append a semicolon to actions. */
program : START statements END {printf("No syntax errors detected");};
statements : statements statement
| statement
;
statement : dec_stmt
| assignment_stmt
| print_stmt
| read_stmt
| condition_stmt
| while_stmt
;
dec_stmt : type ID
;
type : INT
| FLOAT
| BOOLEAN
;
assignment_stmt : ID ASSIGN expression
;
expression : exp EQ exp
| exp NEQ exp
| exp LT exp
| exp LEQ exp
| exp GT exp
| exp GEQ exp
| exp
;
exp : exp MUL exp
| exp DIV exp
| exp ADD exp
| exp SUB exp
| exp LOG_AND exp
| exp LOG_OR exp
| LOG_NOT exp
| LPAREN exp RPAREN
| INT_LITERAL
| FLOAT_LITERAL
| ID
| TRUE
| FALSE
;
print_stmt : PRINT LPAREN ID RPAREN
| PRINT LPAREN STRING_LITERAL RPAREN
;
read_stmt : ID ASSIGN READ LPAREN RPAREN
;
condition_stmt : IF LPAREN expression RPAREN LBRACE statement RBRACE END
| IF LPAREN expression RPAREN LBRACE statement RBRACE ELSE LBRACE statement RBRACE END
;
while_stmt : WHILE LPAREN expression RPAREN LBRACE statement RBRACE
;
/* rules section end */
%%
/* auxiliary routines start */
/*
 * Entry point: opens the file named by argv[1] as the scanner input,
 * runs yyparse() once, prints whether parsing completed or failed,
 * closes the input and returns 0.
 *
 * NOTE(review): argc is never consulted and the fopen() result is not
 * tested before use — confirm whether the "don't change" restriction
 * below allows adding a guard.
 */
int main(int argc, char *argv[])
{
// don't change this part
yyin = fopen(argv[1], "r" );
/* a zero result from yyparse() selects the "complete" message */
if(!yyparse())
printf("\nParsing complete\n");
else
printf("\nParsing failed\n");
fclose(yyin);
return 0;
}
/*
 * Parser error callback: prints the scanner's line number, the message
 * and the current token text to stdout, then terminates the process with
 * exit status 1. Despite the int return type it never returns.
 */
int yyerror (char* msg)
{
    const int line = yylineno;
    const char *near_text = yytext;

    fprintf(stdout, "Line %d: %s near %s\n", line, msg, near_text);
    exit(1);
}
/* auxiliary routines end */
Test case
START
int X12
float ABC1
DDe = 7
while(QNn >0) ## this a Comment ##
{ RLk9999 = ACc - 2
CCC = true
}
if ( ACc ==5){ print ( " Inside IF inside Loop " ) } end }
print ( " Hello .. " )
END
Output
Line 3: syntax error near 12
It also gets the line number wrong.
I've been trying to see what I'm doing wrong for some time now and I'd really appreciate a second set of eyes.
You cannot use macros inside character classes. Inside a character class, pattern operators lose their special meaning, so when you write
alphanum [{alpha}{digit}]
you are defining a character class containing {, }, and the letters adghilpt. That doesn't match the 12 in X12.
Anyway, flex already has predefined sets of characters which you can include in your character classes:
* [:lower:] a-z
* [:upper:] A-Z
* [:alpha:] [:lower:][:upper:]
* [:digit:] 0-9
* [:alnum:] [:alpha:][:digit:]
Note that these can only be used inside a character class. So you could write your id pattern as
id [[:upper:]][[:alnum:]_]*
without the need for any other macros.
Please see the flex pattern documentation for more details.
In addition to @rici's answer, I've also noticed that the while_stmt rule in my yacc file accepts only a single statement in its body.

reduce/ reduce conflict - yacc (identifier)

I'm trying to write a parser for a simplified C. The error I get is: "reduce/reduce conflict"
1) Rule exp need to get to IDENTIFIER
for example: a-b(identifier-identifier)-> exp-exp->exp
2) Rule ident_list also need to get to IDENTIFIER . This rule is used for variable declaration.
for example: a,b,c(identifier,identifier,identifier)-> ident_list .
Therefore I need both rules, ident_list and exp, to derive IDENTIFIER. This is what causes the "reduce/reduce conflict". Any idea how to solve this?
-------------*yac code*:---------------
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node
{
char* token;
struct node* left;
struct node* right;
char* type;
}node;
typedef struct obj
{
char* type;
char* name;
struct obj* next;
}obj;
typedef struct symTbl
{
struct obj* first;
struct obj* last;
int size;
}symTbl;
node* mknode (char* token,node* left, node* right,char* Type);
void put(symTbl* tbl, char* type,char* name);
void printtree(node* tree);
#define YYSTYPE struct node*
%}
%start s
%token WHILELOOP
%token STATIF
%token ELSE
%token MAIN
%token POINTERERR
%token COMMENT
%token POINTER
%token GREATEREQUAL
%token LESSEREQUAL
%token DBLAND
%token GREATER
%token LESSER
%token POWER
%token MULTIPLY
%token MINUS
%token PLUS
%token AND
%token OR
%token NOT
%token NOTEQUAL
%token CHARERROR
%token STRINGERROR
%token POINTER
%token INTEGER
%token BOOLEAN
%token DEVIDE
%token ASSIGN
%token EQUAL
%token TYPE
%token IDENTIFIER
%token HEX IF
%token LITERCHAR
%token OCTAL
%token BINARYINT
%token LTRLSTRING
%token COMMA COLON SEMICOLON VAR RETURN RPARENC LPARENC
%left COMMA LPAREN RPAREN ELSE
%left PLUS IF WHILELOOP TYPE
%left MINUS DEVIDE RPARENC LPARENC
%left MULTIPLY EQUAL NOTEQUAL OR AND LESSEREQUAL GREATEREQUAL GREATER LESSER
%%
s:progrem{printtree($1);}
progrem:exp|var_dec|if_stnt|ident_list|bool_exp| mul_ident;
exp: exp PLUS exp{$$=mknode("+",$1,$3,"arit");}
|exp MINUS exp {$$=mknode("-",$1,$3,"arit");}
|exp DEVIDE exp {$$=mknode("/",$1,$3,"arit");}
|exp MULTIPLY exp {$$=mknode("*",$1,$3,"arit");}
|MINUS exp {$$=mknode("-",$2,NULL,"arit");}
|IDENTIFIER {$$=mknode(yytext,NULL,NULL,"id");}
|LPAREN exp RPAREN {$$= $2;};
bool_exp : exp EQUAL exp {$$=mknode("=",$1,$3,"bool");}
|exp NOTEQUAL exp {$$=mknode("!=",$1,$3,"bool");}
|exp OR exp {$$=mknode("||",$1,$3,"bool");}
|exp AND exp {$$=mknode("&&",$1,$3,"bool");}
|exp GREATEREQUAL exp {$$=mknode(">=",$1,$3,"bool");}
|exp LESSER exp {$$=mknode("<",$1,$3,"bool");}
|exp LESSEREQUAL exp {$$=mknode("<=",$1,$3,"bool");}
|exp GREATER exp {$$=mknode(">",$1,$3,"bool");}
|LPAREN bool_exp RPAREN {$$= $2;};
var_dec:ident_list COLON ident_list {$$=mknode(":",$1,$3,"dec");};
ident_list: ident_list COMMA ident_list {$$=mknode(",", $1, $3,"id_list");}
|ident_list TYPE ident_list{$$=mknode(yytext,$1,NULL,"id");}
|VAR {$$= mknode("var",NULL,NULL,"id");}
|SEMICOLON {$$= mknode(";",NULL,NULL,"id");};
|IDENTIFIER {$$=$1;}
if_stnt:IF LPAREN bool_exp RPAREN {$$=mknode("if",$3,NULL,"if_state");};
%%
#include "lex.yy.c"
/*
 * Entry point: parses the fixed input file "text.txt".
 *
 * Returns the result of yyparse(), or 1 when the input file cannot be
 * opened. The open is checked explicitly because a NULL yyin would make
 * a flex scanner fall back to reading stdin without any diagnostic.
 */
int main(void)
{
    int status;

    yyin = fopen("text.txt", "r");
    if (yyin == NULL) {
        perror("text.txt");
        return 1;
    }

    status = yyparse();
    fclose(yyin);
    return status;
}
/*
 * Allocate a tree node that holds private copies of `token` and `Type`.
 *
 *   token - NUL-terminated text copied into the node's `token` field
 *   left  - stored as the node's left child (may be NULL)
 *   right - stored as the node's right child (may be NULL)
 *   Type  - NUL-terminated string copied into the node's `type` field
 *
 * Returns the newly allocated node. If any allocation fails, a message is
 * written to stderr and the process exits with EXIT_FAILURE.
 */
node* mknode( char* token,node*left,node* right,char* Type)
{
    /* Size the copies from the string length: sizeof on a char* yields
       the pointer size, not the length of the text it points to. */
    size_t token_len = strlen(token);
    size_t type_len = strlen(Type);
    node* newnode = malloc(sizeof *newnode);
    char* newstr = malloc(token_len + 1);
    char* type = malloc(type_len + 1);

    if (newnode == NULL || newstr == NULL || type == NULL) {
        free(type);
        free(newstr);
        free(newnode);
        fprintf(stderr, "mknode: out of memory\n");
        exit(EXIT_FAILURE);
    }

    /* +1 so the terminating NUL is copied as well */
    memcpy(newstr, token, token_len + 1);
    memcpy(type, Type, type_len + 1);

    newnode->left = left;
    newnode->right = right;
    newnode->type = type;
    newnode->token = newstr;
    return newnode;
}
/*
 * Append a symbol to the end of the table `tbl`.
 *
 *   tbl  - table to extend; its first/last/size fields are updated
 *   type - type string stored in the new entry (pointer stored, not copied)
 *   name - symbol name stored in the new entry (pointer stored, not copied)
 *
 * The existing entries are scanned first. If one already has the same name
 * (compared by content with strcmp), yyerror() is called and the table is
 * left unchanged. If the new entry cannot be allocated, a message is
 * written to stderr and the process exits with EXIT_FAILURE.
 *
 * NOTE(review): assumes an empty table has size == 0 and that callers keep
 * `type` and `name` alive for as long as the table is used — confirm.
 */
void put(symTbl* tbl, char* type,char* name)
{
    obj* cur;
    obj* newobj;
    int i;

    /* Reject duplicates: walk at most `size` entries, stopping at a NULL link. */
    for (i = 0, cur = tbl->first; i < tbl->size && cur != NULL; i++, cur = cur->next) {
        if (strcmp(cur->name, name) == 0) {
            yyerror();
            return;
        }
    }

    newobj = malloc(sizeof *newobj);
    if (newobj == NULL) {
        fprintf(stderr, "put: out of memory\n");
        exit(EXIT_FAILURE);
    }
    newobj->type = type;
    newobj->name = name;
    newobj->next = NULL;

    /* Link at the tail; the first insertion also becomes the head. */
    if (tbl->size == 0 || tbl->last == NULL) {
        tbl->first = newobj;
    } else {
        tbl->last->next = newobj;
    }
    tbl->last = newobj;
    tbl->size++;
}
/*
 * Print the tree rooted at `tree` to stdout in pre-order: the node's own
 * token first, then the left subtree, then the right subtree. Missing
 * (NULL) children are skipped; `tree` itself must not be NULL.
 */
void printtree(node* tree)
{
    node* child;

    printf("%s", tree->token);

    child = tree->left;
    if (child != NULL) {
        printtree(child);
    }

    child = tree->right;
    if (child != NULL) {
        printtree(child);
    }
}
/*
 * Parser error callback: writes the fixed text "bla bla" and a newline
 * to stdout and returns 0.
 */
int yyerror()
{
    fputs("bla bla\n", stdout);
    return 0;
}
--------lex code:------------
minus "-"
colon ":"
semicolon ";"
space " "
parcent "%"
backslash "/"
charptr charptr
plus "+"
not "!"
notequal "!="
or "||"
and "&&"
multiply "*"
power "^"
dbland "&"
greater ">"
lesser "<"
type boolean|string|char|integer|intptr|charptr
return "return"
greaterequal {greater}{assign}
lesserequal {lesser}{assign}
singleQuotes \'
charERR {singleQuotes}+(({digit})+)*(({letter})+)*{singleQuotes}+
stringERR {doubleQuotes}{doubleQuotes}+|{doubleQuotes}
doubleQuotes \"
var "var"{space}*
octalDigit [1-7]
decimal {digit}|{digitNoZero}{digit}+
digitNoZero[1-9]
octal "0"{octalDigit}("0")*{octalDigit}*
integer {binaryInt}|{hex}|{octal}|{decimal}
binaryInt ("0"|"1")+"b"
hexLetter A|B|C|D|E|F
hex 0(x|X){digit}+{hexLetter}*|0(x|X){digit}*{hexLetter}+
literBool true|false
letter [a-zA-Z]
letters {letter}+
digit [0-9]
low "_"
equal "=="
assign "="
devide "/"
lparen "("
rparen ")"
lparenc "{"
rparenc "}"
identifier {letter}+{digit}*{letter}+{space}*|{space}*{letter}{space}*
literChar {singleQuotes}{letter}{singleQuotes}
ltrlString {doubleQuotes}{letters}*{decimal}*{hex}*{octal}*{binaryInt}*{dbland}*{devide}*{assign}*{equal}*{greater}*{lesser}*{greaterequal}*{lesserequal}*{mi$
pointer {colon}{space}{charptr}|"="{space}"&"{identifier}
comment {backslash}{parcent}{space}*({letters}*{space}*{identifier}*{space}*{decimal}*{space}*{hex}*{space}*{octal}*{space}*{binaryInt}*{space}*{dbland}*{dev$
pointerErr "&"{identifier}|{charptr}
statif "if"{space}*
ELSE "else"{space}*
comma ","
whileLoop "while"{space}*
main "main"
%%
{lparen} return LPAREN;
{rparen} return RPAREN;
{colon} return COLON;
{type} return TYPE;
{semicolon} return SEMICOLON;
{var} return VAR;
{whileLoop} return WHILELOOP;
{ELSE} return ELSE;
{statif} return IF;
{pointerErr} return POINTERERR;
{comment} return COMMENT;
{pointer} return POINTER;
{literChar} return LITERCHAR;
{charERR} return CHARERROR;
{stringERR} return STRINGERROR;
{ltrlString} return LTRLSTRING;
{binaryInt} return BINARYINT;
{octal} return OCTAL;
{hex} return HEX;
{return} return RETURN;
{greaterequal} return GREATEREQUAL;
{lesserequal} return LESSEREQUAL;
{dbland} return DBLAND;
{greater} return GREATER;
{lesser} return LESSER;
{lparenc} return LPARENC;
{rparenc} return RPARENC;
{power} return POWER;
{multiply} return MULTIPLY;
{plus} return PLUS;
{or} return OR;
{and} return AND;
{comma} return COMMA;
{not} return NOT;
{main} return MAIN;
{notequal} return NOTEQUAL;
{minus} return MINUS;
{integer} return INTEGER;
{literBool} return BOOLEAN;
{identifier} return IDENTIFIER;
{equal} return EQUAL;
{assign} return ASSIGN;
{devide} return DEVIDE;
. return yytext[0];
You're saying that a program can be either an exp or an ident_list, among other things. This is not particularly sensible to start with, and I suppose your intention is to do some sort of debugging. But it is not going to work because a single identifier could be an expression or a list containing exactly one identifier, and there is no obvious way for the parser to guess which one you meant. That makes your grammar ambiguous.
What yacc/bison does in this case is to choose whichever production comes earlier in the grammar file. That's not a very precise way of defining a preference, so it warns you about the conflict. But it makes it possible for you to express your preference.
Otherwise, you'd have to eliminate the ambiguity. If, for example, you decided that a single identifier should be an exp, you could insist that a top-level identifier list have at least two identifiers.

Bison:syntax error at the end of parsing

Hello this is my bison grammar file for a mini-programming language:
%{
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "projectbison.tab.h"
void yyerror(char const *);
extern FILE *yyin;
extern FILE *yyout;
extern int yylval;
extern int yyparse(void);
extern int n;
int errNum = 0;
int forNum = 0;
%}
%left PLUS MINUS
%left MULT DIV MOD
%nonassoc EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL
%token INTEGER BOOLEAN STRING VOID
%token ID
%token AND
%token BEGINP
%token ENDP
%token EXTERN
%token COMMA
%token EQ
%token RETURN1
%token IF1 ELSE1 WHILE1 FOR1 DO1
%token LOR LAND LNOT
%token TRUE FALSE
%token EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL
%token LB1 RB1
%token LCB1 RCB1
%token SEMIC
%token NEWLINE
%token PLUS MINUS
%token MULT DIV MOD
%token DIGIT STRING1
%start program
%%
/*50*/
program : external-decl program-header defin-field command-field
;
external-decl : external-decl external-prototype
|
;
external-prototype : EXTERN prototype-func NEWLINE
;
program-header : VOID ID LB1 RB1 NEWLINE
;
defin-field : defin-field definition
|
;
definition : variable-defin
| func-defin
| prototype-func
;
variable-defin : data-type var-list SEMIC newline
;
data-type : INTEGER
| BOOLEAN
| STRING
;
var-list : ID extra-ids
;
extra-ids : COMMA var-list
|
;
func-defin : func-header defin-field command-field
;
prototype-func : func-header SEMIC
;
func-header : data-type ID LB1 lists RB1 newline
;
lists: list-typ-param
|
;
list-typ-param : typical-param typical-params
;
typical-params : COMMA list-typ-param
|
;
typical-param : data-type AND ID
;
command-field : BEGINP commands newline ENDP newline
;
commands : commands newline command
|
;
command : simple-command SEMIC
| struct-command
| complex-command
;
complex-command : LCB1 newline command newline RCB1
;
struct-command : if-command
| while-command
| for-command
;
simple-command : assign
| func-call
| return-command
| null-command
;
if-command : IF1 LB1 gen-expr RB1 newline command else-clause
;
else-clause: ELSE1 newline command
;
while-command : WHILE1 LB1 gen-expr RB1 DO1 newline RCB1 command LCB1
;
for-command : FOR1 LB1 conditions RB1 newline RCB1 command LCB1
;
conditions : condition SEMIC condition SEMIC condition SEMIC
;
condition : gen-expr
|
;
assign : ID EQ gen-expr
;
func-call : ID LB1 real-params-list RB1
| ID LB1 RB1
;
real-params-list : real-param real-params
;
real-params : COMMA real-param real-params
|
;
real-param : gen-expr
;
return-command : RETURN1 gen-expr
;
null-command :
;
gen-expr : gen-terms gen-term
;
gen-terms : gen-expr LOR
|
;
gen-term : gen-factors gen-factor
;
gen-factors : gen-term LAND
|
;
gen-factor : LNOT first-gen-factor
| first-gen-factor
;
first-gen-factor : simple-expr comparison
| simple-expr
;
comparison : compare-operator simple-expr
;
compare-operator : EQUAL
| NEQUAL
| LESS
| GREATER
| LEQUAL
| GEQUAL
;
simple-expr : expresion simple-term
;
expresion : simple-expr PLUS
|simple-expr MINUS
|
;
simple-term : mul-expr simple-parag
;
mul-expr: simple-term MULT
| simple-term DIV
| simple-term MOD
|
;
simple-parag : simple-prot-oros
| MINUS simple-prot-oros
;
simple-prot-oros : ID
| constant
| func-call
| LB1 gen-expr RB1
;
constant : DIGIT
| STRING1
| TRUE
| FALSE
;
newline:NEWLINE
|
;
%%
/*
 * Parser error callback: increments the global errNum counter, then writes
 * the message followed by a newline to stderr.
 */
void yyerror(char const *msg)
{
    errNum += 1;
    fputs(msg, stderr);
    fputc('\n', stderr);
}
/*
 * Entry point.
 *
 * With a file argument, that file becomes the scanner input. Without one,
 * input is read from stdin and the scanner's output stream is directed to
 * a file named "output".
 *
 * After one yyparse() run it prints either a success message or the line
 * counter `n`, followed by the number of errors counted in errNum.
 *
 * Returns 0 after parsing (regardless of the parse result), or
 * EXIT_FAILURE when the named input file cannot be opened.
 */
int main(int argc, char **argv)
{
    int a;

    /* skip over the program name */
    ++argv;
    --argc;

    if (argc > 0) {
        yyin = fopen(argv[0], "r");
        if (yyin == NULL) {
            /* a NULL yyin would silently fall back to stdin in flex */
            perror(argv[0]);
            return EXIT_FAILURE;
        }
    } else {
        yyin = stdin;
        yyout = fopen("output", "w");
        if (yyout == NULL) {
            /* report the failure and keep going with stdout */
            perror("output");
            yyout = stdout;
        }
    }

    a = yyparse();
    if (a == 0) {
        printf("Done parsing\n");
    } else {
        printf("Yparxei lathos sti grammi: %d\n", n);
    }
    printf("Estimated number of errors: %d\n", errNum);
    return 0;
}
for a simple input like this :
void main()
integer k;
boolean l;
begin
aek=32;
end
i get the following :
$ ./MyParser.exe file2.txt
void , id ,left bracket , right bracket
integer , id ,semicolon
boolean , id ,semicolon
BEGIN PROGRAM
id ,equals , digit ,semicolon
END PROGRAM
syntax error
Yparxei lathos sti grammi: 8
Estimated number of errors: 1
Whatever change I make to the input file, I get a syntax error at the end. Why do I get this, and what can I do about it? Thanks a lot in advance! Here is the flex file, in case someone needs it:
%{
#include "projectbison.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int n=1;
%}
%option noyywrap
digit [0-9]+
id [a-zA-Z][a-zA-Z0-9]*
%%
"(" {printf("left bracket , "); return LB1;}
")" {printf("right bracket\n"); return RB1;}
"{" {printf("left curly bracket , "); return LCB1;}
"}" {printf("right curly bracket\n"); return RCB1;}
"==" {printf("isotita ,"); return EQUAL;}
"!=" {printf("diafora ,"); return NEQUAL;}
"<" {printf("less_than ,"); return LESS;}
">" {printf("greater_than ,"); return GREATER;}
"<=" {printf("less_eq ,"); return LEQUAL;}
">=" {printf("greater_eq ,"); return GEQUAL;}
"||" {printf("lor\n"); return LOR;}
"&&" {printf("land\n"); return LAND;}
"&" {printf("and ,"); return AND;}
"!" {printf("lnot ,"); return LNOT;}
"+" {printf("plus ,"); return PLUS; }
"-" {printf("minus ,"); return MINUS;}
"*" {printf("multiply ,"); return MULT;}
"/" {printf("division ,"); return DIV;}
"%" {printf("mod ,"); return MOD;}
";" {printf("semicolon \n"); return SEMIC;}
"=" {printf("equals , "); return EQ;}
"," {printf("comma ,"); return COMMA;}
"\n" {n++; return NEWLINE;}
void {printf("void ,"); return VOID;}
return {printf("return ,"); return RETURN1;}
extern {printf("extern\n"); return EXTERN;}
integer {printf("integer ,"); return INTEGER;}
boolean {printf("boolean ,"); return BOOLEAN;}
string {printf("string ,"); return STRING;}
begin {printf("BEGIN PROGRAM\n"); return BEGINP;}
end {printf("END PROGRAM\n"); return ENDP;}
for {printf("for\n"); return FOR1;}
true {printf("true ,"); return TRUE;}
false {printf("false ,"); return FALSE;}
if {printf("if\n"); return IF1; }
else {printf("else\n"); return ELSE1; }
while {printf("while\n"); return WHILE1;}
{id} {printf("id ,"); return ID;}
{digit} {printf("digit ,"); return DIGIT;}
[a-zA-Z0-9]+ {return STRING1;}
` {/*catchcall*/ printf("Mystery character %s\n", yytext); }
<<EOF>> { static int once = 0; return once++ ? 0 : '\n'; }
%%
Your scanner pretty well guarantees that two newline characters will be sent at the end of the input: one from the newline present in the input, and another one as a result of your trapping <<EOF>>. However, your grammar doesn't appear to accept unexpected newlines, so the second newline will trigger a syntax error.
The simplest solution would be to remove the <<EOF>> rule, since text files without a terminating newline are very rare, and it is entirely legitimate to consider them syntax errors. A more general solution would be to allow any number of newline characters to appear where a newline is expected, by defining something like:
newlines: '\n' | newlines '\n';
(Using actual characters for single-character tokens makes your grammar much more readable, and simplifies your scanner. But that's a side issue.)
You might also ask yourself whether you really need to enforce newline terminators, since your grammar seems to use ; as a statement terminator, making the newline redundant (aside from stylistic considerations). Removing newlines from the grammar (and ignoring them, as with other whitespace, in the scanner) will also simplify your code.

bison grammar rules for a custom pascal like language

I'm trying to make a compiler for a custom pascal like language using bison and flex and I end up getting syntax errors for programs that should be correct according to my custom grammar.
My custom grammar:
<program> ::= program id
<block>
<block> ::= {
<sequence>
}
<sequence> ::= <statement> ( ; <statement> )*
<brackets-seq> ::= { <sequence> }
<brack-or-stat> ::= <brackets-seq> |
<statement>
<statement> ::= ε |
<assignment-stat> |
<if-stat> |
<while-stat>
<assignment-stat> ::= id := <expression>
<if-stat> ::= if (<condition>)
<brack-or-stat>
<elsepart>
<elsepart> ::= ε |
else <brack-or-stat>
<while-stat> ::= while (<condition>)
<brack-or-stat>
<expression> ::= <optional-sign> <term> ( <add-oper> <term>)*
<term> ::= <factor> (<mul-oper> <factor>)*
<factor> ::= constant |
(<expression>) |
id
<condition> ::= <boolterm> (and <boolterm>)*
<boolterm> ::= <boolfactor> (or <boolfactor>)*
<boolfactor> ::= not [<condition>] |
[<condition>] |
<expression> <relational-oper> <expression>
<relational-oper> ::= == | < | > | <> | <= | >=
<add-oper> ::= + | -
<mul-oper> ::= * | /
<optional-sign> ::= ε | <add-oper>
My grammar implementation on bison:
%{
#include <stdio.h>
#include <string.h>
int yylex(void);
void yyerror(char *s);
%}
%union {
int i;
char *s;
};
%token <i> INTEGERNUM
%token PROGRAM;
%token OR;
%token AND;
%token NOT;
%token IF;
%token ELSE;
%token WHILE;
%token PLUS;
%token MINUS;
%token MUL;
%token DIV;
%token LSB;
%token RSB;
%token LCB;
%token RCB;
%token LEFTPAR;
%token RIGHTPAR;
%token ID;
%token INT;
%token ASSIGN;
%token ISEQUAL;
%token LTHAN;
%token GTHAN;
%token NOTEQUAL;
%token LESSEQUAL;
%token GREATEREQUAL;
%left '+' '-'
%left '*' '/'
%%
program:
PROGRAM ID block
;
block:
LCB RCB
|LCB sequence RCB
;
sequence:
statement ';'sequence
|statement ';'
;
bracketsSeq:
LCB sequence RCB
;
brackOrStat:
bracketsSeq
|statement
;
statement:
assignmentStat
|ifStat
|whileStat
|
;
assignmentStat:
ID ':=' expression
ifStat:
IF LEFTPAR condition RIGHTPAR brackOrStat elsepart
;
elsepart:
ELSE brackOrStat
|
;
whileStat:
WHILE LEFTPAR condition RIGHTPAR brackOrStat
;
expression:
addOper expression
|expression addOper expression
|term
;
term:
term mulOper term
|factor
;
factor:
INT
|LEFTPAR expression RIGHTPAR
|ID
;
condition:
condition AND condition
|boolterm
;
boolterm:
boolterm OR boolterm
|boolfactor
;
boolfactor:
NOT LSB condition RSB
|LSB condition RSB
|expression relationalOper expression
;
relationalOper:
ISEQUAL
|LTHAN
|GTHAN
|NOTEQUAL
|LESSEQUAL
|GREATEREQUAL
;
addOper:
PLUS
|MINUS
;
mulOper:
MUL
|DIV
;
optionalSign
|addOper
;
%%
/*
 * Entry point: reads from the file named by the first argument, or from
 * stdin when no argument is given, and calls yyparse() repeatedly until
 * end-of-file is seen on the input stream.
 *
 * Returns 0 after parsing, or 1 when the named file cannot be opened
 * (without this check a NULL yyin would make a flex scanner read stdin
 * with no diagnostic).
 */
int main( int argc, char **argv )
{
    extern FILE *yyin;

    ++argv, --argc; /* skip over program name */
    if ( argc > 0 ) {
        yyin = fopen( argv[0], "r" );
        if ( yyin == NULL ) {
            perror( argv[0] );
            return 1;
        }
    } else {
        yyin = stdin;
    }

    do {
        yyparse();
    } while ( !feof(yyin) );

    return 0;
}
My flex implementation is pretty straightforward where I just return tokens for each symbol or identifier needed.
Using my implementation on the following simple program:
program circuit
{
a:=b;
}
I end up getting a syntax error. Specifically when the parsing reaches the point right after := according to my debugging prints I use:
$ ./a.exe verilog.txt
text = program
text = circuit val = circuit
text = {
text = a val = a
text = :=
syntax error
This is the first time I use flex and bison so I'm guessing that I made a wrong implementation of my original grammar to bison since after the ./bison.exe -dy comp.y command I get:
bison conflicts 64 shift/reduce
Any ideas would be helpful. Thanks!
This rule :
assignmentStat: ID ':=' expression
uses a token ':=' which bison gives a code distinct from any other token, and which your lexer has no way of knowing, so you're almost certainly not returning it. You're probably returning ASSIGN for the character sequence ':=', so you want:
assignmentStat: ID ASSIGN expression
For the shift-reduce conflicts, they mean that the parser doesn't match exactly the language you specified, but rather some subset (as determined by the default shift instead of reduce). You can use bison's -v option to get a complete printout of the parser state machine (including all the conflicts) in a .output file. You can then examine the conflicts and determine how you should change the grammar to match what you want.
When I run bison on your example, I see only 9 shift/reduce conflicts, all arising from expr: expr OP expr-style rules, which are ambiguous (may be either right- or left- recursive). The default resolution (shift) makes them all right-recursive, which may not be what you want. You can either change the grammar to not be ambiguous, or use bison's built-in precedence resolution tools to resolve them.

Yacc reading only the first grammar rule

I have this yacc file
%error-verbose
%token END
%token ID
%token INT
%token IF
%token ELSE
%token WHILE
%token FOR
%token BREAK
%token CONTINUE
%token RETURN
%token SEM
%token LPAR
%token RPAR
%token PLUS
%token MINUS
%token MULT
%token DIV
%token MOD
%token GT
%token LT
%token GTE /* >= */
%token LTE /* <= */
%token EQUAL /* == */
%token NEQUAL /* != */
%token AND
%token OR
%token EQ
%token COM
%token PRINT
%token READ
%token FLOAT
%token LABR
%token RABR
%token NUM
%token STR
/*
* precedentce tabLTE
*/
%right EQ PE ME TE DE RE
%left OR
%left AND
%left EQUAL NEQUAL
%left LT GT GTE LTE
%left PLUS MINUS
%left MULT DIV MOD
%right PP MM
%{
#include<stdio.h>
extern char *yyname;
extern char *yytext;
extern int yylineno;
/*
 * Parser error callback: writes "<yyname>:<yylineno>:<msg>" and a newline
 * to stderr, using the current values of the yyname and yylineno globals.
 */
void yyerror(char const *msg)
{
    const char *file = yyname;
    int line = yylineno;

    fprintf(stderr, "%s:%d:%s\n", file, line, msg);
}
%}
%%
program
: definitions
;
definitions
: definition
| definitions definition
;
definition:
| declaration
;
declarations
: /* null */
| declarations declaration
;
declaration
: INT declarator_list SEM
;
declarator_list
: ID
| declarator_list COM ID
;
statements
: /* null */
| statements statement
;
statement
: expression SEM
| SEM /* null statement */
| if_prefix statement
| if_prefix statement ELSE statement
| loop_prefix statement
;
if_prefix
: IF LPAR expression RPAR
;
loop_prefix
: WHILE LPAR expression RPAR
;
expression
: binary
| expression COM binary
;
binary
: ID
| LPAR expression RPAR
| ID LPAR optional_argument_list RPAR
| binary PLUS binary
| binary MINUS binary
| binary MULT binary
| binary DIV binary
| binary MOD binary
| binary GT binary
| binary LT binary
| binary GTE binary
| binary LTE binary
| binary EQUAL binary
| binary NEQUAL binary
| binary AND binary
| binary OR binary
| ID EQ binary
| ID PE binary
| ID ME binary
| ID TE binary
| ID DE binary
| ID RE binary
;
optional_argument_list
: /* no actual arguments */
| argument_list
;
argument_list
: binary
| argument_list COM binary
;
%%
#include <stdlib.h>
extern FILE *yyin;
/*
 * Entry point: expects exactly one argument, the path of the file to parse.
 *
 * The path is stored in yyname (used by yyerror for diagnostics) and the
 * file is opened as the scanner input.
 *
 * Returns EXIT_SUCCESS when yyparse() returns 0, and EXIT_FAILURE when the
 * argument count is wrong, the file cannot be opened, or yyparse() returns
 * non-zero.
 */
int main(int argc, char **argv)
{
    if (argc != 2) {
        fprintf(stderr, "%s: Wrong arguments\n", argv[0]);
        return EXIT_FAILURE;
    }

    yyname = argv[1];
    yyin = fopen(yyname, "r");
    if (yyin == NULL) {
        fprintf(stderr, "%s: %s: Invalid file\n", argv[0], argv[1]);
        return EXIT_FAILURE;
    }

    /* yyparse() yields 0 on success, so zero must map to EXIT_SUCCESS */
    return (yyparse() == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}
when the input is
int x;
everything works fine, but when the input starts with a token other than INT,
let's say FOR, it reports an error:
unexpected FOR expecting INT or $end
so it's actually reading only the first rule from the set of rules..
Besides, it keeps showing useless non terminals and terminals warning when bison command is applied.
What is wrong with this yacc file?
The trouble is that the rules:
program
: definitions
;
definitions
: definition
| definitions definition
;
definition:
| declaration
;
declarations
: /* null */
| declarations declaration
;
declaration
: INT declarator_list SEM
;
only allow declarations through; nothing allows statements as part of a program. Your FOR is not a declaration, so the grammar rejects it.
The 'useless non-terminals' warning is trying to tell you:
You have goofed big time; there is a bug in your grammar. You have tried to write rules for some production, but you never let it be recognized, so there was no point in adding it.
Or thereabouts...
Maybe you need:
program
: definitions statements
;
Or maybe you need to allow functions as a definition too, and then the FOR statement will be part of the body of a function.
Asking my LL oracle about your amended grammar:
Out of 15 non-terminals, 14 are reachable, 1 are unreachable:
'declarations'
Circular symbols:
definitions
definitions
The complaint about circular symbols means that 'definitions' can derive itself. For example, 'definitions' can produce 'definitions definition', but 'definition' is nullable, so 'definitions' can produce just itself, kinduva infinite loop few parser generators care to deal with in any sensible way. Looking at it another way, you've defined 'definitions' to be a list of nullable symbols, so how many epsilons would you like to match? How about infinity? :-)
This is a drawback of the yacc/bison style of trying to produce some parser even if there are problems in the grammar; quite convenient if you know exactly what you're doing, but quite confusing otherwise.
But, to the narrow point of what to do about the grammar circularity that's giving you a very unuseful (but by gum compilable!) parser. How about you allow 'definitions' be nullable but not 'definition'? IOW:
definitions : | definitions definition ;
definition : declaration ;
Try to not stack nullability on top of nullability. So when you later change to:
definition : declarations ;
Don't make 'declarations' nullable (that's already handled by 'definitions' being nullable). Instead, change it to:
declarations : declaration | declarations declaration ;
That should get you past the immediate problem and onto some new ones :-)