I am new to yacc and I am trying to define some rules for my language.
I have written a grammar "well" and it runs and executes without an error but for some reason, it doesn't do what it is supposed to do.
mylex.l
%{
#include <stdio.h>
#include "myyacc.tab.h"
extern int yyval;
%}
/* KEEP TRACK OF LINE NUMBER*/
%option yylineno
uppercase [A-Z]
lowercase [a-z]
alpha [{uppercase}{lowercase}]
digit [0-9]
alphanum [{alpha}{digit}]
id uppercase({alphanum}|_)*
int_literal [0-9]+
float_literal [0-9]+\.[0-9]+
string_literal \"[^\"]*\"
comment (##)(.)*(##)
%%
"int" {return INT;}
"float" {return FLOAT;}
"boolean" {return BOOLEAN;}
"if" {return IF;}
"else" {return ELSE;}
"end" {return END;}
"true" {return TRUE;}
"false" {return FALSE;}
"read" {return READ;}
"print" {return PRINT;}
"while" {return WHILE;}
"START" {return START;}
"END" {return END;}
"+" {return ADD;}
"-" {return SUB;}
"*" {return MUL;}
"/" {return DIV;}
"&&" {return LOG_AND;}
"||" {return LOG_OR;}
"!" {return LOG_NOT;}
"==" {return EQ;}
"<>" {return NEQ;}
"<" {return LT;}
"<=" {return LEQ;}
">" {return GT;}
">=" {return GEQ;}
"=" {return ASSIGN;}
"(" {return LPAREN;}
")" {return RPAREN;}
"{" {return LBRACE;}
"}" {return RBRACE;}
{int_literal} {return INT_LITERAL;}
{float_literal} {return FLOAT_LITERAL;}
{string_literal} {return STRING_LITERAL;}
{id} {return ID;}
{comment} { ; }
%%
int yywrap() {
return 1;
}
myyacc.y
%{
#include <stdio.h>
#include <stdlib.h>
extern int yylineno;
extern FILE* yyin;
extern int yyerror (char* msg);
extern char * yytext;
%}
/* definitions section start */
%token INT FLOAT BOOLEAN IF ELSE END TRUE FALSE READ PRINT WHILE START
%token INT_LITERAL FLOAT_LITERAL STRING_LITERAL ID ERROR
%right ASSIGN
%right LOG_NOT
%left MUL DIV
%left ADD SUB
%left LPAREN RPAREN
%left LBRACE RBRACE
%left LT LEQ GT GEQ
%left EQ NEQ
%left LOG_AND
%left LOG_OR
%start program
/* definitions section end */
%%
/* rules section start */
program : START statements END {printf("No syntax errors detected")};
statements : statements statement
| statement
;
statement : dec_stmt
| assignment_stmt
| print_stmt
| read_stmt
| condition_stmt
| while_stmt
;
dec_stmt : type ID
;
type : INT
| FLOAT
| BOOLEAN
;
assignment_stmt : ID ASSIGN expression
;
expression : exp EQ exp
| exp NEQ exp
| exp LT exp
| exp LEQ exp
| exp GT exp
| exp GEQ exp
| exp
;
exp : exp MUL exp
| exp DIV exp
| exp ADD exp
| exp SUB exp
| exp LOG_AND exp
| exp LOG_OR exp
| LOG_NOT exp
| LPAREN exp RPAREN
| INT_LITERAL
| FLOAT_LITERAL
| ID
| TRUE
| FALSE
;
print_stmt : PRINT LPAREN ID RPAREN
| PRINT LPAREN STRING_LITERAL RPAREN
;
read_stmt : ID ASSIGN READ LPAREN RPAREN
;
condition_stmt : IF LPAREN expression RPAREN LBRACE statement RBRACE END
| IF LPAREN expression RPAREN LBRACE statement RBRACE ELSE LBRACE statement RBRACE END
;
while_stmt : WHILE LPAREN expression RPAREN LBRACE statement RBRACE
;
/* rules section end */
%%
/* auxiliary routines start */
int main(int argc, char *argv[])
{
// don't change this part
yyin = fopen(argv[1], "r" );
if(!yyparse())
printf("\nParsing complete\n");
else
printf("\nParsing failed\n");
fclose(yyin);
return 0;
}
int yyerror (char* msg)
{
printf("Line %d: %s near %s\n", yylineno, msg, yytext);
exit(1);
}
/* auxiliary routines end */
Test case
START
int X12
float ABC1
DDe = 7
while(QNn >0) ## this a Comment ##
{ RLk9999 = ACc - 2
CCC = true
}
if ( ACc ==5){ print ( " Inside IF inside Loop " ) } end }
print ( " Hello .. " )
END
Output
Line 3: syntax error near 12
It also gets the line number wrong.
I've been trying to see what I'm doing wrong for some time now and I'd really appreciate a second set of eyes.
You cannot use macros inside character classes. Inside a character class, pattern operators lose their special meaning, so when you write
alphanum [{alpha}{digit}]
you are defining a character class containing {, }, and the letters adghilpt. That doesn't match the 12 in X12.
Anyway, flex already has predefined sets of characters which you can include in your character classes:
* [:lower:] a-z
* [:upper:] A-Z
* [:alpha:] [:lower:][:upper:]
* [:digit:] 0-9
* [:alnum:] [:alpha:][:digit:]
Note that these can only be used inside a character class. So you could write your id pattern as
id [[:upper:]][[:alnum:]_]*
without the need for any other macros.
Please see the flex pattern documentation for more details.
In addition to @rici's answer, I've also noticed that the while_stmt rule in my yacc file accepts only one statement in its body.
Related
Hi I am making a program that does simple arithmetic operations using Lex and yacc, but I am having a problem with a specific error.
ex1.y
%{
#include <stdio.h>
int sym[26];
%}
%token INTEGER VARIABLE
%left '+' '-'
%left '*' '/' '%'
%%
program:
program statement '\n'
|
;
statement:
expr {printf("%d\n", $1);}
| VARIABLE '=' expr {sym[$1] = $3;}
;
expr:
INTEGER
| VARIABLE { $$ = sym[$1];}
| expr '+' expr { $$ = $1 + $3;}
| expr '-' expr { $$ = $1 - $3;}
| expr '*' expr { $$ = $1 * $3;}
| expr '/' expr { $$ = $1 / $3;}
| '(' expr ')' { $$ = $2;}
;
%%
main() { return yyparse();}
int yyerror(char *s){
fprintf(stderr,"%s\n",s);
return 0;
}
ex1.l
%{
#include <stdlib.h>
#include "y.tab.h"
%}
%%
/* variables */
[a-z] {
yylval = *yytext -'a';
return VARIABLE;
}
/* integers */
[0-9]+ {
yylval = atoi(yytext);
return INTEGER;
}
/* operators */
[-+()=/*\n] { return *yytext;}
/* skip whitespace */
[ \t] ;
/* anything else is an error */
. yyerror("invalid character");
%%
int yywrap (void){
return 1;
}
When I execute the instructions below:
$bison -d -y ex1.y
$lex ex1.l
$gcc lex.yy.c y.tab.c -o ex1
The following error occurs:
ex1.l: In function ‘yylex’:
ex1.l:28:1: warning: implicit declaration of function ‘yyerror’; did you mean ‘perror’? [-Wimplicit-function-declaration]
28 |
| ^
| perror
y.tab.c: In function ‘yyparse’:
y.tab.c:1227:16: warning: implicit declaration of function ‘yylex’ [-Wimplicit-function-declaration]
1227 | yychar = yylex ();
| ^~~~~
y.tab.c:1402:7: warning: implicit declaration of function ‘yyerror’; did you mean ‘yyerrok’? [-Wimplicit-function-declaration]
1402 | yyerror (YY_("syntax error"));
| ^~~~~~~
| yyerrok
I don't know what is wrong with my code. I would appreciate it if you could tell me how to fix the above error.
The version of bison you are using requires you to declare prototypes for yylex() and yyerror. These should go right after the #include <stdio.h> at the top of the file:
int yylex(void);
int yyerror(char* s);
I would use int yyerror(const char* s) as the prototype for yyerror, because it is more accurate, but if you do that you'll have to make the same change in the definition.
You use yyerror in your lex file, so you will have to add its declaration in that file as well.
main() hasn't been a valid prototype any time this century. Return types are required in function declarations, including main(). So I guess you are basing your code on a very old template. There are better starting points in the examples in the bison manual.
(And don't expect it to be easy to work with parser generators if you have no experience with C.)
We have a task to write lex and yacc parser code and then build them together using the cc y.tab.c -ll -Ly command. When we process each part separately they compile just fine, but compiling both parts as one gives 10 lines of errors.
First part is Lex Code:
%option yylineno
%pointer
%{
#include <stdlib.h>
#include <string.h>
void yyerror(const char *);
%}
low \_
identifier {letters}{digit}*{low}{letters}|{letters}
stringERR {doubleQuotes}{doubleQuotes}+|{doubleQuotes}
charERR {singleQuotes}+{digits}*{letters}*{singleQuotes}+
ERR {charERR}|{stringERR}
type boolean|string|char|integer|intptr|charptr|var
dbland "&&"
devide "/"
assign "="
equal "=="
greater ">"
lesser "<"
greaterequal ">="
lesserequal "<="
minus "-"
plus "+"
not "!"
notequal "!="
or "||"
multiply "*"
power "^"
AND "&"
literBool true|false
letter [a-z]|[A-Z]
letters {letter}+
singleQuotes '
literChar {singleQuotes}{letter}{singleQuotes}
digit [0-9]
digitZero 0
octalDigit [1-7]
octal {digitZero}{octalDigit}{digitZero}*{octalDigit}*
digits {digit}+
digitNoZero[1-9]
decimal {digit}|{digitNoZero}{digits}
hexLetter A|B|C|D|E|F
hex 0(x|X){digit}+{hexLetter}*|0(x|X){digit}*{hexLetter}+
letterB b
digitOne 1
binaryInt ({digitZero}|{digitOne})+{letterB}
integer {binaryInt}|{hex}|{octal}|{decimal}
doubleQuotes \"
ltrlString {doubleQuotes}{letters}*{decimal}*{hex}*{octal}*{binaryInt}*{dbland}*{devide}*{assign}*{equal}*{greater}*{lesser}*{greaterequal}*{lesserequal}*{minus}*{plus}*{not}*{notequal}*{or}*{multiply}*{AND}*{power}*{doubleQuotes}
comment {backslash}{parcent}{space}*({letters}*{space}*{identifier}*{space}*{decimal}*{space}*{hex}*{space}*{octal}*{space}*{binaryInt}*{space}*{dbland}*{devide}*{assign}*{equal}*{greater}*{lesser}*{greaterequal}*{lesserequal}*{minus}*{$nus}*{plus}*{not}*{notequal}*{or}*{multiply}*{AND}*{power}*{ltrlString}*)*{space}{parcent}{backslash}
colon ":"
openSq "["
closeSq "]"
semicolon ";"
parcent "%"
space " "
comma ","
backslash "/"
clos ")"
opn "("
charptr charptr
pointer {colon}{space}{charptr}|"="{space}"&"{identifier}
pointerErr "&"{identifier}|{charptr}
ELSE "else"{space}*
statif "if"{space}*
whileLoop "while"{space}*
returnState "return"{space}*
func "procedure"{space}*
%%
{dbland} return dbland;
{devide} return devide;
{assign} return assign;
{equal} return equal;
{greater} return greater;
{lesser} return lesser;
{greaterequal} return greaterequal;
{lesserequal} return lesserequal;
{minus} return minus;
{plus} return plus;
{not} return not;
{notequal} return notequal;
{or} return or;
{multiply} return multiply;
{power} return power;
{AND} return AND;
{literBool} return literBool;
{literChar} return literChar;
{decimal} return decimal;
{hex} return hex;
{octal} return octal;
{binaryInt} return binaryInt;
{ltrlString} return ltrlString
{type} return type;
{identifier} return identifier;
{ERR} return ERR;
{comment} return comment;
{pointer} return pointer;
{pointerErr} return pointerErr;
{statif} return statif;
{ELSE} return ELSE;
{whileLoop} return whileLoop;
{returnState} return returnState;
{func} return func;
{semicolon} return semicolon;
{comma} return comma;
[\*\(\)\.\+\-\%] { return *yytext; }
[0-9][0-9]* { return 'n'; }
[ \t\n] ; /* skip whitespace */
%%
int yywrap(void) {
return 1;
}
yacc code:
%token low identifier stringERR charERR ERR type operator literBool letter
%token dbland literChar decimal hex octal integer
%token binaryInt ltrString comment pointer pointerErr
%token statif ELSE whileLoop returnState func comma semicolon
%token EOL LPAREN RPAREN UMINUS
%left equal greater notequal lesser greaterequal lesserequal
%left '|' %left '&' %left SHIFT /* << >> */
%left minus plus
%left multiply devide '%' MOD %left power
%left not or AND comma
%nonassoc UMINUS
%%
s: BLOCK;
BLOCK: expr|logicOp|varible_declaration|ifExp|whileExp|procExp|semicolon;
expr: exp{printtree($1);}
exp:
identifier {$$=mknode(yytext,NULL,NULL);}
| LPAREN expr RPAREN {$$=$2;}
| exp plus exp {$$= mknode("+" $1,$3);}
| exp minus exp {$$= mknode("-" $1, $3);}
| exp multiply exp {$$=mknode("*" $1, $3);}
| exp devide exp {$$=mknode("/" $1, $3);}
| "-" exp %prec UMINUS {-$2}
varible_declaration: var{printtree($1);}
var : "VAR" identifier_list ":" typet ";" {$$ = mknode("var", $2, $4);}
typet:
integer{$$ = mknode(yytext,NULL,NULL);}
|binaryInt {$$ = mknode(yytext,NULL,NULL);}
|type {$$ = mknode(yytext,NULL,NULL);}
identifier_list: identifier_list comma identifier_list
{$$= mknode(",",$1, $3);}
|identifier {$$ = mknode(yytext,NULL,NULL);}
logicOp: op{printtree($1);}
op:exp equal exp {$$ = mknode("==",$1,$3);}
|exp notequal exp {$$ = mknode("!=",$1,$3);}
|exp or exp {$$ = mknode("||",$1,$3);}
|exp AND exp {$$ = mknode("&&",$1,$3);}
|exp greater exp {$$ = mknode(">",$1,$3);}
|exp greaterequal exp {$$ = mknode(">=",$1,$3);}
|exp lesser exp {$$ = mknode("<",$1,$3);}
|exp lesserequal exp {$$ = mknode("<=",$1,$3);}
ifExp: if{printtree($1);}
if:statif '(' logicOp ')' '{' BLOCK '}' ELSE '{' BLOCK '}' {$$ = mknode("if",$3,mknode("else",$6,$10));}
|statif '(' logicOp ')' '{' BLOCK '}' {$$=mknode("if",$3,$6);}
whileExp: while{printtree($1)}
while:whileLoop '(' logicOp ')' '{' BLOCK '}' {$$=mknode("while",$3,$6);}
procExp: proc{printtree($1)}
proc:func identifier '(' identifier_list ')' returnState type '{' BLOCK '}' {$$ = mknode("procedure",$2,"TODO");}
%%
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int yylex(void);
void yyerror(const char *);
tyepdef struct node{
char * token;
struct node *left;
struct node *right;
};
node * mknode(char * token , node * left,node * right);
void printtree(node * tree);
#define yySType struct node *
#include "lex.yy.c"
main()
{ return yyparse(); }
nose * mknode(char * token,node * left, node * right)
{
node * newnode = (node*)malloc(sizeof(node));
char 8 newstr = (char*)malloc(sizeof(token)+1);
strcpy("newstr,token");
newnode->left=left;
newnode->right=right;
newnode->token=newstr;
}return newnode;
/*
 * Print the tree rooted at `tree` in pre-order: the node's own token on a
 * line by itself, then everything in its left subtree, then everything in
 * its right subtree. A NULL `tree` prints nothing.
 */
void printtree(node * tree)
{
    if (!tree)
        return;
    printf("%s\n",tree->token);
    if (tree->left) printtree(tree->left);
    /* Recurse into the RIGHT child here; recursing into left again would
       print the left subtree twice and never visit the right one. */
    if (tree->right) printtree(tree->right);
}
extern int yylineno;
void yyerror(const char *s)
{
fprintf(stderr, "%s at line %d\n", s, yylineno);
return;
}
The errors we get are the following:
[tzurisa@Ac-Aix backup]$ nano test.l
[tzurisa@Ac-Aix backup]$ lex test.l
[tzurisa@Ac-Aix backup]$ yacc test.y
[tzurisa@Ac-Aix backup]$ cc -o test y.tab.c -ll -Ly
test.y:63: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘struct’
test.y:68: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘*’ token
test.y:69: error: expected ‘)’ before ‘*’ token
In file included from test.y:72:
test.l: In function ‘yylex’:
test.l:74: error: ‘assign’ undeclared (first use in this function)
test.l:74: error: (Each undeclared identifier is reported only once
test.l:74: error: for each function it appears in.)
In file included from test.y:72:
test.l:94: error: ‘ltrlString’ undeclared (first use in this function)
test.l:95: error: expected ‘;’ before ‘break’
test.y: At top level:
test.y:75: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘*’ token
test.y:83: error: expected identifier or ‘(’ before ‘return’
test.y:84: error: expected ‘)’ before ‘*’ token
test.y: In function ‘yyparse’:
test.y:20: error: expected ‘)’ before ‘yyvsp’
test.y:21: error: expected ‘)’ before ‘yyvsp’
test.y:22: error: expected ‘)’ before ‘yyvsp’
test.y:23: error: expected ‘)’ before ‘yyvsp’
test.y:24: error: expected ‘;’ before ‘}’ token
test.y:51: error: expected ‘;’ before ‘}’ token
test.y:53: error: expected ‘;’ before ‘}’ token
We would appreciate the help of anyone who can tell us what's wrong here. We have tried many things, but we still get these errors.
If you look at line 63 of test.y, as indicated in the first error message, you will see the first problem; you misspelled typedef. Fix that, and then check remaining errors, if any, by looking at the indicated lines.
I'm trying to build a compiler for Csimple. When I run the command
"cc -o test y.tab.c -ll -L.y" I get a wall of errors.
I am at a loss.
I'm aware that my yacc file doesn't parse well,
but first I need it to run so I can see the output.
The error appears for the line "%{" in the middle of the yacc file.
lex:
%option yylineno
%pointer
%{
#include <stdlib.h>
#include <string.h>
void yyerror(const char *);
%}
low \_
identifier {letters}{digit}*{low}{letters}|{letters}
stringERR {doubleQuotes}{doubleQuotes}+|{doubleQuotes}
charERR {singleQuotes}+{digits}*{letters}*{singleQuotes}+
ERR {charERR}|{stringERR}
type boolean|string|char|integer|intptr|charptr|var
dbland "&&"
devide "/"
assign "="
equal "=="
greater ">"
lesser "<"
greaterequal ">="
lesserequal "<="
minus "-"
plus "+"
not "!"
notequal "!="
or "||"
multiply "*"
power "^"
AND "&"
literBool true|false
letter [a-z]|[A-Z]
letters {letter}+
singleQuotes '
literChar {singleQuotes}{letter}{singleQuotes}
digit [0-9]
digitZero 0
octalDigit [1-7]
octal {digitZero}{octalDigit}{digitZero}*{octalDigit}*
digits {digit}+
digitNoZero[1-9]
decimal {digit}|{digitNoZero}{digits}
hexLetter A|B|C|D|E|F
hex 0(x|X){digit}+{hexLetter}*|0(x|X){digit}*{hexLetter}+
letterB b
digitOne 1
binaryInt ({digitZero}|{digitOne})+{letterB}
integer {binaryInt}|{hex}|{octal}|{decimal}
doubleQuotes \"
ltrlString {doubleQuotes}{letters}*{decimal}*{hex}*{octal}*{binaryInt}*{dbland}*{devide}*{assign}*{equal}*{greater}*{lesser}*{greaterequal}*{lesserequal}*{minus}*{plus}*{not}*{notequal}*{or}*{multiply}*{AND}*{power}*{doubleQuotes}
comment {backslash}{parcent}{space}*({letters}*{space}*{identifier}*{space}*{decimal}*{space}*{hex}*{space}*{octal}*{space}*{binaryInt}*{space}*{dbland}*{devide}*{assign}*{equal}*{greater}*{lesser}*{greaterequal}*{lesserequal}*{minus}*{p$us}*{plus}*{not}*{notequal}*{or}*{multiply}*{AND}*{power}*{ltrlString}*)*{space}{parcent}{backslash}
colon ":"
openSq "["
closeSq "]"
semicolon ";"
parcent "%"
space " "
comma ","
backslash "/"
clos ")"
opn "("
charptr charptr
pointer {colon}{space}{charptr}|"="{space}"&"{identifier}
pointerErr "&"{identifier}|{charptr}
ELSE "else"{space}*
statif "if"{space}*
whileLoop "while"{space}*
returnState "return"{space}*
func "procedure"{space}*
%%
{dbland} return dbland;
{devide} return devide;
{assign} return assign;
{equal} return equal;
{greater} return greater;
{greaterequal} return greaterequal;
{lesserequal} return lesserequal;
{minus} return minus;
{plus} return plus;
{not} return not;
{notequal} return notequal;
{or} return or;
{multiply} return multiply;
{power} return power;
{AND} return AND;
{literBool} return literBool;
{literChar} return literChar;
{decimal} return decimal;
{hex} return hex;
{octal} return octal;
{binaryInt} return binaryInt;
{ltrlString} return ltrlString
{type} return type;
{identifier} return identifier;
{ERR} return ERR;
{comment} return comment;
{pointer} return pointer;
{pointerErr} return pointerErr;
{statif} return statif;
{ELSE} return ELSE;
{whileLoop} return whileLoop;
{returnState} return returnState;
{func} return func;
{semicolon} return semicolon;
{comma} return comma;
[\*\(\)\.\+\-\%] { return *yytext; }
[0-9][0-9]* { return 'n'; }
[ \t\n] ; /* skip whitespace */
%%
int yywrap(void) {
return 1;
}
yacc:
%token low identifier stringERR charERR ERR type operator literBool letter
%token dbland literChar decimal hex octal integer
%token binaryInt ltrString comment pointer pointerErr
%token statif ELSE whileLoop returnState func comma semicolon
%token EOL LPAREN RPAREN UMINUS
%left equal greater notequal lesser greaterequal lesserequal
%left '|' %left '&' %left SHIFT /* << >> */
%left minus plus
%left multiply devide '%' MOD %left power
%left not or AND comma
%nonassoc UMINUS
%%
s: BLOCK;
BLOCK: expr|logicOp|varible_declaration|ifExp|whileExp|procExp|semicolon;
expr: exp{printtree($1);}
exp:
identifier {$$=mknode(yytext,NULL,NULL);}
| LPAREN expr RPAREN {$$=$2;}
| exp plus exp {$$= mknode("+" $1,$3);}
| exp minus exp {$$= mknode("-" $1, $3);}
| exp multiply exp {$$=mknode("*" $1, $3);}
| exp devide exp {$$=mknode("/" $1, $3);}
| "-" exp %prec UMINUS {-$2}
varible_declaration: var{printtree($1);}
var : "VAR" identifier_list ":" typet ";" {$$ = mknode("var", $2, $4);}
typet:
integer{$$ = mknode(yytext,NULL,NULL);}
|binaryInt {$$ = mknode(yytext,NULL,NULL);}
|type {$$ = mknode(yytext,NULL,NULL);}
identifier_list: identifier_list comma identifier_list
{$$= mknode(",",$1, $3);}
|identifier {$$ = mknode(yytext,NULL,NULL);}
logicOp: op{printtree($1);}
op:exp equal exp {$$ = mknode("==",$1,$3);}
|exp notequal exp {$$ = mknode("!=",$1,$3);}
|exp or exp {$$ = mknode("||",$1,$3);}
|exp AND exp {$$ = mknode("&&",$1,$3);}
|exp greater exp {$$ = mknode(">",$1,$3);}
|exp greaterequal exp {$$ = mknode(">=",$1,$3);}
|exp lesser exp {$$ = mknode("<",$1,$3);}
|exp lesserequal exp {$$ = mknode("<=",$1,$3);}
ifExp: if{printtree($1);}
if:statif '(' logicOp ')' '{' BLOCK '}' ELSE '{' BLOCK '}' {$$ = mknode("if",$3,mknode("else",$6,$10));}
|statif '(' logicOp ')' '{' BLOCK '}' {$$=mknode("if",$3,$6);}
whileExp: while{printtree($1)}
while:whileLoop '(' logicOp ')' '{' BLOCK '}' {$$=mknode("while",$3,$6);}
procExp: proc{printtree($1)}
proc:func identifier '(' identifier_list ')' returnState type '{' BLOCK '}' {$$ = mknode("procedure",$2,"TODO");}
%%
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define YYDEBUG 1
int yylex(void);
void yyerror(const char *);
tyepdef struct node{char * token;
struct node *left;
struct node *right;
}node;
node * mknode(char * token , node * left,node * right);
void printtree(node * tree);
%}
#define yySType struct node *
#include "lex.yy.c"
main()
{ return yyparse(); }
nose * mknode(char * token,node * left, node * right)
{
node * newnode = (node*)malloc(sizeof(node));
char 8 newstr = (char*)malloc(sizeof(token)+1);
strcpy("newstr,token");
newnode->left=left;
newnode->right=right;
newnode->token=newstr;
}return newnode;
/*
 * Print the tree rooted at `tree` in pre-order: the node's own token on a
 * line by itself, then everything in its left subtree, then everything in
 * its right subtree. A NULL `tree` prints nothing.
 */
void printtree(node * tree)
{
    if (!tree)
        return;
    printf("%s\n",tree->token);
    if (tree->left) printtree(tree->left);
    /* Recurse into the RIGHT child here; recursing into left again would
       print the left subtree twice and never visit the right one. */
    if (tree->right) printtree(tree->right);
}
extern int yylineno;
void yyerror(const char *s)
{
fprintf(stderr, "%s at line %d\n", s, yylineno);
return;
}
Commands executed:
lex test.l
yacc test.y
cc -o test y.tab.c -ll -L.y
This is where it all breaks down.
As I said, I get a lot of errors, but the first one is
test.y:60: error: expected identifier or ‘(’ before ‘%’ token
Yacc directives like %{ can only appear in the FIRST section of the yacc file (before the first %%), with the exception of a few that can be in the second (before the second %%). The third section (after the second %%) is just copied verbatim to the output y.tab.c file, so there can't be anything except C code in it.
It looks like you just want that code in the output, so just delete those %{ and %} lines in the third section. Then, just go through each error or warning you get from the compiler (and do use -Wall) and figure out what they say and how to fix them.
I am writing a simple Yacc program that takes program code and returns the counts of int and double variables and of functions.
I ran into a bizarre problem: the program reports a syntax error even though there is a matching rule for the line, because the parser picked a different rule. Here are the parts of the code that show this error (if you see unused variables, that's because I deleted other parts that are irrelevant to this error):
yacc code
%{
#define YYDEBUG 1
#include <stdio.h>
#include <stdlib.h>
int func_count=0;
int int_count=0;
int char_count=0;
int double_count=0;
int float_count=0;
int pointer_count=0;
int array_count=0;
int condition_count=0;
int for_count=0;
int return_count=0;
int numeric_count=0;
%}
%token INT_KEYWORD DOUBLE_KEYWORD CHAR_KEYWORD RETURN_KEYWORD FLOAT_KEYWORD IF_KEYWORD VARIABLE OPERATOR COMPARE DIGIT FOR_KEYWORD POINTER_VARIABLE
%start program
%%
program:
program statement '\n'
|
;
statement:
declaration_statement |
function_declaration_statement {func_count++;}
;
function_declaration_statement:
datatype VARIABLE '(' datatype VARIABLE ')' '{'
;
declaration_statement:
int_declaration_statement |
double_declaration_statement
;
int_declaration_statement:
INT_KEYWORD VARIABLE '[' DIGIT ']' ';'{array_count++;}
|
INT_KEYWORD VARIABLE ';' {int_count++;}
|
INT_KEYWORD VARIABLE '=' DIGIT ';' {int_count++;}
double_declaration_statement:
DOUBLE_KEYWORD VARIABLE '[' DIGIT ']' ';' {array_count++;}
|
DOUBLE_KEYWORD VARIABLE ';' {double_count++;}
|
DOUBLE_KEYWORD VARIABLE '=' DIGIT ';' {double_count++;}
datatype:
INT_KEYWORD
|
DOUBLE_KEYWORD
|
CHAR_KEYWORD
|
FLOAT_KEYWORD
;
%%
int yyerror(char *s){
fprintf(stderr,"%s\n",s);
return 0;
}
int main (void){
yydebug=1;
yyparse();
printf("#int variable=%d, #double variable=%d",int_count,double_count);
printf("#array=%d\n",array_count);
printf("#function=%d\n",func_count);
}
lex
%{
#include <stdio.h>
#include <stdlib.h>
#include "y.tab.h"
/* Declared with the same return type (int) as the definition in the yacc
   file; declaring it void here would give the two translation units
   incompatible declarations of the same function. */
int yyerror(char *);
%}
%%
"int" {return INT_KEYWORD;}
"double" {return DOUBLE_KEYWORD;}
"char" {return CHAR_KEYWORD;}
"float" {return FLOAT_KEYWORD;}
"if" {return IF_KEYWORD;}
"for" {return FOR_KEYWORD;}
"return" {return RETURN_KEYWORD;}
"==" {return COMPARE;}
">" {return COMPARE;}
"<" {return COMPARE;}
">=" {return COMPARE;}
"<=" {return COMPARE;}
"+" {return OPERATOR;}
"-" {return OPERATOR;}
"/" {return OPERATOR;}
"*" {return OPERATOR;}
"%" {return OPERATOR;}
[0-9]+ {return DIGIT;}
[a-z]+ {return VARIABLE;}
"*"" "?[a-zA-Z]+ {return POINTER_VARIABLE;}
"[" {return *yytext;}
"=" {return *yytext;}
"]" {return *yytext;}
[;\n(){}] {return *yytext;}
[ \t] ;
. {printf("%s\n",yytext); yyerror("invalid charactor");}
%%
int yywrap(void){
return 1;
}
test file:
int a;
int a[3];
int a(int a) {
Expected output
#int variable=1, #double variable=0 #array=1
#function=1
But instead it fails at the third line, int a(int a), because the program seemed to choose int variable declaration rule, and it fails when it sees '(' token, generating a syntax error.
The debug error message says...
....
Reading a token: Next token is token INT_KEYWORD ()
Shifting token INT_KEYWORD ()
Entering state 3
Reading a token: Next token is token VARIABLE ()
Shifting token VARIABLE ()
Entering state 13
Reading a token: Next token is token '(' ()
syntax error
....
Could anyone please point out what I did wrong? Thanks.
You have two shift/reduce conflicts in your grammar. You can see where in the output file generated by yacc :
State 3
8 int_declaration_statement: INT_KEYWORD . VARIABLE '[' DIGIT ']' ';'
9 | INT_KEYWORD . VARIABLE ';'
10 | INT_KEYWORD . VARIABLE '=' DIGIT ';'
14 datatype: INT_KEYWORD .
VARIABLE shift, and go to state 13
VARIABLE [reduce using rule 14 (datatype)]
State 4
11 double_declaration_statement: DOUBLE_KEYWORD . VARIABLE '[' DIGIT ']' ';'
12 | DOUBLE_KEYWORD . VARIABLE ';'
13 | DOUBLE_KEYWORD . VARIABLE '=' DIGIT ';'
15 datatype: DOUBLE_KEYWORD .
VARIABLE shift, and go to state 14
VARIABLE [reduce using rule 15 (datatype)]
Here, when yacc has read an INT_KEYWORD or a DOUBLE_KEYWORD and the next token is a VARIABLE, it does not know whether it needs to shift or reduce (i.e. it does not know whether this is a declaration or just a datatype). By default, yacc will shift.
Because of that default, the INT_KEYWORD (or DOUBLE_KEYWORD) that begins your function_declaration_statement is never reduced to a datatype: yacc shifts the following VARIABLE instead, so it now has INT_KEYWORD VARIABLE on its stack and is committed to an int_declaration_statement (or double_declaration_statement). The syntax error happens when yacc then encounters a '(', which none of those rules allow at that point.
To solve this, you can remove the function_declaration_statement and add an alternative to your int_declaration_statement (and likewise to double_declaration_statement). Something like:
statement: int_declaration_statement
| double_declaration_statement
;
int_declaration_statement: INT_KEYWORD VARIABLE '[' DIGIT ']' ';'{array_count++;}
| INT_KEYWORD VARIABLE ';' {int_count++;}
| INT_KEYWORD VARIABLE '=' DIGIT ';' {int_count++;}
| INT_KEYWORD VARIABLE '(' datatype VARIABLE ')' '{' {func_count++;}
;
That will remove your shift/reduce conflicts and give you the result you want, for instance:
--- ~ » ./a.out
int a;
int a[3];
int a(int a) {
#int variable=1, #double variable=0#array=1
#function=1
Hope it helps.
Hello this is my bison grammar file for a mini-programming language:
%{
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "projectbison.tab.h"
void yyerror(char const *);
extern FILE *yyin;
extern FILE *yyout;
extern int yylval;
extern int yyparse(void);
extern int n;
int errNum = 0;
int forNum = 0;
%}
%left PLUS MINUS
%left MULT DIV MOD
%nonassoc EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL
%token INTEGER BOOLEAN STRING VOID
%token ID
%token AND
%token BEGINP
%token ENDP
%token EXTERN
%token COMMA
%token EQ
%token RETURN1
%token IF1 ELSE1 WHILE1 FOR1 DO1
%token LOR LAND LNOT
%token TRUE FALSE
%token EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL
%token LB1 RB1
%token LCB1 RCB1
%token SEMIC
%token NEWLINE
%token PLUS MINUS
%token MULT DIV MOD
%token DIGIT STRING1
%start program
%%
/*50*/
program : external-decl program-header defin-field command-field
;
external-decl : external-decl external-prototype
|
;
external-prototype : EXTERN prototype-func NEWLINE
;
program-header : VOID ID LB1 RB1 NEWLINE
;
defin-field : defin-field definition
|
;
definition : variable-defin
| func-defin
| prototype-func
;
variable-defin : data-type var-list SEMIC newline
;
data-type : INTEGER
| BOOLEAN
| STRING
;
var-list : ID extra-ids
;
extra-ids : COMMA var-list
|
;
func-defin : func-header defin-field command-field
;
prototype-func : func-header SEMIC
;
func-header : data-type ID LB1 lists RB1 newline
;
lists: list-typ-param
|
;
list-typ-param : typical-param typical-params
;
typical-params : COMMA list-typ-param
|
;
typical-param : data-type AND ID
;
command-field : BEGINP commands newline ENDP newline
;
commands : commands newline command
|
;
command : simple-command SEMIC
| struct-command
| complex-command
;
complex-command : LCB1 newline command newline RCB1
;
struct-command : if-command
| while-command
| for-command
;
simple-command : assign
| func-call
| return-command
| null-command
;
if-command : IF1 LB1 gen-expr RB1 newline command else-clause
;
else-clause: ELSE1 newline command
;
while-command : WHILE1 LB1 gen-expr RB1 DO1 newline RCB1 command LCB1
;
for-command : FOR1 LB1 conditions RB1 newline RCB1 command LCB1
;
conditions : condition SEMIC condition SEMIC condition SEMIC
;
condition : gen-expr
|
;
assign : ID EQ gen-expr
;
func-call : ID LB1 real-params-list RB1
| ID LB1 RB1
;
real-params-list : real-param real-params
;
real-params : COMMA real-param real-params
|
;
real-param : gen-expr
;
return-command : RETURN1 gen-expr
;
null-command :
;
gen-expr : gen-terms gen-term
;
gen-terms : gen-expr LOR
|
;
gen-term : gen-factors gen-factor
;
gen-factors : gen-term LAND
|
;
gen-factor : LNOT first-gen-factor
| first-gen-factor
;
first-gen-factor : simple-expr comparison
| simple-expr
;
comparison : compare-operator simple-expr
;
compare-operator : EQUAL
| NEQUAL
| LESS
| GREATER
| LEQUAL
| GEQUAL
;
simple-expr : expresion simple-term
;
expresion : simple-expr PLUS
|simple-expr MINUS
|
;
simple-term : mul-expr simple-parag
;
mul-expr: simple-term MULT
| simple-term DIV
| simple-term MOD
|
;
simple-parag : simple-prot-oros
| MINUS simple-prot-oros
;
simple-prot-oros : ID
| constant
| func-call
| LB1 gen-expr RB1
;
constant : DIGIT
| STRING1
| TRUE
| FALSE
;
newline:NEWLINE
|
;
%%
void yyerror(char const *msg)
{
errNum++;
fprintf(stderr, "%s\n", msg);
}
/*
 * Program entry point.
 *
 * Parses the file named by the first command-line argument; with no
 * argument it parses standard input and points yyout at a file named
 * "output". Afterwards it prints whether parsing succeeded and the number
 * of errors counted by yyerror().
 *
 * Returns 1 if the named input file cannot be opened, 0 otherwise.
 */
int main(int argc, char **argv)
{
    FILE *opened = NULL;   /* input file we opened ourselves, if any */

    /* Drop the program name so that argv[0] is the first real argument. */
    ++argv;
    --argc;
    if ( argc > 0 )
    {
        opened = fopen( argv[0], "r" );
        if ( opened == NULL )
        {
            /* Fail loudly: a flex-generated scanner that is handed a NULL
               yyin falls back to reading stdin, which would hide the error. */
            perror( argv[0] );
            return 1;
        }
        yyin = opened;
    }
    else
    {
        yyin = stdin;
        yyout = fopen ( "output", "w" );
    }

    int a = yyparse();
    if ( a == 0 )
    {
        printf("Done parsing\n");
    }
    else
    {
        printf("Yparxei lathos sti grammi: %d\n", n);
    }
    printf("Estimated number of errors: %d\n", errNum);

    /* Release the input file we opened; stdin is never closed here. */
    if ( opened != NULL )
        fclose( opened );
    return 0;
}
for a simple input like this :
void main()
integer k;
boolean l;
begin
aek=32;
end
i get the following :
$ ./MyParser.exe file2.txt
void , id ,left bracket , right bracket
integer , id ,semicolon
boolean , id ,semicolon
BEGIN PROGRAM
id ,equals , digit ,semicolon
END PROGRAM
syntax error
Yparxei lathos sti grammi: 8
Estimated number of errors: 1
And whatever change I make to the input file, I get a syntax error at the end. Why do I get this, and what can I do? Thanks a lot in advance! Here is the flex file, just in case someone needs it:
%{
#include "projectbison.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int n=1;
%}
%option noyywrap
digit [0-9]+
id [a-zA-Z][a-zA-Z0-9]*
%%
"(" {printf("left bracket , "); return LB1;}
")" {printf("right bracket\n"); return RB1;}
"{" {printf("left curly bracket , "); return LCB1;}
"}" {printf("right curly bracket\n"); return RCB1;}
"==" {printf("isotita ,"); return EQUAL;}
"!=" {printf("diafora ,"); return NEQUAL;}
"<" {printf("less_than ,"); return LESS;}
">" {printf("greater_than ,"); return GREATER;}
"<=" {printf("less_eq ,"); return LEQUAL;}
">=" {printf("greater_eq ,"); return GEQUAL;}
"||" {printf("lor\n"); return LOR;}
"&&" {printf("land\n"); return LAND;}
"&" {printf("and ,"); return AND;}
"!" {printf("lnot ,"); return LNOT;}
"+" {printf("plus ,"); return PLUS; }
"-" {printf("minus ,"); return MINUS;}
"*" {printf("multiply ,"); return MULT;}
"/" {printf("division ,"); return DIV;}
"%" {printf("mod ,"); return MOD;}
";" {printf("semicolon \n"); return SEMIC;}
"=" {printf("equals , "); return EQ;}
"," {printf("comma ,"); return COMMA;}
"\n" {n++; return NEWLINE;}
void {printf("void ,"); return VOID;}
return {printf("return ,"); return RETURN1;}
extern {printf("extern\n"); return EXTERN;}
integer {printf("integer ,"); return INTEGER;}
boolean {printf("boolean ,"); return BOOLEAN;}
string {printf("string ,"); return STRING;}
begin {printf("BEGIN PROGRAM\n"); return BEGINP;}
end {printf("END PROGRAM\n"); return ENDP;}
for {printf("for\n"); return FOR1;}
true {printf("true ,"); return TRUE;}
false {printf("false ,"); return FALSE;}
if {printf("if\n"); return IF1; }
else {printf("else\n"); return ELSE1; }
while {printf("while\n"); return WHILE1;}
{id} {printf("id ,"); return ID;}
{digit} {printf("digit ,"); return DIGIT;}
[a-zA-Z0-9]+ {return STRING1;}
` {/*catchcall*/ printf("Mystery character %s\n", yytext); }
<<EOF>> { static int once = 0; return once++ ? 0 : '\n'; }
%%
Your scanner pretty well guarantees that two newline characters will be sent at the end of the input: one from the newline present in the input, and another one as a result of your trapping <<EOF>>. However, your grammar doesn't appear to accept unexpected newlines, so the second newline will trigger a syntax error.
The simplest solution would be to remove the <<EOF>> rule, since text files without a terminating newline are very rare, and it is entirely legitimate to consider them syntax errors. A more general solution would be to allow any number of newline characters to appear where a newline is expected, by defining something like:
newlines: '\n' | newlines '\n';
(Using actual characters for single-character tokens makes your grammar much more readable, and simplifies your scanner. But that's a side issue.)
You might also ask yourself whether you really need to enforce newline terminators, since your grammar seems to use ; as a statement terminator, making the newline redundant (aside from stylistic considerations). Removing newlines from the grammar (and ignoring them, as with other whitespace, in the scanner) will also simplify your code.