Bison generating unexpected token when grammar should be expecting it and other - grammar

I am writing a compiler for a formatting language and am currently working on the bison file. I think my grammar is correct, but when I added a recursive rule and then parsed the test source file, the trace says that the lexer accepts the rule for the ending tag but the parser reports that the token is unexpected... The thing is that before I added the recursive rule (for some tags in between the start and end tag) it worked fine... Here are some details:
This is the source file
\begin{document}
\title{test}
\author{test}
\date{21/02/1985}
\pagesetup{35, 80}
\end{document}
This is the bison file
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
extern int yylex();
extern int yyparse();
extern FILE *yyin;
extern FILE *yyout;
extern int yylineno;
void yyerror(const char*);
int header_status(int,int,int,int,int);
// counters to check nubmer or document properties used, must all become 1
int title = 0;
int author = 0;
int date = 0;
int pgsetup = 0;
int tabsz = 0;
%}
%union{
int iVal;
char* sVal;
}
%error-verbose
%start source
%token <sVal> SLASH
%token <sVal> BLOCK_S BLOCK_E
%token <sVal> DOC LIST ENUM
%token <sVal> TITLE AUTHOR DATE PG_SETUP TAB_SZ SECTION PARAGRAPH ITEM LINE
%token <sVal> LBRACE RBRACE LPAREN RPAREN
%token <sVal> DOCUMENT DIMENSIONS DATE_VAL STRING
%token <iVal> NUMBER
%token <sVal> ERROR_UN ERROR_IL WORD
%%
source
: /* empty */
| entry_point doc_properties txt_properties exit_point
{
if ( header_status(title, author, date, pgsetup, tabsz) == 0 )
printf("\nfail\n"); //YYABORT;
}
;
entry_point
: SLASH BLOCK_S LBRACE DOC RBRACE
;
doc_properties
: /* empty */
| doc_properties header_properties
;
header_properties
: title_property { title++; }
| author_property { author++; }
| date_property { date++; }
| pg_setup_property { pgsetup++; }
| tab_sz_property { tabsz++; }
;
txt_properties
: /* empty */
;
title_property
: SLASH TITLE LBRACE STRING RBRACE
;
author_property
: SLASH AUTHOR LBRACE STRING RBRACE
;
date_property
: SLASH DATE LBRACE DATE_VAL RBRACE
;
pg_setup_property
: SLASH PG_SETUP LBRACE DIMENSIONS RBRACE
;
tab_sz_property
: SLASH TAB_SZ LPAREN NUMBER RPAREN
;
exit_point
: SLASH BLOCK_E LBRACE DOC RBRACE
;
%%
/*
 * Program entry point.
 *
 * Parses the file named by argv[1]. Scanner output goes to argv[2] when it is
 * given, otherwise to "<argv[1]>.output.txt".
 *
 * Returns 0 when yyparse() succeeds, and EXIT_FAILURE on a usage error, an
 * allocation failure, a file that cannot be opened, or a failed parse.
 */
int main (int argc, char* argv[])
{
    char* fn = NULL;            /* generated default output name, if any */
    const char* outName = NULL;
    int status;

    if ( argc < 2 || argc > 3 )
    {
        fprintf(stderr, "%s: fatal error: needs one or two arguments\n\n\t%s inputFileName [outputFileName]\n\n", argv[0], argv[0]);
        return EXIT_FAILURE;
    }

    if ( argc == 2 )
    {
        /* sizeof of the literal counts the suffix plus its terminating NUL */
        fn = calloc(strlen(argv[1]) + sizeof ".output.txt", sizeof(char));
        if ( fn == NULL )
        {
            fprintf(stderr, "%s: fatal error: out of memory\n", argv[0]);
            return EXIT_FAILURE;
        }
        strcpy(fn, argv[1]);
        strcat(fn, ".output.txt");
        fprintf(stderr, "%s: using default output naming: <%s>\n\n", argv[0], fn);
        outName = fn;
    }
    else
    {
        outName = argv[2];
    }

    yyin = fopen(argv[1], "r");
    if ( yyin == NULL )
    {
        fprintf(stderr, "%s: fatal error: cannot open <%s> for reading\n", argv[0], argv[1]);
        free(fn);
        return EXIT_FAILURE;
    }

    yyout = fopen(outName, "w");
    if ( yyout == NULL )
    {
        fprintf(stderr, "%s: fatal error: cannot open <%s> for writing\n", argv[0], outName);
        fclose(yyin);
        free(fn);
        return EXIT_FAILURE;
    }

    status = yyparse();

    fclose(yyin);
    fclose(yyout);
    free(fn);
    return status == 0 ? 0 : EXIT_FAILURE;
}
void yyerror(const char* str)
{
fprintf(stderr,"syntax error[%d]: %s\n",yylineno, str);
}
/*
 * Prints a diagnostic to stderr when a document property was declared more
 * than once or not at all; prints nothing when count is exactly 1.
 */
static void report_property_count(const char* name, int count)
{
    if ( count > 1 )
        fprintf(stderr, "syntax error: %s property was declared more than once\n", name);
    else if ( count < 1 )
        fprintf(stderr, "syntax error: %s property was not declared (all document properties must be present)\n", name);
}

/*
 * Checks that every document property was declared exactly once.
 *
 * Each argument is the number of times the corresponding property appeared.
 * Returns 1 when all five counts are 1. Otherwise reports every offending
 * property on stderr and returns 0.
 */
int header_status(int title, int author, int date, int pgsetup, int tabsz)
{
    if ( title == 1 && author == 1 && date == 1 && pgsetup == 1 && tabsz == 1 )
    {
        return 1;
    }

    report_property_count("title", title);
    report_property_count("author", author);
    report_property_count("date", date);
    report_property_count("pagesetup", pgsetup);
    /* previously printed "title tabsize was ...", a copy-paste slip */
    report_property_count("tabsize", tabsz);
    return 0;
}
My problem I think is in
doc_properties
: /* empty */
| doc_properties header_properties
;
When I had it empty and just
\begin{document}
\end{document}
for the source file it was fine. Specifically the tokens would be
SLASH BLOCK_S LBRACE DOC RBRACE
SLASH BLOCK_E LBRACE DOC RBRACE
When I added the rule with the recursion though when it reached at the 'end' the trace would say that it accepted the rule (lexical) and then it generated a syntax error "unexpected BLOCK_E". The only thing I can think of is that it is expecting some other tag but in the recursion I have the empty as an alternative so why...
Also when I added the final tag
\begin{document}
\title{test}
\author{test}
\date{21/02/1985}
\pagesetup{35, 80}
\tabsize(4)
\end{document}
When it reaches the 4, the trace says that it accepts the rule in the lex file, namely the rule
return NUMBER;
but it says unexpected $undefined, expecting NUMBER when it just said that it accepts the rule and frankly I don't think it could read anything else...
My question is for the first part though...
If it is any help this is the flex file
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "UnicTextLang.y.tab.h"
#define SAVE_S yylval.sVal = strdup(yytext)
#define SAVE_I yylval.iVal = atoi(yytext)
%}
WS [ \t\n\r]
TAG [a-zA-Z_][a-zA-Z0-9\-_]+
WORD [a-zA-Z0-9`~!##$%\^&*()\-_=+[\]{}\\|;:'",<.>/?]
NUMBER ([1-9])|([1-9][0-9])|([1-3][0-9][0-9])
DIMEN {NUMBER}{WS}*,{WS}*{NUMBER}
DAY (0[1-9])|([12][0-9])|(3[01])
MONTH (0[1-9])|(1[0-2])
YEAR (19|20)[0-9]{2}
DATE {DAY}\/{MONTH}\/{YEAR}
%option yylineno
%option noyywrap
%option noinput
%option nounput
%option debug
%x PROPERTY
%x VALUE
%x BLOCK
/* The start condition for parenthesised numbers must not be called NUMBER:
   flex emits a #define for every start condition name, and a #define named
   NUMBER would replace the NUMBER token code taken from the bison header, so
   "return NUMBER;" would hand the parser an undefined token. */
%x NUM_VALUE
%%
^\\|{WS}\\ { BEGIN(PROPERTY); /* fprintf(stdout, "FLEX> BEGINING PROPERTY [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return SLASH; }
{WS}?\{ { BEGIN(VALUE); /* fprintf(stdout, "FLEX> READING PROPERTY VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LBRACE; }
{WS}?\( { BEGIN(NUM_VALUE); /* fprintf(stdout, "FLEX> READING NUMBER VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LPAREN; }
{WS} { /* fprintf(stdout, "FLEX> EATING WHITESPACE(i)\n"); */ }
[^ \t\n\r\{(\\][^ \t\n\r]+ { fprintf(stderr, "lexical error[%d]: hingeless word: %s\n", yylineno, yytext); SAVE_S; return WORD; }
. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<PROPERTY>begin { BEGIN(BLOCK); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return BLOCK_S; }
<PROPERTY>end { BEGIN(BLOCK); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return BLOCK_E; }
<PROPERTY>title { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return TITLE; }
<PROPERTY>author { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return AUTHOR; }
<PROPERTY>date { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return DATE; }
<PROPERTY>pagesetup { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return PG_SETUP; }
<PROPERTY>tabsize { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return TAB_SZ; }
<PROPERTY>section { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return SECTION; }
<PROPERTY>paragraph { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return PARAGRAPH; }
<PROPERTY>item { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return ITEM; }
<PROPERTY>newline { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> \n\t%s\n\n", yytext); */ SAVE_S; return LINE; }
<PROPERTY>{TAG} { BEGIN(INITIAL); fprintf(stderr, "lexical error[%d]: |%s| undefined property: expecting property\n", yylineno, yytext); SAVE_S; return ERROR_UN; }
<PROPERTY>{WS} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> EATING WHITESPACE(p)\n"); */ }
<PROPERTY>[^ \t\n\r\{(]+ { BEGIN(INITIAL); fprintf(stderr, "lexical error[%d]: |%s| undefined property: illegal character detected\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<PROPERTY>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<VALUE>{WS}*{DIMEN}{WS}* { /* fprintf(stdout, "FLEX> \n\tdims: %s\n\n", yytext); */ SAVE_S; return DIMENSIONS; }
<VALUE>{WS}*{DATE}{WS}* { /* fprintf(stdout, "FLEX> \n\tdate: %s\n\n", yytext); */ SAVE_S; return DATE_VAL; }
<VALUE>[^}]* { /* fprintf(stdout, "FLEX> \n\tstrg: %s\n\n", yytext); */ SAVE_S; return STRING; }
<VALUE>\} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING PROPERTY VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RBRACE; }
<VALUE>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<NUM_VALUE>{WS}*{NUMBER}{WS}* { /* fprintf(stdout, "FLEX> \n\tnumb: %s\n\n", yytext); */ SAVE_I; return NUMBER; }
<NUM_VALUE>[^)]* { fprintf(stderr, "lexical error[%d]: |%s| illegal value: expecting number(1-399)\n", yylineno, yytext); SAVE_S; return STRING; }
<NUM_VALUE>\) { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING NUMBER VALUE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RPAREN; }
<NUM_VALUE>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
<BLOCK>{WS}?\{ { /* fprintf(stdout, "FLEX> READING BLOCK TYPE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return LBRACE; }
<BLOCK>{WS}*document{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return DOC; }
<BLOCK>{WS}*itemize{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return LIST; }
<BLOCK>{WS}*enumerate{WS}* { /* fprintf(stdout, "FLEX> \n\tresv: %s\n\n", yytext); */ SAVE_S; return ENUM; }
<BLOCK>[^{}]* { fprintf(stderr, "lexical error[%d]: |%s| undefined block type: expecting block type\n", yylineno, yytext); SAVE_S; return ERROR_UN; }
<BLOCK>\} { BEGIN(INITIAL); /* fprintf(stdout, "FLEX> FINISHED READING BLOCK TYPE [%d]: %s|\n", yylineno, yytext); */ SAVE_S; return RBRACE;}
<BLOCK>. { fprintf(stderr, "lexical error[%d]: illegal character detected: %s\n", yylineno, yytext); SAVE_S; return ERROR_IL; }
%%

The basic problem you have is that parser needs two-token lookahead to determine where the doc_properties end. This is because you recognize '\' as a separate token from the property string, so after seeing the input SLASH BLOCK_S with the next input token being SLASH, it doesn't know if it should reduce an empty txt_properties (anticipating a BLOCK_E after the SLASH), or shift into the header_properties rule in anticipation of matching a header property.
There are a number of ways to deal with this. Perhaps the simplest would be to get rid of the SLASH token altogether, as it just serves to tell the lexer when to look for a property string. Get rid of the return SLASH; statement in the first lex action (so it doesn't return a token, but instead keeps looking for the property after the \ to return that token), and delete SLASH everywhere it appears in your grammar.
Another possibility is to unfactor the grammar to get rid of the epsilon rules (as they are what necessitate the early reductions that lead to shift/reduce conflicts). With no epsilon rules the parser can shift into a composite state where it is simultaneously recognizing multiple rules with identical prefixes on the RHS (this ability is the advantage of LR parsing over LL). To do this, you would have rules like:
source: /* empty */
| entry_point exit_point
| entry_point doc_properties exit_point
| entry_point txt_properties exit_point
| entry_point doc_properties txt_properties exit_point
;
and would change doc_properties and txt_properties to recognize 1-or-more rather than 0-or-more:
doc_properties: header_property
| doc_properties header_property
;

Related

Yacc %define parse.error verbose generates error

When I try to get more out of my "syntax error", I seem to use the way described on so many websites, but all seem to create their own errors, for some reason.
I was getting the standard "syntax error" on line 5 of the input file, so I wanted to add better error reporting to see what exactly the issue is. To do that I added:
%define parse.error verbose
However, it gives me this;
error: %define variable 'parse.error' is not used
Below are my files. As long as you keep it constructive, feel free to comment on more than just the error parts; any help is welcome :)
(As long as the errors get fixed as well :P )
Thanks in advance!
lex file;
%option nounput yylineno
%{
#include "yaccTest.tab.h"
void InvalidToken();
void extern yyerror (char *s);
%}
whitespace [ \t\r\v\f]
linefeed \n
%%
";" {return SEMICOLON;}
"=" {return EQ;}
"+" {return PLUS;}
"-" {return MINUS;}
"*" {return MULTIPLY;}
"/" {return DEVIDE;}
"(" {return BO;}
")" {return BC;}
"^" {return POWER;}
"print" {return PRINT;}
[a-zA-Z][a-zA-Z0-9]* {yylval.charValue = yytext[0]; return IDENTIFIER;}
[0-9]+ {yylval.intValue = atoi(yytext); return NUMBER;}
{whitespace} {;}
. {InvalidToken();}
%%
/*
 * Error callback: prints the message with the current line number to stderr
 * and terminates the program. The exit status is non-zero so that callers
 * (shells, make) can tell that parsing failed.
 */
void yyerror(char *s) {
    fprintf(stderr, "\nERROR ON LINE %d : \n %s\n", yylineno, s);
    exit(1);
}
void InvalidToken(){
printf("ERROR ON LINE %d : \n Invalid Token %s\n", yylineno,yytext);
exit(0);
}
/* End-of-input hook: always returns 1. */
int yywrap (void)
{
    return 1;
}
yacc file;
%{
#include <stdio.h>
#include <stdlib.h>
int getVariableValue(char varID);
extern int yylineno;
int varIDs[52] = {0};
int varValues[52] = {0};
%}
%define parse.lac full
%define parse.error verbose
%union YYSTYPE {int intValue; char charValue;}
%token COLON SEMICOLON ST SE EQ GE GT PLUS MINUS MULTIPLY DEVIDE BO BC CBO CBC POWER LOOP PRINT
%token <intValue> NUMBER
%token <charValue> IDENTIFIER CHAR
%type <charValue> declaration expression
%type <intValue> numval
%right EQ
%left PLUS MINUS
%left MULTIPLY DEVIDE
%left POWER
%%
declaration : IDENTIFIER EQ expression
| declaration IDENTIFIER EQ expression
;
expression : numval SEMICOLON
| PRINT BO numval BC SEMICOLON {printf("Printing");}
;
numval : NUMBER {$$ = $1;}
| NUMBER PLUS NUMBER {$$ = $1 + $3;}
| NUMBER MINUS NUMBER {$$ = $1 - $3;}
| NUMBER MULTIPLY NUMBER {$$ = $1 * $3;}
| NUMBER DEVIDE NUMBER {$$ = $1 / $3;}
| NUMBER POWER NUMBER {int i;int j = $1;for(i = 1; i < $3; i++){j=j*$1;};$$ = j;}
;
%%
/*
 * Looks up the value stored for varID in the parallel varIDs/varValues
 * tables.
 *
 * Every slot is scanned, so if several slots carry the same ID the last one
 * wins. Returns 0 when no slot matches; the original returned an
 * uninitialized local in that case.
 */
int getVariableValue(char varID) {
    const size_t slots = sizeof(varIDs) / sizeof(varIDs[0]);
    int found = 0;
    size_t i;

    for (i = 0; i < slots; i++) {
        if (varID == varIDs[i]) {
            found = varValues[i];
        }
    }
    return found;
}
/* Error reporters defined later in this file; declared here so the calls
   below are type-checked instead of implicitly declared. */
int missingVarIDError(char *id, int val);
int notEnoughStorageError(char *id, int val);

/*
 * Stores varValue under varID in the parallel varIDs/varValues tables.
 *
 * Every slot that already holds varID is updated. If there is none, the
 * first free slot (ID 0) is claimed. A free ID slot whose value slot is
 * non-zero is reported through missingVarIDError, and a full table through
 * notEnoughStorageError.
 *
 * Returns 0 on success and 1 if an error reporter returns.
 *
 * The original tested "&varIDs[i] == NULL", which is never true for an array
 * element, so every new variable ended in the storage error.
 */
int setVariableValue(char varID, int varValue) {
    const size_t slots = sizeof(varIDs) / sizeof(varIDs[0]);
    char name[2];   /* varID as a string, as the error reporters expect */
    int varPresent = 0;
    size_t i;

    name[0] = varID;
    name[1] = '\0';

    for (i = 0; i < slots; i++) {
        if (varID == varIDs[i]) {
            varValues[i] = varValue;
            varPresent = 1;
        }
    }
    if (varPresent) {
        return 0;
    }

    for (i = 0; i < slots; i++) {
        if (varIDs[i] != 0) {
            continue;   /* slot already belongs to another variable */
        }
        if (varValues[i] != 0) {
            missingVarIDError(name, varValue);
            return 1;
        }
        varIDs[i] = varID;
        varValues[i] = varValue;
        return 0;
    }

    notEnoughStorageError(name, varValue);
    return 1;
}
/*
 * Reports that the slot chosen for identifier id has no ID but already holds
 * a value, then terminates with a failure status.
 *
 * val is an int, so it is printed with %d; the original used %s, which is
 * undefined behaviour.
 */
int missingVarIDError(char *id, int val){
    printf("\nERROR ON LINE %d : \nIdentifier '%s' not found, but assigned location DOES have a value; %d",yylineno,id,val);
    exit(EXIT_FAILURE);
}
/*
 * Reports that identifier id could not be stored because the table is full,
 * then terminates with a failure status.
 *
 * The value is printed with %d; the original "%3" was not a valid conversion
 * specification.
 */
int notEnoughStorageError(char *id, int val){
    printf("\nERROR ON LINE %d : \nIdentifier '%s' did not fit in StorageArray, '%d' not stored!",yylineno,id,val);
    exit(EXIT_FAILURE);
}
/* Runs the parser and uses its result as the process exit status.
   The unreachable second return of the original has been dropped. */
int main (void) {
    return yyparse ( );
}
Input file;
x=4;
y=2+6;
X=2;
z=5;
print(4);

y.tab.c: undefined reference to yylex

I am trying to run an example I found online of a calculator. But I have this error showing every time I run my gcc command. Here are the commands that I run:
flex -l calc3.l
yacc -vd calc3.y
gcc y.tab.c -lm -ll
-> at this point I got this error message:
/tmp/ccPOq58f.o : In function 'yyparse':
y.tab.c: undefined reference to 'yylex'
collect2: error: ld returned 1 exit status
Here is my code:
calc3.l
%{
#include <stdlib.h>
#include "calc3.h"
#include "y.tab.h"
void yyerror(char *);
%}
%%
[a-z] {
yylval.sIndex = *yytext - 'a';
return VARIABLE;
}
0 {
yylval.iValue = atoi(yytext);
return INTEGER;
}
[1-9][0-9]* {
yylval.iValue = atoi(yytext);
return INTEGER;
}
[-()<>=+*/;{}.] {
return *yytext;
}
">=" return GE;
"<=" return LE;
"==" return EQ;
"!=" return NE;
"while" return WHILE;
"if" return IF;
"else" return ELSE;
"print" return PRINT;
[ \t\n]+ ; /* ignore whitespace */
. yyerror("Unknown character");
%%
/* End-of-input hook: always returns 1. */
int yywrap(void)
{
    const int done = 1;
    return done;
}
here is calc3.h
typedef enum { typeCon, typeId, typeOpr } nodeEnum;
/* constants */
typedef struct {
int value; /* value of constant */
} conNodeType;
/* identifiers */
typedef struct {
int i; /* subscript to sym array */
} idNodeType;
/* operators */
typedef struct {
int oper; /* operator */
int nops; /* number of operands */
struct nodeTypeTag **op; /* operands */
} oprNodeType;
typedef struct nodeTypeTag {
nodeEnum type; /* type of node */
union {
conNodeType con; /* constants */
idNodeType id; /* identifiers */
oprNodeType opr; /* operators */
};
} nodeType;
extern int sym[26];
and here is calc3.y
%{
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "calc3.h"
/* prototypes */
nodeType *opr(int oper, int nops, ...);
nodeType *id(int i);
nodeType *con(int value);
void freeNode(nodeType *p);
int ex(nodeType *p);
int yylex(void);
void yyerror(char *s);
int sym[26]; /* symbol table */
%}
%union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
};
%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT
%nonassoc IFX
%nonassoc ELSE
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%type <nPtr> stmt expr stmt_list
%%
program:
function { exit(0); }
;
function:
function stmt { ex($2); freeNode($2); }
| /* NULL */
;
stmt:
';' { $$ = opr(';', 2, NULL, NULL); }
| expr ';' { $$ = $1; }
| PRINT expr ';' { $$ = opr(PRINT, 1, $2); }
| VARIABLE '=' expr ';' { $$ = opr('=', 2, id($1), $3); }
| WHILE '(' expr ')' stmt { $$ = opr(WHILE, 2, $3, $5); }
| IF '(' expr ')' stmt %prec IFX { $$ = opr(IF, 2, $3, $5); }
| IF '(' expr ')' stmt ELSE stmt { $$ = opr(IF, 3, $3, $5, $7); }
| '{' stmt_list '}' { $$ = $2; }
;
stmt_list:
stmt { $$ = $1; }
| stmt_list stmt { $$ = opr(';', 2, $1, $2); }
;
expr:
INTEGER { $$ = con($1); }
| VARIABLE { $$ = id($1); }
| '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
| expr '+' expr { $$ = opr('+', 2, $1, $3); }
| expr '-' expr { $$ = opr('-', 2, $1, $3); }
| expr '*' expr { $$ = opr('*', 2, $1, $3); }
| expr '/' expr { $$ = opr('/', 2, $1, $3); }
| expr '<' expr { $$ = opr('<', 2, $1, $3); }
| expr '>' expr { $$ = opr('>', 2, $1, $3); }
| expr GE expr { $$ = opr(GE, 2, $1, $3); }
| expr LE expr { $$ = opr(LE, 2, $1, $3); }
| expr NE expr { $$ = opr(NE, 2, $1, $3); }
| expr EQ expr { $$ = opr(EQ, 2, $1, $3); }
| '(' expr ')' { $$ = $2; }
;
%%
/*
 * Allocates a constant node holding value.
 *
 * Returns the new node. On allocation failure the error is reported through
 * yyerror and the program exits; the original went on to dereference the
 * NULL pointer.
 */
nodeType *con(int value) {
    nodeType *p = malloc(sizeof *p);

    if (p == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);
    }

    /* copy information */
    p->type = typeCon;
    p->con.value = value;
    return p;
}
/*
 * Allocates an identifier node whose id.i field is set to i.
 *
 * Returns the new node. On allocation failure the error is reported through
 * yyerror and the program exits; the original went on to dereference the
 * NULL pointer.
 */
nodeType *id(int i) {
    nodeType *p = malloc(sizeof *p);

    if (p == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);
    }

    /* copy information */
    p->type = typeId;
    p->id.i = i;
    return p;
}
/*
 * Allocates an operator node.
 *
 * oper is stored as the operator code, nops is the number of operands, and
 * the variadic arguments are the nops operand pointers (nodeType *), stored
 * in order in opr.op.
 *
 * Returns the new node. On allocation failure the error is reported through
 * yyerror and the program exits; the original went on to dereference the
 * NULL pointer.
 */
nodeType *opr(int oper, int nops, ...) {
    va_list ap;
    nodeType *p = malloc(sizeof *p);
    int i;

    if (p == NULL) {
        yyerror("out of memory");
        exit(EXIT_FAILURE);
    }

    /* malloc(0) may legitimately return NULL, so only allocate the operand
       array when there are operands to store */
    p->opr.op = NULL;
    if (nops > 0) {
        p->opr.op = malloc((size_t)nops * sizeof *p->opr.op);
        if (p->opr.op == NULL) {
            yyerror("out of memory");
            exit(EXIT_FAILURE);
        }
    }

    /* copy information */
    p->type = typeOpr;
    p->opr.oper = oper;
    p->opr.nops = nops;
    va_start(ap, nops);
    for (i = 0; i < nops; i++)
        p->opr.op[i] = va_arg(ap, nodeType*);
    va_end(ap);
    return p;
}
/*
 * Releases a node. For operator nodes, every operand is released first and
 * then the operand array itself. A NULL pointer is ignored.
 */
void freeNode(nodeType *p) {
    if (p == NULL)
        return;

    if (p->type == typeOpr) {
        int k = 0;
        while (k < p->opr.nops) {
            freeNode(p->opr.op[k]);
            k++;
        }
        free(p->opr.op);
    }

    free(p);
}
/* Error callback: writes the message followed by a newline to stdout. */
void yyerror(char *s)
{
    (void)fprintf(stdout, "%s\n", s);
}
/* Runs the parser once; the parser's result is deliberately ignored and the
   program always reports success. */
int main(void)
{
    (void)yyparse();
    return 0;
}
If you just use
flex calc3.l
then flex produces a scanner called lex.yy.c. (I removed the -l option which was used in the original question. -l causes flex to be more compatible with certain aspects of the original lex utility, and it has no use except for compiling ancient lex scanners.)
Similarly, if you just use
yacc -vd calc3.y
then bison will produce files called y.tab.c and y.tab.h. And
gcc y.tab.c -lm -ll
will produce a file called a.out.
None of that is a good idea. It's far better to give the files meaningful names, based on the input filenames. All three of these tools understand a -o command-line flag which specifies the output file name.
So you could do this:
flex calc3.l
yacc -vd calc3.y
gcc lex.yy.c y.tab.c -lm -ll
But I'd recommend something like this:
flex -o calc3.lex.c calc3.l
bison -o calc3.tab.c -vd calc3.y
gcc -o calc3 calc3.lex.c calc3.tab.c -lm -ll
When you do this, you'll need to change the #include "y.tab.h" to #include "calc3.tab.h". (Note that if you invoke bison as bison rather than as yacc, it will automatically produce output files with names based on the grammar file. But it doesn't hurt to be explicit.)
Even better if you put it in a Makefile, or at least a script file.

Trouble with Flex error handling

The following flex file gives output that does not exit with a nonzero status when it encounters an error, like trying to write to /dev/full:
WS [ \t]+
%option noyywrap
%{
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int output(const char *);
%}
newline (\r|\n|\r\n|\n\r)
%%
#[^\r\n]*/{newline} {};
[^#]+ { output(yytext); }
<<EOF>> { output(yytext); return 0; }
%%
/*
 * Runs the scanner until it returns 0, then flushes stdout explicitly.
 *
 * stdout is buffered, so a write error (for example when output is
 * redirected to /dev/full) may only show up when the buffer is flushed.
 * Checking fflush here is what makes the exit status reflect the failure.
 *
 * Returns 0 on success and EXIT_FAILURE when the flush fails.
 */
int main (void) {
    while (yylex()) ;

    if (fflush(stdout) == EOF) {
        int error = errno;   /* save before fprintf can overwrite it */
        fprintf(stderr, "Output error: %s\n", strerror(error));
        return EXIT_FAILURE;
    }
    return 0;
}
/*
 * Writes string to stdout.
 *
 * Returns 0 on success. If fputs reports an error, the reason is printed to
 * stderr and the program exits with EXIT_FAILURE. The original exited with
 * errno, which fprintf may already have overwritten and which is not a
 * portable exit status.
 */
int output(const char *string)
{
    if (fputs(string, stdout) == EOF) {
        int error = errno;   /* save before fprintf can overwrite it */
        fprintf(stderr, "Output error: %s\n", strerror(error));
        exit(EXIT_FAILURE);
    }
    return 0;
}
How do I fix this?
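The problem was that I was not flushing stdout. Due to buffering, the fputs call succeeded and the write error only occurred when the buffer was flushed at exit, so the program has to call fflush(stdout) itself and check its result.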

Is there a better way to specify optional elements in rules of a CFG?

Consider a language and a compiler to design and develop for it.
In this language there is a particular statement that is part of the grammar: (=<identifier>). This piece can be recognized by the compiler. But spaces are allowed between the brackets and the equal sign and the identifier. So I have these possibilities:
(=<identifier>)
( = <identifier> )
(=identifier )
( =identifier )
...
Without considering the whole grammar but just the rules to handle this language feature, I have something like this (in a Bison-like syntax for grammar rules):
statement: OBRCKT EQ ID CBRCKT
| OBRCKT S EQ S ID S CBRCKT
| OBRCKT S EQ ID S CBRCKT
| OBRCKT S EQ S ID CBRCKT
| OBRCKT S EQ ID CBRCKT
| OBRCKT EQ S ID S CBRCKT
| OBRCKT EQ ID S CBRCKT
| OBRCKT EQ S ID CBRCKT
| ...
The space terminal S can appear or not. But the way rules are, I need to specify all possible combinations... Is there a better way to achieve this result?
As Jim commented, use your lexical tool to handle these cases instead of writing them into the productions of your grammar.
For example, I commonly use Flex for lexical analysis and Bison to define my grammar (probably as you have done).
You can achieve the result that you want with something like the following (this is just an example so it's pretty simple and cannot do much):
lexicalAnalyzer.l
/* lexicalAnalyzer.l
Specifications of tokens for some language.
*/
%{
%}
/*
 * Definitions of regular expressions
 * Note: You capture whitespace here...
 * We take care of the spaces here. This remark has to stay inside the
 * comment: flex takes everything after a definition's name, up to the end
 * of the line, as the pattern, so a trailing // comment would become part
 * of the regular expression.
 */
WSPACE [ \t\r]+
/*
* Tokens
*/
%%
"=" {
printf("TOKEN: EQ LEXEME: %s\n", yytext);
return T_EQ;
}
"(" {
printf("TOKEN: OBRCKT LEXEME: %s\n", yytext);
return T_OBRCKT;
}
")" {
printf("TOKEN: CBRCKT LEXEME: %s\n", yytext);
return T_CBRCKT;
}
"<" {
printf("TOKEN: LT LEXEME: %s\n", yytext);
return T_LT;
}
">" {
printf("TOKEN: GT LEXEME: %s\n", yytext);
return T_GT;
}
"identifier" {
printf("TOKEN: IDENT LEXEME: %s\n", yytext);
return T_IDENT;
}
{WSPACE} { }
. {
printf("TOKEN: UNKNOWN LEXEME: %s\n", yytext);
return T_UNKNOWN;
}
%%
syntaxAnalyzer.y
/*
syntaxAnalyzer.y
To create syntax analyzer:
flex file.l
bison file.y
g++ file.tab.c -o file_parser
file_parser < inputFileName
*/
/*
* Declaration section.
*/
%{
#include <stdio.h>
void printRule(const char *lhs, const char *rhs);
/*
 * Error callback: prints a fixed "Error!" notice to stdout. The message
 * argument s is not used.
 *
 * Always returns 0. The original fell off the end of a function declared to
 * return int.
 */
int yyerror(const char *s) {
    (void)s;
    printf("Error!");
    return 0;
}
extern "C" {
int yyparse(void);
int yylex(void);
int yywrap() {return 1;}
}
%}
/*
* Token declarations
*/
%token T_OBRCKT T_CBRCKT
%token T_LT T_GT T_EQ
%token T_IDENT T_UNKNOWN
/*
* Starting point.
*/
%start N_START
/*
* Translation rules.
*/
%%
N_START : N_STATEMENT
{
printRule("START", "STATEMENT");
printf("\n---- Completed parsing ----\n\n");
return 0;
}
;
N_STATEMENT : T_OBRCKT T_EQ T_LT T_IDENT T_GT T_CBRCKT
{
printRule("EXPR", "T_OBRCKT T_EQ T_LT T_IDENT T_GT T_CBRCKT");
}
;
%%
#include "lex.yy.c"
extern FILE *yyin;
/* Prints one grammar rule to stdout in the form "lhs -> rhs". */
void printRule(const char *lhs, const char *rhs)
{
    (void)printf("%s -> %s\n", lhs, rhs);
}
/* Calls the parser at least once and keeps calling it until yyin reports
   end of file; always returns 0. */
int main() {
    for (;;) {
        yyparse();
        if (feof(yyin))
            break;
    }
    return 0;
}

correcting some simple logic errors in lex and yacc

I need help solving two simple logic errors that I am facing in my example.
Here are the details:
The Input File: (input.txt)
FirstName:James
LastName:Smith
normal text
The output File: (output.txt) - [with two logic errors]
The Name is: James
The Name is: LastName:Smith
The Name is: normal text
What I am expecting as output (instead of the above lines) - [without logical errors]
The Name is: James
The Name is: Smith
normal text
In other words, I don't want the "LastName:" label to be sent to the output, and I also want normal text to be matched when it is written after the "FirstName:" or "LastName:" lines.
Here is my lex File (example.l):
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "y.tab.h"
/* prototypes */
void yyerror(const char*);
/* Variables: */
char *tempString;
%}
%START sBody
%%
"FirstName:" { BEGIN sBody; }
"LastName:" { BEGIN sBody; }
.? { return sNormalText; }
\n /* Ignore end of line */;
[ \t]+ /* Ignore whitespace */;
<sBody>.+ {
tempString = (char *)calloc(strlen(yytext)+1, sizeof(char));
strcpy(tempString, yytext);
yylval.sValue = tempString;
return sText;
}
%%
/*
 * Opens argv[1] for reading as yyin and argv[2] for writing as yyout, runs
 * the parser, then closes both files.
 *
 * Returns 0 after parsing, and also after printing the usage hint when fewer
 * than two file names are given. Returns 1 when either file cannot be
 * opened; the original passed the NULL stream on to the scanner.
 */
int main(int argc, char *argv[])
{
    if ( argc < 3 )
    {
        printf("Please you need two args: inputFileName and outputFileName");
        return 0;
    }

    yyin = fopen(argv[1], "r");
    if ( yyin == NULL )
    {
        fprintf(stderr, "failed to open %s for reading\n", argv[1]);
        return 1;
    }

    yyout = fopen(argv[2], "w");
    if ( yyout == NULL )
    {
        fprintf(stderr, "failed to open %s for writing\n", argv[2]);
        fclose(yyin);
        return 1;
    }

    yyparse();
    fclose(yyin);
    fclose(yyout);
    return 0;
}
Here is my yacc file: (example.y):
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "y.tab.h"
void yyerror(const char*);
int yywrap();
extern FILE *yyout;
%}
%union
{
int iValue;
char* sValue;
};
%token <sValue> sText
%token <sValue> sNormalText
%%
StartName: /* for empty */
| sName StartName
;
sName:
sText
{
fprintf(yyout, "The Name is: %s\n", $1);
}
|
sNormalText
{
fprintf(yyout, "%s\n", $1);
}
;
%%
/* Error callback: writes the message to stderr with an "error: " prefix. */
void yyerror(const char *str)
{
    (void)fprintf(stderr, "error: %s\n", str);
}
/* End-of-input hook: always returns 1. */
int yywrap()
{
    const int done = 1;
    return done;
}
Please if you can help me out correcting those simple logical errors, i will be grateful.
Thanks in advance for your help and for reading my post.
Part of the trouble is that you move into state 'sBody' but you never move back to the initial state 0.
Another problem - not yet a major one - is that you use a right-recursive grammar rule instead of the (natural for Yacc) left-recursive rule:
StartName: /* empty */
| sName StartName
;
vs
StartName: /* empty */
| StartName sName
;
Adding BEGIN 0; to the <sBody> Lex rule improves things a lot; the remaining trouble is that you get one more line 'Smith' in the output file for each single letter in the normal text. You need to review how the value is returned to your grammar.
By adding yylval.sValue = yytext; before the return in the rule that returns sNormalText, I got the 'expected' output.
example.l
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "y.tab.h"
/* prototypes */
void yyerror(const char*);
/* Variables: */
char *tempString;
%}
%START sBody
%%
"FirstName:" { puts("FN"); BEGIN sBody; }
"LastName:" { puts("LN"); BEGIN sBody; }
.? { printf("NT: %s\n", yytext); yylval.sValue = yytext; return sNormalText; }
\n /* Ignore end of line */;
[ \t]+ /* Ignore whitespace */;
<sBody>.+ {
tempString = (char *)calloc(strlen(yytext)+1, sizeof(char));
strcpy(tempString, yytext);
yylval.sValue = tempString;
puts("SB");
BEGIN 0;
return sText;
}
%%
/*
 * Opens argv[1] as the scanner input and argv[2] as its output, runs the
 * parser, then closes both files.
 *
 * With fewer than two file names it prints a usage hint and returns 0. If
 * either file cannot be opened it reports the failure on stderr and exits
 * with status 1.
 */
int main(int argc, char *argv[])
{
    if ( argc < 3 )
    {
        printf("Please you need two args: inputFileName and outputFileName");
        return 0;
    }

    if ((yyin = fopen(argv[1], "r")) == 0)
    {
        fprintf(stderr, "failed to open %s for reading\n", argv[1]);
        exit(1);
    }

    if ((yyout = fopen(argv[2], "w")) == 0)
    {
        fprintf(stderr, "failed to open %s for writing\n", argv[2]);
        exit(1);
    }

    yyparse();
    fclose(yyin);
    fclose(yyout);
    return 0;
}
example.y
%{
#include <stdio.h>
#include "y.tab.h"
void yyerror(const char*);
int yywrap();
extern FILE *yyout;
%}
%union
{
char* sValue;
};
%token <sValue> sText
%token <sValue> sNormalText
%%
StartName: /* for empty */
| StartName sName
;
sName:
sText
{
fprintf(yyout, "The Name is: %s\n", $1);
}
|
sNormalText
{
fprintf(yyout, "The Text is: %s\n", $1);
}
;
%%
/* Error callback: writes the message to stderr with an "error: " prefix. */
void yyerror(const char *str)
{
    (void)fprintf(stderr, "error: %s\n", str);
}
/* End-of-input hook: always returns 1. */
int yywrap()
{
    const int done = 1;
    return done;
}
output.txt
The Name is: James
The Name is: Smith
The Text is: n
The Text is: o
The Text is: r
The Text is: m
The Text is: a
The Text is: l
The Text is:
The Text is: t
The Text is: e
The Text is: x
The Text is: t
It might make more sense to put yywrap() in with the lexical analyzer rather than with the grammar. I've left the terse debugging prints in the code - they helped me see what was going wrong.
FN
SB
LN
SB
NT: n
NT: o
NT: r
NT: m
NT: a
NT: l
NT:
NT: t
NT: e
NT: x
NT: t
You'll need to play with the '.?' rule to get normal text returned in its entirety. You may also have to move it around the file - start states are slightly peculiar critters. When I changed the rule to '.+', Flex gave me the warning:
example.l:25: warning, rule cannot be matched
example.l:27: warning, rule cannot be matched
These lines referred to the blank/tab and sBody rules. Moving the unqualified '.+' after the sBody rule removed the warnings, but didn't seem to do what was wanted. Have fun...