Vizpiler
An interactive visual tutorial on lexing/parsing in compilers
Credits to:
EPaperPress
and
Andrew Appel's Modern Compiler Implementation in C
for the tutorial text and much of the lex/yacc code. Also thanks to the developers of
Reflex
for allowing the lexical analysis stepper.
Lexer Theory
Lexer Code
Lexer Visual
Parser Theory
Parser Code
Parser Visual
AST Code
AST Visual
Interpret
%{ #include
typedef enum { typeCon, typeId, typeOpr } nodeEnum; /* constants */ typedef struct { int value; /* value of constant */ } conNodeType; /* identifiers */ typedef struct { int i; /* subscript to sym array */ } idNodeType; /* operators */ typedef struct { int oper; /* operator */ int nops; /* number of operands */ struct nodeTypeTag *op[1]; /* operands, extended at runtime */ } oprNodeType; typedef struct nodeTypeTag { nodeEnum type; /* type of node */ union { conNodeType con; /* constants */ idNodeType id; /* identifiers */ oprNodeType opr; /* operators */ }; } nodeType; extern int sym[26]; #include "y.tab.h" void yyerror(char *); %} %% [a-z] { yylval.sIndex = *yytext - 'a'; return VARIABLE; } 0 { yylval.iValue = atoi(yytext); return INTEGER; } [1-9][0-9]* { yylval.iValue = atoi(yytext); return INTEGER; } [-()<>=+*/;{}.] { return *yytext; } ">=" return GE; "<=" return LE; "==" return EQ; "!=" return NE; "while" return WHILE; "if" return IF; "else" return ELSE; "print" return PRINT; [ \t\n]+ ; /* ignore whitespace */ . yyerror("Unknown character"); %% int yywrap(void) { return 1; }
Compile
Enter the
flex
code you want to visualize and then compile it!
#include
#include
#include
typedef enum { typeCon, typeId, typeOpr } nodeEnum; /* constants */ typedef struct { int value; /* value of constant */ } conNodeType; /* identifiers */ typedef struct { int i; /* subscript to sym array */ } idNodeType; /* operators */ typedef struct { int oper; /* operator */ int nops; /* number of operands */ struct nodeTypeTag *op[1]; /* operands, extended at runtime */ } oprNodeType; typedef struct nodeTypeTag { nodeEnum type; /* type of node */ union { conNodeType con; /* constants */ idNodeType id; /* identifiers */ oprNodeType opr; /* operators */ }; } nodeType; #include "y.tab.h" /* yacc-generated parser header */ extern int sym[26]; /* Print label associated with node */ void printLabel(nodeType **p); /* Recursive drawing of the syntax tree */ void exNode(nodeType **p); /* Main entry point of the manipulation of the syntax tree */ int ex(nodeType *p) { printf("digraph AST {\n"); exNode(&p); printf("}\n"); return 0; } /* Recursive drawing of the syntax tree */ void exNode(nodeType **p) { int k; /* child number */ /* Add string label */ printLabel(p); /* Add arrows to children */ for (k = 0; k < (*p)->opr.nops; k++) printf("\"%p\"->\"%p\";\n", (void*)p, (void*)&((*p)->opr.op[k])); /* Process children */ for (k = 0; k < (*p)->opr.nops; k++) exNode(&((*p)->opr.op[k])); } /* Print label associated with node */ void printLabel(nodeType **p) { switch((*p)->type) { case typeCon: printf("\"%p\" [label=\"NUM(%d)\"];\n", (void*)p, (*p)->con.value); break; case typeId: printf("\"%p\" [label=\"ID(%c)\"];\n", (void*)p, (*p)->id.i + 'A'); break; case typeOpr: switch((*p)->opr.oper){ case WHILE: printf("\"%p\" [label=\"while\"];\n", (void*)p); break; case IF: printf("\"%p\" [label=\"if\"];\n", (void*)p); break; case PRINT: printf("\"%p\" [label=\"print\"];\n", (void*)p); break; case ';': printf("\"%p\" [label=\";\"];\n", (void*)p); break; case '=': printf("\"%p\" [label=\"=\"];\n", (void*)p); break; case UMINUS: printf("\"%p\" [label=\"_\"];\n", (void*)p); break; case '+': printf("\"%p\" [label=\"+\"];\n", (void*)p); break; case '-': printf("\"%p\" [label=\"-\"];\n", (void*)p); break; case '*': printf("\"%p\" [label=\"*\"];\n", (void*)p); break; case '/': printf("\"%p\" [label=\"/\"];\n", (void*)p); break; case '<': printf("\"%p\" [label=\"<\"];\n", (void*)p); break; case '>': printf("\"%p\" [label=\">\"];\n", (void*)p); break; case GE: printf("\"%p\" [label=\">=\"];\n", (void*)p); break; case LE: printf("\"%p\" [label=\"<=\"];\n", (void*)p); break; case NE: printf("\"%p\" [label=\"!=\"];\n", (void*)p); break; case EQ: printf("\"%p\" [label=\"==\"];\n", (void*)p); break; } break; } }
Compile
Enter the
C
code to generate your DOT graph and then compile it!
%{ #include
#include
#include
typedef enum { typeCon, typeId, typeOpr } nodeEnum; /* constants */ typedef struct { int value; /* value of constant */ } conNodeType; /* identifiers */ typedef struct { int i; /* subscript to sym array */ } idNodeType; /* operators */ typedef struct { int oper; /* operator */ int nops; /* number of operands */ struct nodeTypeTag *op[1]; /* operands, extended at runtime */ } oprNodeType; typedef struct nodeTypeTag { nodeEnum type; /* type of node */ union { conNodeType con; /* constants */ idNodeType id; /* identifiers */ oprNodeType opr; /* operators */ }; } nodeType; /* prototypes */ nodeType *opr(int oper, int nops, ...); nodeType *id(int i); nodeType *con(int value); void freeNode(nodeType *p); int ex(nodeType *p); int yylex(void); void yyerror(char *s); int sym[26]; /* symbol table */ %} %union { int iValue; /* integer value */ char sIndex; /* symbol table index */ nodeType *nPtr; /* node pointer */ }; %token
INTEGER %token
VARIABLE %token WHILE IF PRINT %nonassoc IFX %nonassoc ELSE %left GE LE EQ NE '>' '<' %left '+' '-' %left '*' '/' %nonassoc UMINUS %type
stmt expr stmt_list %% program: function { exit(0); } ; function: function stmt { ex($2); freeNode($2); } | /* NULL */ ; stmt: ';' { $$ = opr(';', 2, NULL, NULL); } | expr ';' { $$ = $1; } | PRINT expr ';' { $$ = opr(PRINT, 1, $2); } | VARIABLE '=' expr ';' { $$ = opr('=', 2, id($1), $3); } | WHILE '(' expr ')' stmt { $$ = opr(WHILE, 2, $3, $5); } | IF '(' expr ')' stmt %prec IFX { $$ = opr(IF, 2, $3, $5); } | IF '(' expr ')' stmt ELSE stmt { $$ = opr(IF, 3, $3, $5, $7); } | '{' stmt_list '}' { $$ = $2; } ; stmt_list: stmt { $$ = $1; } | stmt_list stmt { $$ = opr(';', 2, $1, $2); } ; expr: INTEGER { $$ = con($1); } | VARIABLE { $$ = id($1); } | '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); } | expr '+' expr { $$ = opr('+', 2, $1, $3); } | expr '-' expr { $$ = opr('-', 2, $1, $3); } | expr '*' expr { $$ = opr('*', 2, $1, $3); } | expr '/' expr { $$ = opr('/', 2, $1, $3); } | expr '<' expr { $$ = opr('<', 2, $1, $3); } | expr '>' expr { $$ = opr('>', 2, $1, $3); } | expr GE expr { $$ = opr(GE, 2, $1, $3); } | expr LE expr { $$ = opr(LE, 2, $1, $3); } | expr NE expr { $$ = opr(NE, 2, $1, $3); } | expr EQ expr { $$ = opr(EQ, 2, $1, $3); } | '(' expr ')' { $$ = $2; } ; %% nodeType *con(int value) { nodeType *p; /* allocate node */ if ((p = malloc(sizeof(nodeType))) == NULL) yyerror("out of memory"); /* copy information */ p->type = typeCon; p->con.value = value; return p; } nodeType *id(int i) { nodeType *p; /* allocate node */ if ((p = malloc(sizeof(nodeType))) == NULL) yyerror("out of memory"); /* copy information */ p->type = typeId; p->id.i = i; return p; } nodeType *opr(int oper, int nops, ...) { va_list ap; nodeType *p; int i; /* allocate node, extending op array */ if ((p = malloc(sizeof(nodeType) + (nops-1) * sizeof(nodeType *))) == NULL) yyerror("out of memory"); /* copy information */ p->type = typeOpr; p->opr.oper = oper; p->opr.nops = nops; va_start(ap, nops); for (i = 0; i < nops; i++) p->opr.op[i] = va_arg(ap, nodeType*); va_end(ap); return p; } void freeNode(nodeType *p) { int i; if (!p) return; if (p->type == typeOpr) { for (i = 0; i < p->opr.nops; i++) freeNode(p->opr.op[i]); } free (p); } void yyerror(char *s) { fprintf(stdout, "%s\n", s); } int main(void) { yyparse(); return 0; }
Compile
Enter the
yacc
code you want to visualize and then compile it!
Input your test code here:
a = 5+120
Start Lexing
Step Next
Step to End
Input your test code here:
print(5 + 5 + 60 * 2 / 3 - 1);
Get AST
#include
typedef enum { typeCon, typeId, typeOpr } nodeEnum; /* constants */ typedef struct { int value; /* value of constant */ } conNodeType; /* identifiers */ typedef struct { int i; /* subscript to sym array */ } idNodeType; /* operators */ typedef struct { int oper; /* operator */ int nops; /* number of operands */ struct nodeTypeTag *op[1]; /* operands, extended at runtime */ } oprNodeType; typedef struct nodeTypeTag { nodeEnum type; /* type of node */ union { conNodeType con; /* constants */ idNodeType id; /* identifiers */ oprNodeType opr; /* operators */ }; } nodeType; extern int sym[26]; #include "y.tab.h" int ex(nodeType *p) { if (!p) return 0; switch(p->type) { case typeCon: return p->con.value; case typeId: return sym[p->id.i]; case typeOpr: switch(p->opr.oper) { case WHILE: while(ex(p->opr.op[0])) ex(p->opr.op[1]); return 0; case IF: if (ex(p->opr.op[0])) ex(p->opr.op[1]); else if (p->opr.nops > 2) ex(p->opr.op[2]); return 0; case PRINT: printf("%d\n", ex(p->opr.op[0])); return 0; case ';': ex(p->opr.op[0]); return ex(p->opr.op[1]); case '=': return sym[p->opr.op[0]->id.i] = ex(p->opr.op[1]); case UMINUS: return -ex(p->opr.op[0]); case '+': return ex(p->opr.op[0]) + ex(p->opr.op[1]); case '-': return ex(p->opr.op[0]) - ex(p->opr.op[1]); case '*': return ex(p->opr.op[0]) * ex(p->opr.op[1]); case '/': return ex(p->opr.op[0]) / ex(p->opr.op[1]); case '<': return ex(p->opr.op[0]) < ex(p->opr.op[1]); case '>': return ex(p->opr.op[0]) > ex(p->opr.op[1]); case GE: return ex(p->opr.op[0]) >= ex(p->opr.op[1]); case LE: return ex(p->opr.op[0]) <= ex(p->opr.op[1]); case NE: return ex(p->opr.op[0]) != ex(p->opr.op[1]); case EQ: return ex(p->opr.op[0]) == ex(p->opr.op[1]); } } return 0; }
Input your test code here:
print(5 + 5 + 60 * 2 / 3 - 1);
Compile
Interpret