- 快召唤伙伴们来围观吧
- 微博 QQ QQ空间 贴吧
- 文档嵌入链接
- 复制
- 微信扫一扫分享
- 已成功复制到剪贴板
flex and bison
展开查看详情
1 .flex and bison Prof. Steven A. Demurjian Computer Science & Engineering Department The University of Connecticut 371 Fairfield Way, Unit 2155 Storrs, CT 06269-3155 steve@engr.uconn.edu http://www.engr.uconn.edu/~steve (860) 486 - 4818
2 .Flex and bison Two Compiler Writing Tools that are Utilized to easily Specify: Lexical Tokens and their Order of Processing (Lex) Context Free Grammar for LALR(1) ( Yacc ) Both Lex and Yacc have Long History in Computing Lex and Yacc – Earliest Days of Unix Minicomputers Flex and Bison – From GNU JFlex - Fast Scanner Generator for Java BYacc /J – Berkeley CUP, ANTRL, PCYACC, … PCLEX and PCYACC from Abacus See: http://dinosaur.compilertools.net/
3 .Lex – A Lexical Analyzer Generator A Unix Utility from early 1970s A Compiler that Takes as Source a Specification for: Tokens/Patterns of a Language Generates a “C” Lexical Analyzer Program Pictorially: Lex Compiler C Compiler a.out Lex Source Program: lex.y lex.yy.c lex.yy.c a.out Input stream Sequence of tokens
4 .Format of a Lexical Specification – 3 Parts Declarations: Defs , Constants, Types, #includes, etc. that can Occur in a C Program Regular Definitions (expressions) Translation Rules: Pairs of (Regular Expression, Action) Informs Lexical Analyzer of Action when Pattern is Recognized Auxiliary Procedures: Designer Defined C Code Can Replace System Calls See Also http://www.cs.fsu.edu/~langley/COP4342-2006-Fall/17-programdevel04.pdf http://alumni.cs.ucr.edu/~lgao/teaching/flex.html Lex.y File Format: DECLARATIONS %% TRANSLATION RULES %% AUXILIARY PROCEDURES
5 .Format of a Lexical Specification – 3 Parts Declarations: Defs , Constants, Types, #includes, etc. that can Occur in a C Program Regular Definitions (expressions) Translation Rules: Pairs of (Regular Expression, Action) Informs Lexical Analyzer of Action when Pattern is Recognized Auxiliary Procedures: Designer Defined C Code Can Replace System Calls See Also http://www.cs.fsu.edu/~langley/COP4342-2006-Fall/17-programdevel04.pdf http://alumni.cs.ucr.edu/~lgao/teaching/flex.html Lex.y File Format: DECLARATIONS %% TRANSLATION RULES %% AUXILIARY PROCEDURES
6 ."then" { #ifdef PRNTFLG printf(" %s ", yytext); #endif return(T_THEN); } "<=" {printf(" %s ", yytext);return(T_EQ);} "<" {printf(" %s ", yytext);return(T_LT);} "<>" {printf(" %s ", yytext);return(T_NE);} ">=" {printf(" %s ", yytext);return(T_GE);} ">" {printf(" %s ", yytext);return(T_GT);} {id} {printf(" %s ", yytext);return(T_IDENTIFIER);} {integer} {printf(" %s ", yytext);return(T_INTEGER);} {real} {printf(" %s ", yytext);return(T_REAL);} {string} {printf(" %s ", yytext);return(T_STRING);} {comment} {/* T_COMMENT */} {ws} {/* spaces, tabs, newlines */} %% yywrap(){return 0;} main() { int i; do { i = yylex(); } while (i!=0); } Example lex.l File Conditional compilation action EOF for input Three Variables: yytext = “currenttoken” yylen = 12 yylval = 300 Token Definitions Discard
7 ."then" { #ifdef PRNTFLG printf(" %s ", yytext); #endif return(T_THEN); } "<=" {printf(" %s ", yytext);return(T_EQ);} "<" {printf(" %s ", yytext);return(T_LT);} "<>" {printf(" %s ", yytext);return(T_NE);} ">=" {printf(" %s ", yytext);return(T_GE);} ">" {printf(" %s ", yytext);return(T_GT);} {id} {printf(" %s ", yytext);return(T_IDENTIFIER);} {integer} {printf(" %s ", yytext);return(T_INTEGER);} {real} {printf(" %s ", yytext);return(T_REAL);} {string} {printf(" %s ", yytext);return(T_STRING);} {comment} {/* T_COMMENT */} {ws} {/* spaces, tabs, newlines */} %% yywrap(){return 0;} main() { int i; do { i = yylex(); } while (i!=0); } Example lex.l File Conditional compilation action EOF for input Three Variables: yytext = “currenttoken” yylen = 12 yylval = 300 Token Definitions Discard
8 .Other Possible Actions %% ":=" {return(T_ASSIGN);} "else" {return(T_ELSE);} "then" {return(T_THEN);} "<=" {yylval = T_EQ; return(T_EQ);} ... Etc.... ">" {yylval = T_GT; return(T_GT);} {id} {yylval = install_id(); return(T_IDENTIFIER);} {integer} {yylval = install_int(); return(T_INTEGER);} {real} {yylval = install_real(); return(T_REAL);} {comment} {/* T_COMMENT */} {ws} {/* spaces, tabs, newlines */} %% install_id() { /* A procedure to install the lexeme whose first character is pointed to by yytext and whose length is yylen into symbol table and return a pointer */ } install_int() { /* Similar – but installs an integer lexeme into symbol table */ } install_real() { /* Similar – but installs a real lexeme into symbol table */ }
9 .Revisiting Internal Variables in Lex char *yytext; Pointer to current lexeme terminated by ‘
10 .Using the lex Compiler Important Highlights Unix Lex defaults with respect to: Single Rule size (2048 bytes) All Actions (20480 bytes) DFA States (512) NFA States (254) Command Line: lex myfile.l Generates lex.yy.c pclex myfile.l Generates myfile.c -v flag Includes Statistics on State Machine, etc.
11 .Highlights Generated lex.yy.c File # define output (c) putc(c, yyout); # define input() ((( yytchar=yysptr>yysbug?U(*--yysptr); getc(yyin))==10? yylineno++, yytchar):yytchar)==EOF?0:yytchar) # define uput() (yttchar= (c);if (yytchar==‚
12 .Full lex.yy.c File # include "stdio.h" # define U(x) x # define NLSTATE yyprevious3YYNEWLINE # define BEGIN yybgin - yysvec + 1 + # define INITIAL 0 # define YYLERR yysvec # define YYSTATE (yyestate-yysvec-1) # define YYOPTIM 1 # define YYLMAX BUFSIZ # define output(c) putc(c,yyout) # define inputO (((yytchar-yysptr>yysbuf?U(*--yysptr): getc(yyin))--10?(yylineno++,yytchar):yytchar)--EOF?0:yytchar) # define unput(c) {yytchar= (c);if(yytchar=-n)yylineno--;*yysptr++-yytchar;} # define yymore () (yymorfg-1) # define ECHO fprintf(yyout, "%s",yytext) # define REJECT { nstr - yyreject(); goto yyfussy;} int yyleng; extern char yytext[]; int yymorfg; extern char *yysptr, yysbuf[]; int yytchar; FILE *yyin - {stdin}, *yyout - {stdout); extern int yylineno; struct yysvf { struct yywork *yystoff; struct yysvf *yyother; int *yystops; }; struct yysvf *yyestate; extern struct yysvf yysvec[], *yybgin;
13 .Full lex.yy.c File #define T_IDENTIFIER 300 #define T INTEGER 301 #define T_REAL 302 #define T STRING 303 #define T_ASSIGN 304 #define T ELSE 305 #define T_IF 306 #define T_THEN 307 #define T_EQ 308 #define T LT 309 #define T_NE 310 #define T GE 311 #define T_GT 312 #define YYNEWLINE 10 yylex ( ) { int nstr; extern int yyprevious; while((nstr - yylook()) >- 0) yyfussy: switch(nstr) { case 0: if(yywrap()) return(0); break; case 1: {printf(" %s ", yytext);return(TASSIGN);} break; case 2: {printf(" %s ", yytext);return(T_ELSE);} break; case 3: (printf(" %s ", yytext) ;return (T IF) ; } break;
14 .Full lex.yy.c File case 4: { #ifdef PRNTFLG printf(" %s ", yytext); #endif return(T_THEN); } break; case 5: {printf(" %s ", yytext);return(T_EQ);} break; case 6: {printf(" %s ", yytext);return(T_LT);} break; case 7: {printf(" %s ", yytext);return(T_NE);) break; case 8: {printf(" %s ", yytext);return(T_GE);} break; case 9: {printf(" %s ", yytext);return(T_GT);} break; case 10: {printf(" %s ", yytext);return(T_IDENTIFIER);} break; case 11: {printf(" %s ", yytext);return(T_INTEGER);) break; case 12: {printf(" %s ", yytext) ;return(T_REAL); } break; case 13: {printf(" %s ", yytext);return(T_STRING);}
15 .Full lex.yy.c File break; case 14: {/* T COMMENT */} break; case 15: {/* spaces, tabs, newlines */} break; case -1: break; default: fprintf(yyout,"bad switch yylook %d",nstr); ) return (0); } /* end of yylex */ yywrapO{} main() { int i; do { i = yylex(); } while (i!=0); }
16 .Full lex.yy.c File break; case 14: {/* T COMMENT */} break; case 15: {/* spaces, tabs, newlines */} break; case -1: break; default: fprintf(yyout,"bad switch yylook %d",nstr); ) return (0); } /* end of yylex */ yywrapO{} main() { int i; do { i = yylex(); } while (i!=0); }
17 .A Pascal lex.l "function" {return(T_FUNCTION);} /* "goto" {return(T_GOTO);} */ "if" {return(T_IF);} "label" {return(T_LABEL);} "nil" {return(T_NIL);} "not" {return(T_NOT);} "of" {return(T_OF);} /* "packed" {return(T_PACKED);} */ "procedure" {return(T_PROCEDURE);} "end" {return(T_END);} "program" {return(T_PROGRAM);} "record" {return(T_RECORD);} "repeat" {return(T_REPEAT);} "set" {return(T_SET);} "then" {return(T_THEN);} "to" {return(T_TO);} "type" {return(T_TYPE);} "until" {return(T_UNTIL);} "var" {return(T_VAR);} "while" {return(T_WHILE);} /* "with" {return(T_WITH);} */ "+" {return(T_PLUS);} "-" {return(T_MINUS);} "or" {return(T_OR);} "and" {return(T_AND);} "div" {return(T_DIV);} "mod" {return(T_MOD);} "/" {return(T_RDIV);}
18 .A Pascal lex.l "*" {return(T_MULT);} "(" {return(T_LPAREN);} ")" {return(T_RPAREN);} "=" {return(T_EQ);} "," {return(T_COMMA);} ".." {return(T_RANGE);} "." {return(T_PERIOD);} "[" {return(T_LBRACK);} "]" {return(T_RBRACK);} "<=" {return(T_EQ);} "<" {return(T_LT);} "<>" {return(T_NE);} ">=" {return(T_GE);} ">" {return(T_GT);} "in" {return(T_IN);} "^" {return(T_UPARROW);} ";" {return(T_SEMI);} {id} {return(T_IDENTIFIER);} {integer} {return(T_INTEGER);} {real} {return(T_REAL);} {string} {return(T_STRING);} {comment} {/* T_COMMENT */} {ws} {/* spaces, tabs, newlines */}
19 .Yacc - Yet Another Compiler Compiler https://www.slideshare.net/kinnarshah8888/ch4c Also from early 1970s A Compiler that Takes as Source a Specification for: Organization of Tokens into Grammar Rules Generates a LALR(1) Parser Pictorially:
20 .A Combined View http://slideplayer.com/slide/4937457/
21 .Bison Compiler Writing Tool that Generates LALR(1) Parser Grammar Rules (BNF) can be Modified/Augmented with Semantic Actions via Code Segments Can work in Conjunction with Lex or Separately Three Major Parts of a Bison Specification: Declarations %% Grammar Rules %% User Supplied Programs
22 .Bison Compiler Writing Tool that Generates LALR(1) Parser Grammar Rules (BNF) can be Modified/Augmented with Semantic Actions via Code Segments Can work in Conjunction with Lex or Separately Three Major Parts of a Bison Specification: Declarations %% Grammar Rules %% User Supplied Programs
23 .Bison Compiler Writing Tool that Generates LALR(1) Parser Grammar Rules (BNF) can be Modified/Augmented with Semantic Actions via Code Segments Can work in Conjunction with Lex or Separately Three Major Parts of a Bison Specification: Declarations %% Grammar Rules %% User Supplied Programs
24 .Stack Performs RM Derivation in Reverse DIGIT E E + T E + T * F E + T * DIGIT E + F * DIGIT E + DIGIT * DIGIT T + DIGIT * DIGIT F + DIGIT * DIGIT DIGIT + DIGIT * DIGIT F T E + E DIGIT + E F + E T + E * T + E DIGIT * T + E F * T + E T + E E
25 .Stack Performs RM Derivation in Reverse DIGIT E E + T E + T * F E + T * DIGIT E + F * DIGIT E + DIGIT * DIGIT T + DIGIT * DIGIT F + DIGIT * DIGIT DIGIT + DIGIT * DIGIT F T E + E DIGIT + E F + E T + E * T + E DIGIT * T + E F * T + E T + E E
26 .What are the LALR(1) Item Sets? State I 6 GOTO( I 0 , digit ) F → digit . GOTO( I 0 , T) State I 3 E → T . T → T . * F State I 0 L → . E $ E → . E + T E → . T T → . T * F T → . F F → . ( E ) F → . digit GOTO( I 0 , E) State I 2 L → E . $ E → E . + T GOTO( I 0 , F) State I 4 T → F . State I 5 GOTO( I 0 , ( ) F → ( . E ) E → . E + T E → . T T → . T * F T → . F F → . ( E ) F → . Id
27 .LALR State Machine ( yacc –v *.y) Generates y.output state 0 $accept : _line $end DIGIT shift 6 ( shift 5 . error line goto 1 expr goto 2 term goto 3 fact goto 4 state 1 $accept line_$end $end accept . error state 2 line : expr_ (1) expr : expr_+ term + shift 7 • reduce 1 state 3 expr : term_ (3) term : term_* fact * shift 8 . reduce 3 state 4 term : fact_ (5) . reduce 5 state 5 fact : (_expr ) DIGIT shift 6 ( shift 5 . error expr goto 9 term goto 3 fact goto 4 state 6 fact : DIGIT (7) . reduce 7 state 7 expr : expr +_term DIGIT shift 6 ( shift 5 . error term goto 10 U fact goto 4 state 8 term : term *_fact DIGIT shift 6 ( shift 5 . error fact goto 11
28 .LALR State Machine state 9 expr : expr_+ term fact : ( expr_) + shift 7 ) shift 12 • error state 10 expr : expr + term_ (2) term : term_* fact * shift 8 • reduce 2 state 11 term : term * fact_ (4) . reduce 4 state 12 fact : ( expr )_ (6) reduce 6 7/300 terminals, 4/300 nonterminals 8/600 grammar rules, 13/1000 states 0 shift/reduce, 0 reduce/reduce conflicts reported 8/350 working sets used memory: states,etc. 69/24000, parser 9/12000 9/600 distinct lookahead sets 4 extra closures 13 shift entries, 1 exceptions 7 goto entries 3 entries saved by goto default Optimizer space used: input 38/24000, output 218/12000 218 table entries, 205 zero maximum spread: 257, maximum offset: 43
29 .How Do These Relate to Item Sets? state 0 $accept : _line $end DIGIT shift 6 ( shift 5 . error line goto 1 expr goto 2 term goto 3 fact goto 4 state 1 $accept line_$end $end accept . error state 2 line : expr_ (1) expr : expr_+ term + shift 7 • reduce 1 state 3 expr : term_ (3) term : term_* fact * shift 8 . reduce 3 state 4 term : fact_ (5) . reduce 5