使用flex和bison实现的sql引擎解析
发布时间:2020-12-15 03:40:36 所属栏目:百科 来源:网络整理
导读:由于老师要求,最近在做oceanbase存储过程的实现,在oceanbase 0.4以前是不支持存储过程的。实现的主要步骤包括:1、语法解析;2、词法解析;3、具体执行语法树的步骤。现在先来说说语法解析吧,在这一块主要是使用的flex(词法分析器生成工具)和bison(语法分析器生成器)。
由于老师要求,最近在做oceanbase存储过程的实现,在oceanbase 0.4以前是不支持存储过程的。实现的主要步骤主要包括 1、语法解析2、词法解析3、具体执行语法树的步骤现在先来说说语法解析吧,在这一块主要是使用的flex( 词法分析器生成工具) 和bison(语法分析器生成器) 这两个是对用户输入的存储过程语句进行解析的 来具体说说该怎么实现对sql语句的分析吧 1、首先建立一个lex的文件

%option noyywrap nodefault yylineno case-insensitive
%{
/*
 * Scanner for the stored-procedure dialect.
 *
 * Every token that carries text hands the parser a heap-allocated copy in
 * yylval.strval; comparison and shift operators carry a small integer code
 * in yylval.subtok instead.
 */
#include "prosql.tab.hpp"
#include <stdarg.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <malloc.h>

/* Start condition to restore when a C-style comment is closed. */
int oldstate;

extern "C" int yylex();
extern "C" void yyerror(const char *s, ...);

/* Input buffer and reader are declared here and expected from another file. */
extern char globalInputText[10000];
extern int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead);

/* Pull scanner input through readInputForLexer instead of reading yyin. */
#undef YY_INPUT
#define YY_INPUT(b,r,s) readInputForLexer(b,&r,s)
%}

%x COMMENT

%%

    /* keywords (matched case-insensitively, see %option above) */
CREATE      { return CREATE; }
PROCEDURE   { return PROCEDURE; }
SQL         { return SQL; }
DECLARE     { return DECLARE; }
SET         { return SET; }
BEGIN       { return BEGINT; }
END         { return END; }
INT         { return INT; }
VARCHAR     { return VARCHAR; }
DATE        { return DATE; }
TIME        { return TIME; }
DOUBLE      { return DOUBLE; }
IF          { return IF; }
THEN        { return THEN; }
ELSE        { return ELSE; }
ENDIF       { return ENDIF; }
FOR         { return FOR; }
WHEN        { return WHEN; }
WHILE       { return WHILE; }

    /* integer literal, kept as text */
[0-9]+      { yylval.strval = strdup(yytext); return INTNUM; }

    /* floating-point literal, kept as text */
[0-9]+"."[0-9]* |
"."[0-9]+ |
[0-9]+E[-+]?[0-9]+ |
[0-9]+"."[0-9]*E[-+]?[0-9]+ |
"."[0-9]*E[-+]?[0-9]+ { yylval.strval = strdup(yytext); return APPROXNUM; }

    /* booleans are handed on as the text "1" / "0"; duplicated so that */
    /* strval never points at a string literal */
TRUE        { yylval.strval = strdup("1"); return BOOL; }
FALSE       { yylval.strval = strdup("0"); return BOOL; }

    /* quoted strings: backslash escapes and doubled quotes are allowed; */
    /* the surrounding quotes stay in the token text */
'(\\.|''|[^'\n])*' |
\"(\\.|\"\"|[^"\n])*\" { yylval.strval = strdup(yytext); return STRING; }

'(\\.|[^'\n])*$     { yyerror("Unterminated string %s",yytext); }
\"(\\.|[^"\n])*$    { yyerror("Unterminated string %s",yytext); }

    /* hex and bit strings */
X'[0-9A-F]+' |
0X[0-9A-F]+ { yylval.strval = strdup(yytext); return STRING; }

0B[01]+ |
B'[01]+'    { yylval.strval = strdup(yytext); return STRING; }

    /* operators */
[-+&~|^/%*(),.;!]   { return yytext[0]; }

"&&"        { return ANDOP; }
"||"        { return OR; }

"<"         { yylval.subtok = 1; return COMPARISON; }
">"         { yylval.subtok = 2; return COMPARISON; }
"!=" |
"<>"        { yylval.subtok = 3; return COMPARISON; }
"="         { yylval.subtok = 4; return COMPARISON; }
"<="        { yylval.subtok = 5; return COMPARISON; }
">="        { yylval.subtok = 6; return COMPARISON; }
"<=>"       { yylval.subtok = 12; return COMPARISON; }

"<<"        { yylval.subtok = 1; return SHIFT; }
">>"        { yylval.subtok = 2; return SHIFT; }

    /* plain names */
[A-Za-z][A-Za-z0-9_]*   { yylval.strval = strdup(yytext); return NAME; }

    /* back-quoted names: copy without the opening quote, then overwrite */
    /* the closing quote with the terminator */
`[^`/\\.\n]+`   {
                    yylval.strval = strdup(yytext+1);
                    yylval.strval[yyleng-2] = 0;
                    return NAME;
                }
`[^`\n]*$       { yyerror("unterminated quoted name %s",yytext); }

    /* user variables: the leading '@' is dropped from the token text */
@[0-9a-z_.$]+ |
@\"[^"\n]+\" |
@`[^`\n]+` |
@'[^'\n]+'      { yylval.strval = strdup(yytext+1); return USERVAR; }

@\"[^"\n]*$     { yyerror("unterminated quoted user variable %s",yytext); }
@`[^`\n]*$      { yyerror("unterminated quoted user variable %s",yytext); }
@'[^'\n]*$      { yyerror("unterminated quoted user variable %s",yytext); }

":="        { return ASSIGN; }

    /* comments */
#.*             ;
"--"[ \t].*     ;

"/*"            { oldstate = YY_START; BEGIN COMMENT; }
<COMMENT>"*/"   { BEGIN oldstate; }
<COMMENT>.|\n   ;
    /* an <<EOF>> action has to end the scan itself, otherwise the scanner */
    /* hits end-of-input again and reports the same error forever */
<COMMENT><<EOF>> { yyerror("unclosed comment"); yyterminate(); }

    /* everything else */
[ \t\n]     /* white space */
.           { yyerror("mystery character '%c'",*yytext); }

%%
接下来是对词的语法识别

%{
/*
 * Grammar for CREATE PROCEDURE statements.
 *
 * Every non-terminal carries a heap-allocated string (strval). Reducing a
 * rule builds a new string from the children's text; stmt_root stores the
 * final result in parsetreeroot, which getsql() returns.
 *
 * NOTE: child strings are not freed after a reduction, so a parse leaks the
 * intermediate pieces. Only the temporaries created inside a single action
 * are released.
 */
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include <malloc.h>

/* Text produced by the last successful parse (NULL before that). */
char *parsetreeroot = NULL;

extern "C" int yylex();
extern "C" int yyparse();
extern "C" void yyerror(const char *s, ...);

/* Statement text to scan and the offset of the first byte not yet read. */
char globalInputText[10000];
int globalReadOffset;
int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead);

/*
 * Return a newly allocated concatenation of a, b and c.
 * A NULL argument is treated as the empty string. Exits the process when
 * memory cannot be allocated, so the result is never NULL.
 */
static char *concat3(const char *a, const char *b, const char *c)
{
    if (a == NULL) a = "";
    if (b == NULL) b = "";
    if (c == NULL) c = "";

    size_t la = strlen(a);
    size_t lb = strlen(b);
    size_t lc = strlen(c);

    char *out = (char *)malloc(la + lb + lc + 1);
    if (out == NULL) {
        fprintf(stderr, "error: out of memory\n");
        exit(EXIT_FAILURE);
    }
    memcpy(out, a, la);
    memcpy(out + la, b, lb);
    memcpy(out + la + lb, c, lc + 1);   /* lc + 1 copies the terminator too */
    return out;
}

/* Return a newly allocated copy of s. */
static char *dupstr(const char *s)
{
    return concat3(s, "", "");
}

/* Return a newly allocated string holding s1 followed by s2. */
char *mystrcat(char *s1, char *s2)
{
    return concat3(s1, s2, "");
}
%}

%locations

%union {
    int intval;
    double floatval;
    char *strval;
    int subtok;
}

%token <strval> NAME
%token <strval> STRING
%token <strval> INTNUM
%token <strval> BOOL
%token <strval> APPROXNUM
%token <strval> USERVAR

%type <strval> stmt_root create_stmt para_list definition data_type pro_block pro_parameters declare_list set_list
%type <strval> assign_var pro_body pro_stmt_list sql_stmt expr

%right ASSIGN
%left OR
%left XOR
%left ANDOP
%left NOT '!'
%left BETWEEN
%left <subtok> COMPARISON /* = <> < > <= >= <=> */
%left '|'
%left '&'
%left <subtok> SHIFT /* << >> */
%left '+' '-'
%left '*' '/' '%' MOD
%left '^'

%token CREATE
%token PROCEDURE
%token PRONAME
%token DECLARE
%token SET
%token BEGINT
%token END
%token SQL
%token INT
%token VARCHAR
%token DATE
%token TIME
%token DOUBLE
%token IF
%token NOT
%token EXISTS
%token THEN
%token ELSE
%token ENDIF
%token FOR
%token WHEN
%token WHILE

%start stmt_root

%%

stmt_root:
      create_stmt pro_block
        {
            $$ = concat3($1, $2, "");
            parsetreeroot = $$;
        }
    ;

create_stmt:
      CREATE PROCEDURE NAME '(' para_list ')'
        {
            char *head = concat3("create procedure ", $3, "(");
            $$ = concat3(head, $5, ")(create)\n");
            free(head);
        }
    ;

/*
opt_if_not_exists:
      { $$ = 0; }
    | IF NOT EXISTS { $$ = 1; }
    ;
*/

para_list:
      definition                    { $$ = $1; }
    | definition ',' para_list      { $$ = concat3($1, ",", $3); }
    ;

definition:
      USERVAR data_type             { $$ = concat3($1, " ", $2); }
    ;

/* Type names are duplicated so that every strval is heap-allocated. */
data_type:
      DATE                          { $$ = dupstr("date"); }
    | TIME                          { $$ = dupstr("time"); }
    | VARCHAR '(' INTNUM ')'        { $$ = dupstr("varchar"); }
    | INT                           { $$ = dupstr("int"); }
    | DOUBLE                        { $$ = dupstr("double"); }
    ;

pro_block:
      BEGINT pro_parameters pro_body END
        {
            char *head = concat3("begin\n", $2, $3);
            $$ = concat3(head, "end", "");
            free(head);
        }
    ;

pro_parameters:
      declare_list ';'                  { $$ = concat3($1, ";(declare)\n", ""); }
    | pro_parameters declare_list ';'   { $$ = concat3($1, $2, ";(declare)\n"); }
      /* the SET text is appended the same way the DECLARE text is */
    | pro_parameters set_list ';'       { $$ = concat3($1, $2, ";(set)\n"); }
    ;

/*
 * NOTE(review): the empty alternatives of declare_list, set_list and
 * sql_stmt are kept so the accepted input is unchanged, but they make the
 * grammar ambiguous (bison reports conflicts). Each one now yields an empty
 * string because an empty rule has no $1 for the default action to copy.
 */
declare_list:
      /* empty */                       { $$ = dupstr(""); }
    | DECLARE definition                { $$ = concat3("declare ", $2, ""); }
      /* entries are comma-separated, as in para_list */
    | declare_list ',' definition       { $$ = concat3($1, ",", $3); }
    ;

set_list:
      /* empty */                       { $$ = dupstr(""); }
    | SET assign_var                    { $$ = concat3("set ", $2, ""); }
    | set_list ',' assign_var           { $$ = concat3($1, ",", $3); }
    ;

assign_var:
      USERVAR COMPARISON expr
        {
            /* "=" arrives as a COMPARISON token whose subtok is 4; any other
               comparison operator is not an assignment. */
            if ($2 != 4) {
                yyerror("expected '=' in SET assignment");
                YYERROR;
            }
            $$ = concat3($1, "=", $3);
        }
    ;

expr:
      NAME                          { $$ = $1; }
    | STRING                        { $$ = $1; }
    | INTNUM                        { $$ = $1; }
    | APPROXNUM                     { $$ = $1; }
    | BOOL                          { $$ = $1; }
    ;

pro_body:
      pro_stmt_list                 { $$ = $1; }
    ;

pro_stmt_list:
      sql_stmt                      { $$ = $1; }
    | pro_stmt_list sql_stmt        { $$ = concat3($1, $2, ""); }
    ;

sql_stmt:
      /* empty */                   { $$ = dupstr(""); }
    | SQL NAME ';'                  { $$ = concat3($2, ";(sql)\n", ""); }
    ;

%%

/*
int main(int argc,char* argv[])
{
    yyparse();
}
*/

/*
 * Copy up to maxBytesToRead unread bytes of globalInputText into buffer.
 *
 * buffer         destination supplied by the scanner
 * numBytesRead   receives the number of bytes copied (0 once the text is
 *                exhausted)
 * maxBytesToRead capacity of buffer
 *
 * Advances globalReadOffset by the number of bytes copied and returns 0.
 */
int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead)
{
    int bytesRemaining = (int)strlen(globalInputText) - globalReadOffset;
    int numBytesToRead = maxBytesToRead;

    if (numBytesToRead > bytesRemaining) {
        numBytesToRead = bytesRemaining;
    }
    if (numBytesToRead < 0) {
        /* offset already past the end of the text: nothing left to hand out */
        numBytesToRead = 0;
    }

    memcpy(buffer, globalInputText + globalReadOffset, (size_t)numBytesToRead);

    *numBytesRead = numBytesToRead;
    globalReadOffset += numBytesToRead;
    return 0;
}

/*
 * Report an error on stderr. s is a printf-style format; callers in the
 * scanner pass arguments for it, so they are formatted rather than ignored.
 */
void yyerror(const char *s, ...)
{
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "error: ");
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap);
}

/* Like yyerror, but prefixes the message with the scanner's line number. */
void zzerror(const char *s, ...)
{
    extern int yylineno;
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "%d: error: ", yylineno);
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap);
}

/* Always returns 1. */
int yywrap(void)
{
    return 1;
}

/* Return the text built by the last parse, or NULL if none has completed. */
char *getsql()
{
    return parsetreeroot;
}
这部分就是对上一个识别出来的词进行顺序上的确定,构成一个完整的语法。这些需要在linux环境下进行调试:bison -d 文件名,flex 文件名 (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!