发布网友 发布时间:2022-04-14 16:48
共3个回答
懂视网 时间:2022-04-14 21:10
由于老师要求,最近在做oceanbase存储过程的实现,在oceanbase 0.4以前是不支持存储过程的。实现的主要步骤主要包括 1、语法解析 2、词法解析 3、具体执行语法树的步骤 现在先来说说语法解析吧,在这一块主要是使用的flex( 词法分析器生成工具) 和bison(语
由于老师要求,最近在做oceanbase存储过程的实现,在oceanbase 0.4以前是不支持存储过程的。实现的主要步骤主要包括
1、语法解析
2、词法解析
3、具体执行语法树的步骤
现在先来说说语法解析吧。这一块主要使用 flex(词法分析器生成工具)和 bison(语法分析器生成器),这两个工具用来对用户输入的存储过程语句进行解析。
来具体说说该怎么实现对sql语句的分析吧
1、首先建立一个lex的文件
%option noyywrap nodefault yylineno case-insensitive

%{
/*
 * Scanner for the stored-procedure dialect.
 *
 * Turns the text held in globalInputText into the tokens declared by the
 * bison grammar (prosql.tab.hpp). Every token that carries text stores a
 * heap copy in yylval.strval; comparison and shift operators store a small
 * integer code in yylval.subtok.
 */
#include "prosql.tab.hpp"
/* NOTE(review): the names of the standard headers were lost when this
 * listing was extracted; these are the ones the code below relies on. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

//YYSTYPE yylval;

/* Start condition to return to when a C-style comment ends. */
int oldstate;

extern "C" int yylex();
//extern "C" int yyparse();
extern "C" void yyerror(const char *s, ...);

/* The scanner reads from an in-memory buffer instead of yyin. */
extern char globalInputText[10000];
extern int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead);

#undef YY_INPUT
#define YY_INPUT(b,r,s) readInputForLexer(b,&r,s)
%}

%x COMMENT

%%

  /* keywords (matched case-insensitively) */
CREATE      { return CREATE; }
PROCEDURE   { return PROCEDURE; }
SQL         { return SQL; }
DECLARE     { return DECLARE; }
SET         { return SET; }
BEGIN       { return BEGINT; }
END         { return END; }
INT         { return INT; }
VARCHAR     { return VARCHAR; }
DATE        { return DATE; }
TIME        { return TIME; }
DOUBLE      { return DOUBLE; }
IF          { return IF; }
THEN        { return THEN; }
ELSE        { return ELSE; }
ENDIF       { return ENDIF; }
FOR         { return FOR; }
WHEN        { return WHEN; }
WHILE       { return WHILE; }

  /* integer literals, kept as text */
[0-9]+      { yylval.strval = strdup(yytext); return INTNUM; }

  /* floating-point literals, kept as text */
[0-9]+"."[0-9]* |
"."[0-9]+ |
[0-9]+E[-+]?[0-9]+ |
[0-9]+"."[0-9]*E[-+]?[0-9]+ |
"."[0-9]*E[-+]?[0-9]+ { yylval.strval = strdup(yytext); return APPROXNUM; }

  /* booleans; heap copies so every strval has the same ownership */
TRUE        { yylval.strval = strdup("1"); return BOOL; }
FALSE       { yylval.strval = strdup("0"); return BOOL; }

  /* quoted strings; the token text keeps its quotes and escapes undecoded */
'(\\.|''|[^'\n])*' |
\"(\\.|\"\"|[^"\n])*\" { yylval.strval = strdup(yytext); return STRING; }

'(\\.|[^'\n])*$     { yyerror("Unterminated string %s", yytext); }
\"(\\.|[^"\n])*$    { yyerror("Unterminated string %s", yytext); }

  /* hex strings */
X'[0-9A-F]+' |
0X[0-9A-F]+ { yylval.strval = strdup(yytext); return STRING; }

  /* bit strings */
0B[01]+ |
B'[01]+'    { yylval.strval = strdup(yytext); return STRING; }

  /* single-character operators and punctuation */
[-+&~|^/%*(),.;!]   { return yytext[0]; }

"&&"        { return ANDOP; }
"||"        { return OR; }

  /* comparison operators; subtok identifies which one was seen */
"<"         { yylval.subtok = 1; return COMPARISON; }
">"         { yylval.subtok = 2; return COMPARISON; }
"!=" |
"<>"        { yylval.subtok = 3; return COMPARISON; }
"="         { yylval.subtok = 4; return COMPARISON; }
"<="        { yylval.subtok = 5; return COMPARISON; }
">="        { yylval.subtok = 6; return COMPARISON; }
"<=>"       { yylval.subtok = 12; return COMPARISON; }

"<<"        { yylval.subtok = 1; return SHIFT; }
">>"        { yylval.subtok = 2; return SHIFT; }

  /* plain identifiers */
[A-Za-z][A-Za-z0-9_]*   { yylval.strval = strdup(yytext); return NAME; }

  /* backquoted identifiers: copy without the leading quote, then cut the trailing one */
`[^`/\\.\n]+`   { yylval.strval = strdup(yytext+1);
                  yylval.strval[yyleng-2] = 0;
                  return NAME; }

`[^`\n]*$       { yyerror("unterminated quoted name %s", yytext); }

  /* user variables; only the leading '@' is dropped */
@[0-9a-z_.$]+ |
@\"[^"\n]+\" |
@`[^`\n]+` |
@'[^'\n]+'      { yylval.strval = strdup(yytext+1); return USERVAR; }

@\"[^"\n]*$     { yyerror("unterminated quoted user variable %s", yytext); }
@`[^`\n]*$      { yyerror("unterminated quoted user variable %s", yytext); }
@'[^'\n]*$      { yyerror("unterminated quoted user variable %s", yytext); }

":="        { return ASSIGN; }

  /* comments */
#.*             ;
"--"[ \t].*     ;

"/*"            { oldstate = YY_START; BEGIN COMMENT; }
<COMMENT>"*/"   { BEGIN oldstate; }
<COMMENT>.|\n   ;
  /* an EOF action must end the scan itself, otherwise EOF is hit again */
<COMMENT><<EOF>> { yyerror("unclosed comment"); yyterminate(); }

[ \t\n]     /* white space */
.           { yyerror("mystery character '%c'", *yytext); }

%%
这一部分呢就是对 每个我们自定义的满足正则的识别
接下来是对词的语法识别
%{
/*
 * Grammar for CREATE PROCEDURE statements.
 *
 * The parser does not build a tree of nodes: every rule produces a string,
 * and the string for the whole statement is stored in parsetreeroot, which
 * getsql() returns.
 */
/* NOTE(review): the names of the standard headers were lost when this
 * listing was extracted; these are the ones the code below relies on. */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>

/* Text produced for the most recently parsed statement (NULL before any parse). */
char *parsetreeroot = NULL;

extern "C" int yylex();
extern "C" int yyparse();
extern "C" void yyerror(const char *s, ...);

/* Input buffer consumed by the scanner, and how far it has been read. */
char globalInputText[10000];
int globalReadOffset;

int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead);

/*
 * Return a newly malloc'ed string holding s1 followed by s2.
 * Neither argument is modified or freed; a NULL argument is treated as an
 * empty string. Returns NULL if the allocation fails.
 */
char *mystrcat(const char *s1, const char *s2)
{
    if (s1 == NULL) {
        s1 = "";
    }
    if (s2 == NULL) {
        s2 = "";
    }

    size_t len1 = strlen(s1);
    size_t len2 = strlen(s2);

    /* The cast is required because this file is compiled as C++. */
    char *p1 = (char *)malloc(len1 + len2 + 1);
    if (p1 == NULL) {
        return NULL;
    }

    memcpy(p1, s1, len1);
    memcpy(p1 + len1, s2, len2 + 1); /* copies the terminating NUL too */
    return p1;
}

/*
 * Like mystrcat(), but takes ownership of `owned` (which must come from
 * malloc) and frees it, so chained concatenations do not leak the
 * intermediate buffers.
 */
static char *mystrcat_own(char *owned, const char *tail)
{
    char *joined = mystrcat(owned, tail);
    free(owned);
    return joined;
}
%}

%locations

%union {
    int intval;
    double floatval;
    char *strval;
    int subtok;
}

/* NOTE(review): the <strval>/<subtok> tags below were missing from the
 * extracted listing (angle-bracket text was stripped); they are restored
 * from the fields the scanner assigns and the actions read. */
%token <strval> NAME
%token <strval> STRING
%token <strval> INTNUM
%token <strval> BOOL
%token <strval> APPROXNUM
%token <strval> USERVAR

%type <strval> stmt_root create_stmt para_list definition data_type pro_block pro_parameters declare_list set_list
%type <strval> assign_var pro_body pro_stmt_list sql_stmt expr

%right ASSIGN
%left OR
%left XOR
%left ANDOP
%left NOT '!'
%left BETWEEN
%left <subtok> COMPARISON /* = <> < > <= >= <=> */
%left '|'
%left '&'
%left <subtok> SHIFT /* << >> */
%left '+' '-'
%left '*' '/' '%' MOD
%left '^'

%token CREATE
%token PROCEDURE
%token PRONAME
%token DECLARE
%token SET
%token BEGINT
%token END
%token SQL
%token INT
%token VARCHAR
%token DATE
%token TIME
%token DOUBLE
%token IF
%token NOT
%token EXISTS
%token THEN
%token ELSE
%token ENDIF
%token FOR
%token WHEN
%token WHILE

%start stmt_root

%%

/* NOTE(review): the single spaces that end the "(create) ", "(declare) ",
 * "(set) " and "(sql) " markers (and "begin ") are reproduced exactly as
 * listed; they may have been newlines before extraction. */

stmt_root:
    create_stmt pro_block
        {
            $$ = mystrcat($1, $2);
            parsetreeroot = $$;
        }
    ;

create_stmt:
    CREATE PROCEDURE NAME '(' para_list ')'
        {
            char *temp = mystrcat("create procedure ", $3);
            temp = mystrcat_own(temp, "(");
            temp = mystrcat_own(temp, $5);
            $$ = mystrcat_own(temp, ")(create) ");
        }
    ;

/*
opt_if_not_exists:
      { $$ = 0; }
    | IF NOT EXISTS { $$ = 1; }
    ;
*/

para_list:
    definition
        { $$ = $1; }
  | definition ',' para_list
        {
            char *temp = mystrcat($1, ",");
            $$ = mystrcat_own(temp, $3);
        }
    ;

definition:
    USERVAR data_type
        {
            char *temp = mystrcat($1, " ");
            $$ = mystrcat_own(temp, $2);
        }
    ;

/* Heap copies keep every strval malloc-owned; the VARCHAR length is not emitted. */
data_type:
    DATE                        { $$ = strdup("date"); }
  | TIME                        { $$ = strdup("time"); }
  | VARCHAR '(' INTNUM ')'      { $$ = strdup("varchar"); }
  | INT                         { $$ = strdup("int"); }
  | DOUBLE                      { $$ = strdup("double"); }
    ;

pro_block:
    BEGINT pro_parameters pro_body END
        {
            char *temp = mystrcat("begin ", $2);
            temp = mystrcat_own(temp, $3);
            $$ = mystrcat_own(temp, "end");
        }
    ;

pro_parameters:
    declare_list ';'
        { $$ = mystrcat($1, ";(declare) "); }
  | pro_parameters declare_list ';'
        {
            char *temp = mystrcat($1, $2);
            $$ = mystrcat_own(temp, ";(declare) ");
        }
  | pro_parameters set_list ';'
        {
            char *temp = mystrcat($1, $2);
            $$ = mystrcat_own(temp, ";(set) ");
        }
    ;

/* The empty alternatives below need an explicit value: bison leaves $$
 * indeterminate for an empty rule with no action, and that value is later
 * handed to mystrcat().
 * NOTE(review): these nullable rules make bison report conflicts; the
 * accepted language is left as it was. */
declare_list:
    /* empty */
        { $$ = strdup(""); }
  | DECLARE definition
        { $$ = mystrcat("declare ", $2); }
  | declare_list ',' definition
        {
            char *temp = mystrcat($1, ",");
            $$ = mystrcat_own(temp, $3);
        }
    ;

set_list:
    /* empty */
        { $$ = strdup(""); }
  | SET assign_var
        { $$ = mystrcat("set ", $2); }
  | set_list ',' assign_var
        {
            char *temp = mystrcat($1, ",");
            $$ = mystrcat_own(temp, $3);
        }
    ;

/* NOTE(review): any COMPARISON operator is accepted here and rendered as "=". */
assign_var:
    USERVAR COMPARISON expr
        {
            char *temp = mystrcat($1, "=");
            $$ = mystrcat_own(temp, $3);
        }
    ;

expr:
    NAME        { $$ = $1; }
  | STRING      { $$ = $1; }
  | INTNUM      { $$ = $1; }
  | APPROXNUM   { $$ = $1; }
  | BOOL        { $$ = $1; }
    ;

pro_body:
    pro_stmt_list
        { $$ = $1; }
    ;

pro_stmt_list:
    sql_stmt
        { $$ = $1; }
  | pro_stmt_list sql_stmt
        { $$ = mystrcat($1, $2); }
    ;

sql_stmt:
    /* empty */
        { $$ = strdup(""); }
  | SQL NAME ';'
        { $$ = mystrcat($2, ";(sql) "); }
    ;

%%

/*
int main(int argc, char* argv[])
{
    yyparse();
}
*/

/*
 * Input callback used by the scanner's YY_INPUT macro.
 *
 * Copies up to maxBytesToRead bytes of globalInputText, starting at
 * globalReadOffset, into buffer; stores the number of bytes copied in
 * *numBytesRead (0 once the text is exhausted) and advances
 * globalReadOffset by that amount. Always returns 0.
 */
int readInputForLexer(char *buffer, int *numBytesRead, int maxBytesToRead)
{
    int bytesRemaining = (int)strlen(globalInputText) - globalReadOffset;
    int numBytesToRead = maxBytesToRead;

    if (numBytesToRead > bytesRemaining) {
        numBytesToRead = bytesRemaining;
    }
    /* An offset past the end of the text must read as end of input,
     * not as a negative byte count. */
    if (numBytesToRead < 0) {
        numBytesToRead = 0;
    }

    memcpy(buffer, globalInputText + globalReadOffset, (size_t)numBytesToRead);

    *numBytesRead = numBytesToRead;
    globalReadOffset += numBytesToRead;
    return 0;
}

/*
 * Report an error on stderr. `s` is a printf-style format; the scanner
 * passes arguments for it (e.g. the offending text), so they are formatted
 * rather than printing the raw format string.
 */
void yyerror(const char *s, ...)
{
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "error: ");
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap);
}

/* Same as yyerror(), prefixed with the scanner's current line number. */
void zzerror(const char *s, ...)
{
    extern int yylineno;
    va_list ap;

    va_start(ap, s);
    fprintf(stderr, "%d: error: ", yylineno);
    vfprintf(stderr, s, ap);
    fprintf(stderr, "\n");
    va_end(ap);
}

/* Returns 1: there is no further input after the current buffer. */
int yywrap(void)
{
    return 1;
}

/* Return the text built for the last parsed statement (see parsetreeroot). */
char *getsql()
{
    return parsetreeroot;
}
这部分就是对上一个识别出来的词 进行顺序上的确定,构成一个完整的语法
这些需要在linux环境下进行调试
bison -d 文件名
flex 文件名
热心网友 时间:2022-04-14 18:18
作为移植版的 flex & bison for windows 似乎有问题,和正统的 Flex/Bison 工具输出有差异。运行的话,如果你在 main(int argc, char **argv) 中指定了输入方式,就按照你的格式进行调用;否则就用重定向:compiler.exe < code.txt
热心网友 时间:2022-04-14 19:36
www.baidu.com