??
这是较为简单的正则表达式引擎 目前只支持. + | 三种特殊字符 发现bug 请联系 已有封装好的函数,请直接使用 未来将逐渐更新
例子如图 // regulatr.cpp : 定义控制台应用程序的入口点。 //
/*______________________________________________________ regular expression engine written by 唐hz 2015.8.10 _______________________________________________________*/ #include "stdafx.h" #include<iostream> #include<vector> using namespace std; struct State; #define error -1//此处不确定 #define normal 1 #define finish 16 #define mul_way 2 #define start 8 #define any 32 #define error_back 64 #define self_loop1 4 struct Linker{ State *from; State *to; char request; }; struct LinkerList{ Linker l; struct LinkerList *next; }; struct State{ int status; int position; LinkerList * in; LinkerList * out; }; struct Position{ State* p; int i; };
State g_State[100]; int g_next[100]; int g_start,g_finish,g_maxi=-1; int g_can_not_any=-1;
void next(char *p){ int n=strlen(p); int i=1,j=0; g_next[j]=-1; while(i<n){ if(p[i]==p[j]) g_next[i++]=j++; else { if(j!=0) j=g_next[j-1]+1; else{ j=0; g_next[i++]=-1; } }
} }
void addlinker(LinkerList **a,char b,State *c,State *d){ if(*a==NULL){ (*a)=new LinkerList; (*a)->l.request=b; (*a)->l.from=c; (*a)->l.to=d; (*a)->next=NULL; return ; } LinkerList * temp=*a; while(temp->next!=NULL){ temp=temp->next; } temp->next=new LinkerList; temp=temp->next; temp->l.request=b; temp->l.from=c; temp->l.to=d; temp->next=NULL; return ; } void addlinker2(LinkerList **a,State *d){ if(*a==NULL){ (*a)=new LinkerList; (*a)->l.request=b; (*a)->l.from=c; (*a)->l.to=d; (*a)->next=NULL; return ; } LinkerList * temp=*a,*temp1; while(temp->next&&temp->next->next!=NULL){ temp=temp->next; } temp1=temp->next; temp->next=new LinkerList; temp=temp->next; temp->l.request=b; temp->l.from=c; temp->l.to=d; temp->next=temp1; return ; } void addlinker1(LinkerList **a,State *d){ if(*a==NULL){ (*a)=new LinkerList; (*a)->l.request=b; (*a)->l.from=c; (*a)->l.to=d; (*a)->next=NULL; return ; } LinkerList * temp; temp=new LinkerList; temp->l.request=b; temp->l.from=c; temp->l.to=d; temp->next=*a; *a=temp; return ; } void connect(State &a,State &b,char r,int x=0){ if(x==0){ addlinker(&(a.out),r,&a,&b); addlinker(&(b.in),&b);} else if(x==1){ addlinker1(&(a.out),&b); addlinker1(&(b.in),&b); } else if(x==2){ addlinker2(&(a.out),&b); addlinker2(&(b.in),&b); } }
void compile(char *p,char*q){ int i=0,j=0; while(i<(int)strlen(q)){ if(q[i]=='|'){ i+=2; } else if(q[i]=='+'){ i++; } else p[j++]=q[i++]; } p[j]=' '; }
void construction(char *p){ char q[100]; compile(q,p); next(q); int i=0; for(int j=0;j<100;j++){ g_State[j].in=g_State[j].out=NULL; g_State[j].status=0;// g_State[j].position=j; } g_State[0].status|=start; connect(g_State[0],g_State[0],error,0); while(*p!='n') { switch(*p){ case '|': if(i>0) i--; p++; g_State[i].status|=mul_way; break; case '+': i--; p--; connect(g_State[i],g_State[i],*p,2); g_State[i].status|=self_loop1; i++; p+=2; break; default: if(!(g_State[i].status&start)&&!(g_State[i].status&error_back)){ g_State[i].status|=error_back; connect(g_State[i],g_State[g_next[i-1]+1],0);} if(*p=='.'){ g_State[i].status|=any; } connect(g_State[i],g_State[i+1],1); g_State[i].status|=normal; i++; p++;} } g_State[i].status|=finish;
}//查int 字节 void realease(){ LinkerList *temp,*temp1; for(int i=0;i<100;i++) { temp=g_State[i].in; while(temp){ temp1=temp; temp=temp->next; delete temp1; } temp=g_State[i].out; while(temp){ temp1=temp; temp=temp->next; delete temp1; } } }
bool NextState(Position a,char *p,vector<Position> & find,int i){ auto temp=a.p->out; Position rec; bool j=false; while(temp){ if(temp->l.request!=error&&temp->l.request==*(p+i)||(temp->l.request=='.'&&a.p->status&any&&a.p->position!=g_can_not_any)){ j=true; if(temp->l.to->position==a.p->position&&temp->l.request!='.'&&a.p->status&any){ g_can_not_any=a.p->position; } rec.p=((temp->l).to); rec.i=i+1; if(rec.i<(int)strlen(p)) find.push_back(rec); } else { if((temp->l.request==error)&&(!(a.p->status&finish))&&!j){ if(((temp->l).to)->position<g_can_not_any){ cout<<"使g_can_not_any回复初值n"; g_can_not_any=-1;} rec.p=((temp->l).to); rec.i=i; if(!(rec.p->status&start)) find.push_back(rec); } } temp=temp->next; } if(j) return true;
return false;
} bool isexist( char *p){ vector<Position> find; Position temp; temp.p=g_State; temp.i=0; find.push_back(temp); g_start=1; g_finish=0; g_maxi=-1; bool canfinish=false; while(!find.empty()){ if(temp.p->status&finish) {g_finish=temp.i; break; } if(temp.i>g_maxi) g_maxi=temp.i; find.pop_back(); NextState(temp,p,find,temp.i); if(!find.empty()) temp=find.back(); else if(g_maxi<(int)strlen(p)){ temp.i=g_maxi+1; temp.p=g_State; g_start=temp.i+1;//人是从1数而机器从0数,故加1 find.push_back(temp); } } if(g_finish>=g_start) return true; return false; } void output(char *p,int a,int b){ cout<<"n位于"<<a<<"和"<<b<<"之间n"; for(int i=a-1;i<b;i++) cout<<p[i]; cout<<"n"; } void findWholeMatch(char *p){ char *str=p; int rec=0; while(isexist(p)&&rec<(int)strlen(str)){ output(str,g_start+rec,g_finish+rec); rec+=g_finish; p+=g_finish; }
} int main(){ char expression[100]; char content[1000]; fgets(expression,100,stdin); fgets(content,1000,stdin); construction(expression); cout<<"构建完成 n"; /* if(isexist(content)){ cout<<"存在n"; cout<<"开始在"<<g_start<<"结束在"<<g_finish<<"n"; output(content,g_start,g_finish); } else cout<<"不存在n"; */
findWholeMatch(content); system("pause"); realease();
}// regulatr.cpp : 定义控制台应用程序的入口点。 // /*______________________________________________________ regular expression engine written by 唐hz 2015.8.10 _______________________________________________________*/ #include "stdafx.h" #include<iostream> #include<vector> using namespace std; struct State; #define error -1//此处不确定 #define normal 1 #define finish 16 #define mul_way 2 #define start 8 #define any 32 #define error_back 64 #define self_loop1 4 struct Linker{ State *from; State *to; char request; }; struct LinkerList{ Linker l; struct LinkerList *next; }; struct State{ int status; int position; LinkerList * in; LinkerList * out; }; struct Position{ State* p; int i; }; State g_State[100]; int g_next[100]; int g_start,g_maxi=-1; int g_can_not_any=-1; void next(char *p){ int n=strlen(p); int i=1,j=0; g_next[j]=-1; while(i<n){ if(p[i]==p[j]) g_next[i++]=j++; else { if(j!=0) j=g_next[j-1]+1; else{ j=0; g_next[i++]=-1; } } } } void addlinker(LinkerList **a,State *d){ if(*a==NULL){ (*a)=new LinkerList; (*a)->l.request=b; (*a)->l.from=c; (*a)->l.to=d; (*a)->next=NULL; return ; } LinkerList * temp; temp=new LinkerList; temp->l.request=b; temp->l.from=c; temp->l.to=d; temp->next=*a; *a=temp; return ; } void connect(State &a,&b); } } void compile(char *p,j=0; while(i<(int)strlen(q)){ if(q[i]=='|'){ i+=2; } else if(q[i]=='+'){ i++; } else p[j++]=q[i++]; } p[j]=' '; } void construction(char *p){ char q[100]; compile(q,0); while(*p!='n') { switch(*p){ case '|': if(i>0) i--; p++; g_State[i].status|=mul_way; break; case '+': i--; p--; connect(g_State[i],1); g_State[i].status|=normal; i++; p++;} } g_State[i].status|=finish; }//查int 字节 void realease(){ LinkerList *temp,*temp1; for(int i=0;i<100;i++) { temp=g_State[i].in; while(temp){ temp1=temp; temp=temp->next; delete temp1; } temp=g_State[i].out; while(temp){ temp1=temp; temp=temp->next; delete temp1; } } } bool NextState(Position a,int i){ auto temp=a.p->out; Position rec; bool j=false; while(temp){ if(temp->l.request!=error&&temp->l.request==*(p+i)||(temp->l.request=='.'&&a.p->status&any&&a.p->position!=g_can_not_any)){ j=true; if(temp->l.to->position==a.p->position&&temp->l.request!='.'&&a.p->status&any){ g_can_not_any=a.p->position; } rec.p=((temp->l).to); rec.i=i+1; if(rec.i<(int)strlen(p)) find.push_back(rec); } else { if((temp->l.request==error)&&(!(a.p->status&finish))&&!j){ if(((temp->l).to)->position<g_can_not_any){ cout<<"使g_can_not_any回复初值n"; g_can_not_any=-1;} rec.p=((temp->l).to); rec.i=i; if(!(rec.p->status&start)) find.push_back(rec); } } temp=temp->next; } if(j) return true; return false; } bool isexist( char *p){ vector<Position> find; Position temp; temp.p=g_State; temp.i=0; find.push_back(temp); g_start=1; g_finish=0; g_maxi=-1; bool canfinish=false; while(!find.empty()){ if(temp.p->status&finish) {g_finish=temp.i; break; } if(temp.i>g_maxi) g_maxi=temp.i; find.pop_back(); NextState(temp,g_finish+rec); rec+=g_finish; p+=g_finish; } } int main(){ char expression[100]; char content[1000]; fgets(expression,g_finish); } else cout<<"不存在n"; */ findWholeMatch(content); system("pause"); realease(); }
采用了NFA,未来将逐步更新。 (编辑:李大同)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容!
|