ICU正则表达式初试
发布时间:2020-12-14 02:06:10 所属栏目:百科 来源:网络整理
导读:学习了一下ICU下的正则表达式用法,随即编码一试,代码如下: #include <iostream> #include "unicode/utypes.h" #include "unicode/ucnv.h" #include "unicode/regex.h" using namespace std; int reg_exp_match(const char*pat,const char* sour); #pragma comment……
学习了一下ICU下的正则表达式用法,随即编码一试,代码如下: #include <iostream> #include "unicode/utypes.h" #include "unicode/ucnv.h" #include "unicode/regex.h" using namespace std; int reg_exp_match(const char*pat,const char* sour); #pragma comment(lib,"icuuc.lib") #pragma comment(lib,"icuin.lib") int main() { /*int num = reg_exp_match("a[abc]c+[0-9]","abc6dfdacc6fdhaac8kjhdacc4s");*/ const char* sour = "ab2013-04-26dfd6f2012-12-09dhaac8kj2016-05-12hdacc4s"; /*int num = reg_exp_match("^(d{4})-(d{2}-(dd))$",sour);*/ int num = reg_exp_match( "[0-9]{2,4}-[0-9]{1,2}-[0-9]{1,2}",sour); cout<<num<<endl; system("pause"); return 0; } int reg_exp_match(const char*pat,const char* sour) { int num = 0; int LEN = strlen(sour); UConverter *cv = NULL; RegexPattern *REPattern = NULL;///正则表达式 RegexMatcher *REMatcher = NULL;//匹配器 UErrorCode status = U_ZERO_ERROR; UnicodeString patString(pat); //Unicode正则表达式组装,这些函数经常代替构造函数来创建RegexPattern对象 REPattern = RegexPattern::compile(patString,status); if (U_FAILURE(status)) { return 0; } //把母串转换为Unicode UnicodeString inputString(sour); //剔除字符串中的一些序列 UnicodeString unEscapedInput = inputString.unescape(); //创建一个正则表达式匹配器 REMatcher = REPattern->matcher(unEscapedInput,status); if (U_FAILURE(status)) { return 0; } //UnicodeString ss = REMatcher->input();//返回母串 //UnicodeString ss = REMatcher->refreshInputText("123",status); //UnicodeString another = (UnicodeString)"abc5fdhaac2kjhdacc1h"; //REMatcher->reset(another); ////用replaceStr替换母串中第一个的正则表达式字符串 /*UnicodeString substring = REMatcher->replaceFirst(replaceStr,status);*/ int32_t leng = inputString.length(); char *result = new char[4*leng]; memset(result,4*leng); cv = ucnv_open("GB18030",&status); ucnv_fromUChars(cv,result,4*leng,inputString.getBuffer(),leng,&status); ucnv_close(cv); int64_t pos = 0; LEN = strlen(result); printf("%sn",result); UnicodeString dest[10]; //该函数以模式串为分隔符将待处理的字符串分为几组存放到数组dest中 int32_t gp = REMatcher->split(inputString,dest,10,status); //int64_t pos = 0; //int32_t count = 0; //if(REMatcher->find()) //{ 
// //必须在find函数之后str才有值 // UnicodeString str = REMatcher->group(status); //} //groupCount函数根据正则表达式中的括号数目来计数的 //count = REMatcher->groupCount(); while(pos < LEN) { if(REMatcher->find(pos,status)) { /*if(REMatcher->find()) {*/ //必须在find函数之后str才有值 UnicodeString str = REMatcher->group(status); /*}*/ num++; pos = REMatcher->end64(status); } else { break; } } //count = REMatcher->groupCount(); return num; } (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |