使用ICU库中正则表达式匹配关键字示例
发布时间:2020-12-14 01:55:57 所属栏目:百科 来源:网络整理
导读:#include <iostream> //#include "unicode/unistr.h" //#include <unicode/ucsdet.h> #include <unicode/ucnv.h> #include <string.h> #ifdef _DEBUG #ifdef linux #else #ifdef _WIN64 #pragma comment(lib,"icuin64d.lib") #pragma comment(lib,"icuuc64d.lib") #else #pragma c……
#include <iostream> //#include "unicode/unistr.h" //#include <unicode/ucsdet.h> #include <unicode/ucnv.h> #include <string.h> ifdef _DEBUG #ifdef linux #else #ifdef _WIN64 #pragma comment(lib,"icuin64d.lib") #pragma comment(lib,"icuuc64d.lib") #else #pragma comment(lib,"icuin32d.lib") #pragma comment(lib,"icuuc32d.lib") #endif #endif #else #ifdef linux #else #ifdef _WIN64 #pragma comment(lib,"icuin64.lib") #pragma comment(lib,"icuuc64.lib") #else #pragma comment(lib,"icuin32.lib") #pragma comment(lib,"icuuc32.lib") #endif #endif #endif int32_t BUFFSIZE = 8; int FindSubNum(UnicodeString USrcStr,UnicodeString USubStr,int index) { int32_t num = 0; int pos = USrcStr.indexOf(USubStr); while(pos != -1) { num++; pos += index; pos = USrcStr.indexOf(USubStr,pos); } return num; } int FindSubFromBuf(char* buff,int buflen,UnicodeString liststring) { if((buff == NULL) || liststring.isEmpty() || buflen == 0) { return 0; } UCharsetDetector* dector = NULL; UErrorCode status = U_ZERO_ERROR; UConverter *conv = NULL; const UCharsetMatch *csm = NULL; int num = 0; //先检测buff里的字符编码格式 dector = ucsdet_open(&status); if(status != U_ZERO_ERROR) { //std::cout<<"open charset detector failed!n"; ucsdet_close(dector); return 0; } ucsdet_setText(dector,buff,buflen,&status); if(status != U_ZERO_ERROR) { //std::cout<<"set fail!n"; ucsdet_close(dector); return 0; } csm = ucsdet_detect(dector,&status); const char* detected = ucsdet_getName(csm,&status); ucsdet_close(dector); //然后转化为Unicode编码进行比较 UChar *target = NULL; //指向存储转换后的字符串的结尾 UChar *targetLimit = NULL; //指向缓冲区尾部的指针 const char *source = NULL; //指向源代码页缓冲区 const char *sourceLimit = NULL; //指向缓冲区的尾部的字节 int32_t *offset = NULL; //表示什么也不做*/ int32_t numread = 0; //实际读了多少字节 int32_t buffsize = 0; conv = ucnv_open(detected,&status); if(status != U_ZERO_ERROR) { std::cout<<"open converter failed!n"; ucnv_close(conv); return 0; } buffsize = BUFFSIZE/ucnv_getMinCharSize(conv); char* read = buff; char* inbuf = new char[BUFFSIZE*sizeof(char) +1]; UChar* uBuf 
= new UChar[BUFFSIZE*sizeof(UChar) + 2]; memset(inbuf,BUFFSIZE*sizeof(char) + 1); memset(uBuf,BUFFSIZE*sizeof(UChar) + 2); UnicodeString readbuff= UnicodeString(""); UnicodeString tempUStr = UnicodeString(""); while((read-buff)<buflen) { memcpy(inbuf,read,BUFFSIZE); int fin_len = buflen-(read-buff); if (fin_len > BUFFSIZE) { fin_len = BUFFSIZE; } read = read + fin_len; numread = strlen(inbuf); /* UnicodeString readbuff;*/ source = inbuf; sourceLimit = inbuf + numread; do { target = uBuf; targetLimit = uBuf + buffsize;//分食 ucnv_toUnicode(conv,&target,targetLimit,&source,sourceLimit,NULL,(read-buff == buflen)?true:false,&status); if(status != U_ZERO_ERROR) { //std::cout<<"Convert fail!n"; if (uBuf) { delete [] uBuf; uBuf = NULL; } if (inbuf) { delete []inbuf; inbuf = NULL; } ucnv_close(conv); return 0; } } while(source < sourceLimit); //用uBuf初始化UnicString对象 readbuff = UnicodeString(uBuf); int32_t readbuflen = readbuff.length(); //第一次先给midUStr赋空,防止在进行 int32_t plen = readbuff.length(); UnicodeString temp = tempUStr + readbuff; int len = liststring.length(); num += FindSubNum(temp,liststring,len); //每保存一段内存块的回退字节就清空一次便于下一次继续存放 //保留本次的后 len-1个字符 tempUStr = temp.tempSubString(temp.length() - (len-1),len); memset(inbuf,BUFFSIZE+1); memset(uBuf,BUFFSIZE*sizeof(UChar)+2); } if (uBuf) { delete [] uBuf; uBuf = NULL; } if (inbuf) { delete []inbuf; inbuf = NULL; } ucnv_close(conv); return num; } (编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |