简介

Jakarta-ORO是最全面、优化得最好的正则表达式API之一。Jakarta-ORO库以前叫做OROMatcher,由Daniel F. Savarese编写,后来他把它赠给了Jakarta Project。
Jakarta-ORO正则表达式库支持Perl5兼容的正则表达式语法。
环境:jdk1.5.0_12, jakarta-oro-2.0.8

-
public ? static ? void ?simpleContains()? throws ?Exception?{????
-
????Pattern?pattern?=?new ?Perl5Compiler().compile( "//d+" );????
-
????Perl5Matcher?matcher?=?new ?Perl5Matcher();????
-
????PatternMatcherInput?matcherInput?=?new ?PatternMatcherInput( "北京2008年8月08日20时" );????
-
????while ?(matcher.contains(matcherInput,?pattern))?{????
-
????????MatchResult?result?=?matcher.getMatch();????
-
????????System.out.println(result.toString());????
-
????}????
-
}????
/**
 * Prints every run of decimal digits found in a fixed sample string, one match per line.
 *
 * <p>The same {@code PatternMatcherInput} is handed to {@code contains} on every
 * iteration; per the ORO API the input object tracks the current search offset, so
 * each call continues after the previous match and the loop ends when none is left.
 *
 * @throws Exception if the regular expression cannot be compiled
 */
public static void simpleContains() throws Exception {
    // "\\d+" in Java source is the regex \d+ (one or more digits). The previous
    // literal "//d+" matched the text "//d", not digits, so nothing was printed.
    Pattern pattern = new Perl5Compiler().compile("\\d+");
    Perl5Matcher matcher = new Perl5Matcher();
    PatternMatcherInput matcherInput = new PatternMatcherInput("北京2008年8月08日20时");
    while (matcher.contains(matcherInput, pattern)) {
        MatchResult result = matcher.getMatch();
        System.out.println(result.toString());
    }
}

输出:
2008
8
08
20

-
public ? static ? void ?simpleResults()? throws ?Exception?{????
-
????Pattern?pattern?=?new ?Perl5Compiler().compile( "(//d+//.//d+//.//d+//.//d+)@(//d{2}///d{2}///d{4})" );????
-
????Perl5Matcher?matcher?=?new ?Perl5Matcher();????
-
????PatternMatcherInput?matcherInput?=?new ?PatternMatcherInput( "202.108.9.38@08/10/2008" );????
-
????while ?(matcher.contains(matcherInput,?pattern))?{????
-
????????MatchResult?result?=?matcher.getMatch();????
-
????????for ( int ?i?=? 0 ;?i?<?result.groups();?i++)?{????
-
????????????System.out.printf("%s?:?%s/n" ,?i,?result.group(i));????
-
????????}????
-
????}????
-
}????
/**
 * Matches a fixed {@code address@date} sample string and prints each capture group
 * of every match as {@code index : text}.
 *
 * <p>Group 0 is the whole match, group 1 the dotted-number part before the
 * {@code @} and group 2 the slash-separated date after it.
 *
 * @throws Exception if the regular expression cannot be compiled
 */
public static void simpleResults() throws Exception {
    // Regex after Java unescaping: (\d+\.\d+\.\d+\.\d+)@(\d{2}/\d{2}/\d{4})
    // The backslashes had been turned into forward slashes, so it never matched.
    Pattern pattern = new Perl5Compiler().compile(
            "(\\d+\\.\\d+\\.\\d+\\.\\d+)@(\\d{2}/\\d{2}/\\d{4})");
    Perl5Matcher matcher = new Perl5Matcher();
    PatternMatcherInput matcherInput = new PatternMatcherInput("202.108.9.38@08/10/2008");
    while (matcher.contains(matcherInput, pattern)) {
        MatchResult result = matcher.getMatch();
        for (int i = 0; i < result.groups(); i++) {
            // %n ends the line; the former "/n" printed the two characters '/' and 'n'.
            System.out.printf("%d : %s%n", i, result.group(i));
        }
    }
}

输出:
0 : 202.108.9.38@08/10/2008
1 : 202.108.9.38
2 : 08/10/2008

-
public ? static ? void ?spiltIt()? throws ?Exception?{????
-
????String?rawStr?=?"北京;朝阳;鸟巢奥运会场" ;????
-
????ArrayList<String>?results?=?new ?ArrayList<String>();????
-
????Perl5Matcher?matcher?=?new ?Perl5Matcher();????
-
????Pattern?pattern?=?new ?Perl5Compiler().compile( ";" );????
-
????Util.split(results,?matcher,?pattern,?rawStr);????
-
????for ?(String?r?:?results)?{????
-
????????System.out.println(r);????
-
????}????
-
}????
/**
 * Splits a fixed semicolon-separated string with ORO's {@code Util.split} and prints
 * each piece on its own line.
 *
 * @throws Exception if the separator pattern cannot be compiled
 */
public static void spiltIt() throws Exception {
    ArrayList<String> pieces = new ArrayList<String>();
    Pattern separator = new Perl5Compiler().compile(";");
    // Util.split appends the pieces to the collection passed as first argument.
    Util.split(pieces, new Perl5Matcher(), separator, "北京;朝阳;鸟巢奥运会场");
    for (int i = 0; i < pieces.size(); i++) {
        System.out.println(pieces.get(i));
    }
}

输出:
北京
朝阳
鸟巢奥运会场
替换:

-
public ? static ? void ?substituteIt()? throws ?Exception?{????
-
????String?rawStr?=?"2008-08-11?17:16:32" ;????
-
????Perl5Matcher?matcher?=?new ?Perl5Matcher();????
-
????Pattern?pattern?=?new ?Perl5Compiler().compile( "-" );????
-
????String?result?=?Util.substitute(matcher,?new ?Perl5Substitution( "," ),?rawStr,?Util.SUBSTITUTE_ALL);????
-
????System.out.println(result);????
-
}????
/**
 * Replaces every {@code -} in a fixed timestamp string with {@code ,} using
 * {@code Util.substitute} and prints the result.
 *
 * @throws Exception if the pattern cannot be compiled
 */
public static void substituteIt() throws Exception {
    String rawStr = "2008-08-11 17:16:32";
    Perl5Matcher matcher = new Perl5Matcher();
    Pattern pattern = new Perl5Compiler().compile("-");
    // Util.substitute takes (matcher, pattern, substitution, input, numSubs); the
    // compiled pattern was previously left out of the call, which does not compile.
    String result = Util.substitute(
            matcher, pattern, new Perl5Substitution(","), rawStr, Util.SUBSTITUTE_ALL);
    System.out.println(result);
}

输出:
2008,08,11 17:16:32
替换2:

-
public ? static ? void ?substituteIt2()? throws ?Exception?{????
-
????String?rawStr?=?"2008-08-11?17:16:32" ;????
-
????Perl5Matcher?matcher?=?new ?Perl5Matcher();????
-
????Pattern?pattern?=?new ?Perl5Compiler().compile( "(//d{4})-(//d{2})-(//d{2})?(//d{2}://d{2}://d{2})" );????
-
????String?result?=?Util.substitute(matcher,?new ?Perl5Substitution( "变换?$3,$2,$1?$4" ),?Util.SUBSTITUTE_ALL);????
-
????System.out.println("格式yyyy-MM-dd?HH:mm:ss到dd,MM,yyyy?HH:mm:ss" );????
-
????System.out.println(result);????
-
}????
/**
 * Rearranges the captured parts of a fixed {@code yyyy-MM-dd HH:mm:ss} string through
 * a substitution template with group references, then prints a caption and the result.
 *
 * @throws Exception if the regular expression cannot be compiled
 */
public static void substituteIt2() throws Exception {
    String rawStr = "2008-08-11 17:16:32";
    Perl5Matcher matcher = new Perl5Matcher();
    // Regex after Java unescaping: (\d{4})-(\d{2})-(\d{2}) (\d{2}:\d{2}:\d{2})
    // Groups: 1 = year, 2 = month, 3 = day, 4 = time of day.
    Pattern pattern = new Perl5Compiler().compile(
            "(\\d{4})-(\\d{2})-(\\d{2}) (\\d{2}:\\d{2}:\\d{2})");
    // $n in the template is replaced by capture group n. Both the pattern and the
    // input string were missing from this call before.
    String result = Util.substitute(
            matcher, pattern, new Perl5Substitution("变换 $3,$1 $4"), rawStr, Util.SUBSTITUTE_ALL);
    System.out.println("格式yyyy-MM-dd HH:mm:ss到dd,yyyy HH:mm:ss");
    System.out.println(result);
}

输出:
格式yyyy-MM-dd HH:mm:ss到dd,yyyy HH:mm:ss
变换 11,2008 17:16:32
小结:
上面的几种用法看起来比java.util.regex包用起来要复杂,如果只是简单应用,还是用JDK自带的包省力。
下面几种用法是Perl5格式的用法,如果习惯这种格式,用起来还是比较爽的。
Perl5Util查找格式:
[ m ] / pattern / [ i ][ m ][ s ][ x ]
第一个m写不写都可以(The m prefix is optional)
i  忽略大小写(case insensitive match)
m  多行模式(treat the input as consisting of multiple lines)
s  单行模式(treat the input as consisting of a single line)
x  使用扩展语法(enable extended expression syntax)
查找 MyDoc/Java/SimpleJava 时
正常写法 "m/MyDoc\\/Java\\/SimpleJava/"
扩展写法 "m#MyDoc/Java/SimpleJava#x"
Perl5Util查找:

-
public ? static ? void ?perl5Match()?{????
-
????Perl5Util?plUtil?=?new ?Perl5Util();????
-
????PatternMatcherInput?matcherInput?=?new ?PatternMatcherInput( "北京2008年8月08日20时" );????
-
????while ?(plUtil.match( "///d+/" ,?matcherInput))?{????
-
????????MatchResult?result?=?plUtil.getMatch();????
-
????????System.out.println(result.toString());????
-
????}????
-
}????
/**
 * Prints every run of decimal digits in a fixed sample string, using a Perl5-style
 * {@code /pattern/} expression with {@code Perl5Util.match}.
 *
 * <p>The loop reuses one {@code PatternMatcherInput}; per the ORO API the input
 * object tracks the current offset, so each {@code match} call continues after the
 * previous match.
 */
public static void perl5Match() {
    Perl5Util plUtil = new Perl5Util();
    PatternMatcherInput matcherInput = new PatternMatcherInput("北京2008年8月08日20时");
    // The expression is /\d+/ once Java has unescaped the backslash. The previous
    // literal "///d+/" contained no backslash and did not describe digits.
    while (plUtil.match("/\\d+/", matcherInput)) {
        MatchResult result = plUtil.getMatch();
        System.out.println(result.toString());
    }
}

输出:
2008
8
08
20
分组:

-
public ? static ? void ?perl5Match2()?{????
-
????Perl5Util?plUtil?=?new ?Perl5Util();????
-
????PatternMatcherInput?matcherInput?=?new ?PatternMatcherInput( "202.108.9.38@08/10/2008" );????
-
????while ?(plUtil.match( "m#08(/10/)2008#x" ,?matcherInput))?{????
-
????????MatchResult?result?=?plUtil.getMatch();????
-
????????System.out.printf("%s?:?%s/n" ,? 0 ,?result.group( 0 ));????
-
????????System.out.printf("%s?:?%s/n" ,? 1 ,?result.group( 1 ));????
-
????}????
-
}????
/**
 * Matches a date inside a fixed sample string with a {@code #}-delimited Perl5
 * expression and prints group 0 (the whole match) and group 1 (the parenthesised part).
 */
public static void perl5Match2() {
    Perl5Util plUtil = new Perl5Util();
    PatternMatcherInput matcherInput = new PatternMatcherInput("202.108.9.38@08/10/2008");
    // Using '#' as the delimiter lets the '/' characters in the pattern stay unescaped.
    while (plUtil.match("m#08(/10/)2008#x", matcherInput)) {
        MatchResult result = plUtil.getMatch();
        // Each format string has two placeholders, so the group index must be passed
        // as well; the first call used to omit it and threw MissingFormatArgumentException.
        System.out.printf("%d : %s%n", 0, result.group(0));
        System.out.printf("%d : %s%n", 1, result.group(1));
    }
}

输出:
0 : 08/10/2008
1 : /10/
Perl5Util替换格式:
s / pattern / replacement / [ g ][ i ][ m ][ o ][ s ][ x ]
第一个s必须要写(The s prefix is mandatory)
g  全部替换。默认只替换第一个
i  忽略大小写(case insensitive match)
m  多行模式(treat the input as consisting of multiple lines)
o  替换串中的变量插值只计算一次,即只在第一次匹配时计算(interpolate variables only once)
s  单行模式(treat the input as consisting of a single line)
x  使用扩展语法(enable extended expression syntax)
Perl5Util替换:
-
public ? static ? void ?perl5Substitute()? throws ?Exception?{????
-
????String?rawStr?=?"2008-08-11?17:16:32" ;????
-
????Perl5Util?plutil?=?new ?Perl5Util();????
-
????String?result?=?plutil.substitute("s/-/,/g" ,?rawStr);????
-
????System.out.println(result);????
-
}????
/**
 * Applies the Perl5 expression {@code s/-/,/g} to a fixed timestamp string and
 * prints the substituted text.
 *
 * @throws Exception declared for callers; nothing in the body throws a checked exception itself
 */
public static void perl5Substitute() throws Exception {
    Perl5Util perl = new Perl5Util();
    // The trailing 'g' makes the substitution apply to every '-' rather than only the first.
    System.out.println(perl.substitute("s/-/,/g", "2008-08-11 17:16:32"));
}
输出:
2008,08,11 17:16:32
Perl5Util替换2:
-
public ? static ? void ?perl5Substitute2()? throws ?Exception?{????
-
????String?rawStr?=?"2008-08-11?17:16:32" ;????
-
????Perl5Util?plutil?=?new ?Perl5Util();????
-
????String?result?=?plutil.substitute("s/(//d{4})-(//d{2})-(//d{2})?(//d{2}://d{2}://d{2})/变换?$3,$1?$4/g" ,?rawStr);????
-
????System.out.println("格式yyyy-MM-dd?HH:mm:ss到dd,yyyy?HH:mm:ss" );????
-
????System.out.println(result);????
-
}????
/**
 * Rearranges the captured parts of a fixed {@code yyyy-MM-dd HH:mm:ss} string with a
 * single Perl5 {@code s/pattern/replacement/g} expression, then prints a caption and
 * the result.
 *
 * @throws Exception declared for callers; nothing in the body throws a checked exception itself
 */
public static void perl5Substitute2() throws Exception {
    String rawStr = "2008-08-11 17:16:32";
    Perl5Util plutil = new Perl5Util();
    // Pattern part after Java unescaping: (\d{4})-(\d{2})-(\d{2}) (\d{2}:\d{2}:\d{2})
    // Groups: 1 = year, 2 = month, 3 = day, 4 = time of day. The backslashes had been
    // turned into '/', which also broke the s/…/…/ delimiter structure.
    String result = plutil.substitute(
            "s/(\\d{4})-(\\d{2})-(\\d{2}) (\\d{2}:\\d{2}:\\d{2})/变换 $3,$1 $4/g", rawStr);
    System.out.println("格式yyyy-MM-dd HH:mm:ss到dd,yyyy HH:mm:ss");
    System.out.println(result);
}
输出:
格式yyyy-MM-dd HH:mm:ss到dd,yyyy HH:mm:ss
变换 11,2008 17:16:32
Perl5Util替换3:
-
public ? static ? void ?perl5AddCommas()? throws ?Exception?{????
-
????String?rawStr?=?"314159265" ;????
-
????Perl5Util?plutil?=?new ?Perl5Util();????
-
????while (plutil.match( "/[+-]?//d*//d{4}/" ,?rawStr)){????
-
????????rawStr?=?plutil.substitute("s/([+-]?//d*//d)(//d{3})/$1,$2/" ,?rawStr);????
-
????????System.out.println(rawStr);????
-
????}????
-
????System.out.println(rawStr);????
-
}????
/**
 * Inserts thousands separators into a fixed digit string, adding one comma per loop
 * pass and printing the intermediate value after each pass and the final value once more.
 *
 * @throws Exception declared for callers; nothing in the body throws a checked exception itself
 */
public static void perl5AddCommas() throws Exception {
    String rawStr = "314159265";
    Perl5Util plutil = new Perl5Util();
    // Continue while a run of at least four digits (optionally signed) is still present.
    // Expression after Java unescaping: /[+-]?\d*\d{4}/
    while (plutil.match("/[+-]?\\d*\\d{4}/", rawStr)) {
        // No 'g' flag: only the first match is rewritten. The greedy \d* leaves exactly
        // three digits for group 2, so the comma lands before the last three digits
        // of the leading digit run.
        rawStr = plutil.substitute("s/([+-]?\\d*\\d)(\\d{3})/$1,$2/", rawStr);
        System.out.println(rawStr);
    }
    System.out.println(rawStr);
}
输出:
314159,265
314,159,265
314,159,265
过滤:
-
public ? static ? void ?filter()?{????
-
????String?path?=?"D://MyDoc//Java//SimpleJava" ;????
-
????File?file?=?new ?File(path);????
-
????String[]?globList?=?file.list(new ?GlobFilenameFilter( "*.java" ));????
-
????for ?(String?fn?:?globList)?{????
-
????????System.out.println(fn);????
-
????}????
-
????String[]?perl5List?=?file.list(new ?Perl5FilenameFilter( ".+//.java" ));????
-
????for ?(String?fn?:?perl5List)?{????
-
????????System.out.println(fn);????
-
????}????
-
}????
/**
 * Lists the {@code .java} files of a fixed directory twice: once selected with a glob
 * filter and once with a Perl5 regular-expression filter, printing each name on its
 * own line.
 *
 * <p>If the directory does not exist or cannot be read, nothing is printed.
 */
public static void filter() {
    // Backslashes restored; they had been turned into '/' like every other '\' here.
    File file = new File("D:\\MyDoc\\Java\\SimpleJava");
    printNames(file.list(new GlobFilenameFilter("*.java")));
    // Regex after Java unescaping: .+\.java (the dot before "java" is literal).
    printNames(file.list(new Perl5FilenameFilter(".+\\.java")));
}

/**
 * Prints each name on its own line. {@code File.list} returns {@code null} when the
 * path is not a directory or an I/O error occurs; that case prints nothing instead of
 * failing with a NullPointerException.
 */
private static void printNames(String[] names) {
    if (names == null) {
        return;
    }
    for (String name : names) {
        System.out.println(name);
    }
}
输出:
DepCreater.java
ReadAndWrite.java
DepCreater.java
ReadAndWrite.java
模式匹配的几个问题:
表达式的递归匹配
用正则表达式来分析一个计算式中的括号配对情况,想办法把有嵌套的成对括号或者成对标签匹配出来。
非贪婪匹配的效率
当一个表达式中有多个非贪婪匹配,或者有多个匹配次数未知的子表达式时,这个表达式可能存在效率上的陷阱,有时匹配速度会非常慢。(简单的避开方法:一个表达式中使用不超过一个非贪婪匹配)

相关网站:
http://www.oschina.net/bbs/thread/10601?lp=8
http://xzio.javaeye.com/blog/121291
http://www.zeuux.org/science/learning-regex.cn.html
http://blog.csdn.net/yuanchao3333/archive/2009/01/31/3855418.aspx
http://www.360doc.com/content/10/0805/09/561_43777969.shtml
http://www.51testing.com/html/26/122326-215574.html