加入收藏 | 设为首页 | 会员中心 | 我要投稿 李大同 (https://www.lidatong.com.cn/)- 科技、建站、经验、云计算、5G、大数据,站长网!
当前位置: 首页 > 百科 > 正文

2013-正则表达式解析文本

发布时间:2020-12-14 01:57:04 所属栏目:百科 来源:网络整理
导读:在项目中可能会出现这样的场景:需要从一段文本中解析出数据, 例如:需要从下文找出注红的数据 FSI/*CXS KA 909Y22MAR PEK1630 2020HKG0X 333 S CX 806Y23MAR HKG1150 1315ORD0S 77W 01 YOW2+YX2 17758 CNY INCL TAX*SYSTEM DEFAULT-CHECK OPERATING CARRIE

在项目中可能会出现这样的场景:需要从一段文本中解析出数据,

例如:需要从下文找出注红的数据

 FSI/*CX

S KA   909Y22MAR PEK1630 2020HKG0X    333   

S CX   806Y23MAR HKG1150 1315ORD0S    77W   

01 YOW2+YX2            17758 CNY                    INCL TAX

*SYSTEM DEFAULT-CHECK OPERATING CARRIER 

*INTERLINE AGREEMENT PRICING APPLIED

*ATTN PRICED ON 21JAN14*1307

 BJS

XHKG YOW2            NVB      NVA22MAR 2PC  

 CHI YX2             NVB      NVA22MAR 2PC  

FARE  CNY   16480   

TAX   CNY     90CN CNY     94HK CNY   1094XT

TOTAL CNY   17758   

22MAR14BJS KA X/HKG563.99CX CHI Q4.25 2140.91NUC2709.15END R

OE6.081590  

XT CNY 106US CNY 31XA CNY 43XY CNY 34YC CNY 880YR   

ENDOS 02 *T1

*AUTO BAGGAGE INFORMATION AVAILABLE - SEE FSB   

RFSONLN/1E /EFEP_13/FCC=T/ 

通过下面这个解析类,可以实现我们的功能,主要用到了正则表达式的()捕获功能
package cn.test;


import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extracts numeric values from a block of multi-line text using regular-expression
 * capture groups.
 *
 * <p>Results are kept in instance fields: {@link #getTax(String)} and
 * {@link #getQTax(String)} add to the same map/list on every call and return that
 * internal collection, and {@link #getROE(String)} keeps the last value it parsed.
 * Use a fresh instance per text if accumulated results are not wanted.
 */
public class QTaxParser1 {
	private static final Logger log = LoggerFactory.getLogger(QTaxParser1.class);

	/** A line that starts with one or two digits followed by a three-letter month abbreviation. */
	private static final Pattern QTAX_PATTERN = Pattern.compile(
			"^[0-9]{1,2}(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC).*", Pattern.MULTILINE);
	/** A line that starts with the literal {@code TAX}. */
	private static final Pattern TAX_PATTERN = Pattern.compile("^TAX.*", Pattern.MULTILINE);
	/** Digits immediately followed by upper-case letters, e.g. {@code 90CN}: group 1 = digits, group 2 = letters. */
	private static final Pattern NUM_PATTERN = Pattern.compile("([0-9]+)([A-Z]+) *");
	/** {@code Q} followed by a decimal number, e.g. {@code Q4.25}: group 1 = the number. */
	private static final Pattern QNUM_PATTERN = Pattern.compile("Q([0-9]+\\.{0,1}[0-9]*)");
	/**
	 * The letters {@code R O E} (optionally separated by whitespace, including line breaks)
	 * followed by a decimal number whose digits may also be separated by whitespace.
	 * Group 1 = the number including any embedded whitespace.
	 */
	private static final Pattern QROE_PATTERN = Pattern.compile(
			"\\s+R\\s*O\\s*E\\s*(([0-9]\\s*)+(\\.\\s*){0,1}([0-9]\\s*)*)\\s+");
	/** {@code =} followed by a decimal number: group 1 = the number. */
	private static final Pattern RATE_PATTERN = Pattern.compile("=([0-9]+\\.{0,1}[0-9]*)");

	private final Map<String,Double> tax = new HashMap<String,Double>();
	private final List<Double> qTax = new ArrayList<Double>();
	private Double roe;

	/**
	 * Parses the first line starting with {@code TAX} and records every
	 * {@code <digits><LETTERS>} token on it as {@code LETTERS -> digits}.
	 *
	 * @param txt the text to scan; {@code null} is treated as containing no match
	 * @return the instance's tax map (also containing entries from earlier calls)
	 */
	public Map<String,Double> getTax(String txt){
		// Isolate the TAX line.
		String taxLine = firstMatch(txt, TAX_PATTERN);

		if (taxLine != null) {
			// Each match yields an (amount, code) pair.
			Matcher item = NUM_PATTERN.matcher(taxLine);
			while (item.find()) {
				tax.put(item.group(2), Double.parseDouble(item.group(1)));
			}
		}

		return tax;
	}

	/**
	 * Collects every {@code Q<number>} value from the first line that starts with a
	 * day-and-month token (e.g. {@code 22MAR}). Nothing is collected unless the text
	 * also contains a line starting with {@code TAX}.
	 *
	 * @param txt the text to scan; {@code null} is treated as containing no match
	 * @return the instance's Q-value list (also containing values from earlier calls)
	 */
	public List<Double> getQTax(String txt){
		// The Q line is only looked for when a TAX line is present.
		if (firstMatch(txt, TAX_PATTERN) == null) {
			return qTax;
		}

		// Isolate the line that may carry the Q values.
		String qLine = firstMatch(txt, QTAX_PATTERN);
		if (qLine != null) {
			// Extract each Q value.
			Matcher item = QNUM_PATTERN.matcher(qLine);
			while (item.find()) {
				qTax.add(Double.parseDouble(item.group(1)));
			}
		}
		return qTax;
	}

	/**
	 * Finds the first {@code ROE<number>} occurrence and parses the number. The letters
	 * and digits may be split by whitespace or line breaks; that whitespace is removed
	 * before parsing.
	 *
	 * @param txt the text to scan; {@code null} is treated as containing no match
	 * @return the parsed value, or the value from a previous successful call
	 *         ({@code null} if there has been none) when {@code txt} has no match
	 */
	public Double getROE(String txt) {
		if (txt != null) {
			Matcher m = QROE_PATTERN.matcher(txt);
			if (m.find()) {
				// Group 1 may contain spaces/newlines between digits; strip them.
				roe = Double.parseDouble(m.group(1).replaceAll("\\s+", ""));
			}
		}
		return roe;
	}

	/**
	 * Tells whether the text contains a line starting with a day-and-month token
	 * (e.g. {@code 22MAR}).
	 *
	 * @param txt the text to scan; {@code null} yields {@code false}
	 * @return {@code true} if such a line exists
	 */
	public boolean isTaxPage(String txt) {
		String line = firstMatch(txt, QTAX_PATTERN);
		if (line == null) {
			return false;
		}
		log.debug("TAX Match:{}", line);
		return true;
	}

	/**
	 * Returns the number following the first {@code =} sign that is directly followed
	 * by digits.
	 *
	 * @param txt the text to scan; {@code null} is treated as containing no match
	 * @return the number as text, or {@code null} if there is no match
	 */
	public String getRateValue(String txt) {
		if (txt == null) {
			return null;
		}
		Matcher m = RATE_PATTERN.matcher(txt);
		return m.find() ? m.group(1) : null;
	}

	/**
	 * Returns the full text of the first match of {@code pattern} in {@code txt}.
	 *
	 * @return the matched text, or {@code null} if {@code txt} is {@code null} or has no match
	 */
	private static String firstMatch(String txt, Pattern pattern) {
		if (txt == null) {
			return null;
		}
		Matcher m = pattern.matcher(txt);
		return m.find() ? m.group() : null;
	}

}
测试用例
package itour.cn.fare.gateway;

import cn.test.QTaxParser1;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

/**
 * Manual driver that feeds a sample multi-line text to {@link QTaxParser1} and prints
 * the parsed TAX map, Q values and ROE value as JSON.
 */
public class QTaxParserTest {
	public static void main(String[] args) {
		QTaxParser1 parser = new QTaxParser1();
		// Lines must be separated by real line breaks: the parser's patterns anchor on
		// line starts, so a literal "n" separator would collapse the sample into one line.
		String txt =" FSICH/*CX  \n"+
"S KA   909Y22MAR PEK1630 2020HKG0X    333  \n"+
"S CX   806Y23MAR HKG1150 1315ORD0S    77W   \n"+
"01 YOW2+YX2  CH        13464 CNY                    INCL TAX\n"+
"*SYSTEM DEFAULT-CHECK OPERATING CARRIER \n"+
"*INTERLINE AGREEMENT PRICING APPLIED\n"+
"*ACCOMPANIED VALIDATION-ALL PAX MUST BE TKTD AT SAME TIME  \n"+
"*VERIFY AGE REQUIREMENTS\n"+
"*ATTN PRICED ON 21JAN14*1158\n"+
"BJS\n"+
"XHKG YOW2     CH25   NVB      NVA22MAR 2PC \n"+
" CHI YX2      CH25   NVB      NVA22MAR 2PC \n"+
"FARE  CNY   12370   \n"+
"TAX    EXEMPT CN   CNY    106US CNY    988XT\n"+
"TOTAL CNY   13464   \n"+
"22MAR14BJS KA X/HKG422.99CX CHI Q4.25 1605.68NUC2032.92END R\n"+
"OE6.081590  \n"+
"XT CNY 31XA CNY 43XY CNY 34YC CNY 880YR \n"+
"ENDOS 02 *T1\n"+
"*AUTO BAGGAGE INFORMATION AVAILABLE - SEE FSB \n"+
"RFSONLN/1E /EFEP_23/FCC=T/";

		System.out.println(JSONObject.fromObject(parser.getTax(txt)).toString());
		System.out.println(JSONArray.fromObject(parser.getQTax(txt)).toString());
		System.out.println(JSONArray.fromObject(parser.getROE(txt)).toString());
    }
}

(编辑:李大同)

【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!

    推荐文章
      热点阅读