xml教程之java解析xml文档
1、java解析xml文件介绍
XML解析方式分为两种:DOM方式和SAX方式
2、jaxp
JAXP(Java API for XML Processing)开发包是JavaSE的一部分,它由以下几个包及其子包组成:javax.xml、org.w3c.dom、org.xml.sax。使用JAXP进行DOM解析的步骤如下:
调用 DocumentBuilderFactory.newInstance() 方法得到创建 DOM 解析器的工厂。
调用工厂对象的 newDocumentBuilder方法得到 DOM 解析器对象。
调用 DOM 解析器对象的 parse() 方法解析 XML 文档,得到代表整个文档的 Document 对象,进而可以利用DOM特性对整个XML文档进行操作了。
3、dom编程
DOM模型(document object model)
4、sax解析
在使用 DOM 解析 XML 文档时,需要读取整个 XML 文档,在内存中构建代表整个 DOM 树的Document对象,然后再对XML文档进行操作。此种情况下,如果 XML 文档特别大,就会消耗计算机的大量内存,并且容易导致内存溢出。 SAX解析允许在读取文档的时候,即对文档进行处理,而不必等到整个文档装载完才对文档进行操作。 SAX采用事件处理的方式解析XML文件,利用 SAX 解析 XML 文档,涉及两个部分:解析器和事件处理器:
使用SAXParserFactory创建SAX解析工厂
SAXParserFactory spf = SAXParserFactory.newInstance();
通过SAX解析工厂得到解析器对象
SAXParser sp = spf.newSAXParser();
通过解析器对象得到一个XML的读取器
XMLReader xmlReader = sp.getXMLReader();
设置读取器的事件处理器
xmlReader.setContentHandler(new BookParserHandler());
解析xml文件
xmlReader.parse("book.xml");
5、dom4j解析Dom4j是一个简单、灵活的开放源代码的库。Dom4j是由早期开发JDOM的人分离出来而后独立开发的。与JDOM不同的是,dom4j使用接口和抽象基类,虽然Dom4j的API相对要复杂一些,但它提供了比JDOM更好的灵活性。 DOM4j中,获得Document对象的方式有三种:
1.读取XML文件,获得document对象
SAXReader reader = new SAXReader(); Document document = reader.read(new File("input.xml"));
2.解析XML形式的文本,得到document对象.
String text = "<members></members>"; Document document = DocumentHelper.parseText(text);
3.主动创建document对象.
Document document = DocumentHelper.createDocument(); //创建根节点
Element root = document.addElement("members");
5.1、节点对象
1.获取文档的根节点.
Element root = document.getRootElement();
2.取得某个节点的子节点.
Element memberElm = root.element("member");
3.取得节点的文字.
String text = memberElm.getText();
4.取得某节点下所有名为“member”的子节点,并进行遍历.
List nodes = rootElm.elements("member");
for (Iterator it = nodes.iterator(); it.hasNext();) {
    Element elm = (Element) it.next();
    // do something
}
5.对某节点下的所有子节点进行遍历.
for (Iterator it = root.elementIterator(); it.hasNext();) {
    Element element = (Element) it.next();
    // do something
}
6.在某节点下添加子节点.
Element ageElm = newMemberElm.addElement("age");
7.设置节点文字.
element.setText("29");
8.删除某节点.
// childElm是待删除的节点,parentElm是其父节点
parentElm.remove(childElm);
5.2、节点对象属性
1.取得某节点下的某属性
Element root = document.getRootElement();
Attribute attribute = root.attribute("name"); // 属性名name
2.取得属性的文字
String text = attribute.getText();
3.删除某属性
Attribute attribute = root.attribute("size");
root.remove(attribute);
4.遍历某节点的所有属性
Element root = document.getRootElement();
for (Iterator it = root.attributeIterator(); it.hasNext();) {
    Attribute attribute = (Attribute) it.next();
    String text = attribute.getText();
    System.out.println(text);
}
5.设置某节点的属性和文字.
newMemberElm.addAttribute("name", "sitinspring");
6.设置属性的文字
Attribute attribute = root.attribute("name");
attribute.setText("sitinspring");
5.3、写入xml
1.文档中全为英文,不设置编码,直接写入的形式.
XMLWriter writer = new XMLWriter(new FileWriter("output.xml"));
writer.write(document);
writer.close();
2.文档中含有中文,设置编码格式写入的形式.
OutputFormat format = OutputFormat.createPrettyPrint();
format.setEncoding("UTF-8"); // 指定XML编码
XMLWriter writer = new XMLWriter(new FileOutputStream("output.xml"), format);
writer.write(document);
writer.close();
5.4、字符串和xml转换
1.将字符串转化为XML
String text = "<members> <member>sitinspring</member></members>";Document document = DocumentHelper.parseText(text);
2.将文档或节点的XML转化为字符串.
SAXReader reader = new SAXReader();Document document = reader.read(new File("input.xml")); Element root=document.getRootElement();
String docXmlText=document.asXML();
String rootXmlText=root.asXML();Element memberElm=root.element("member");String memberXmlText=memberElm.asXML();
实例:
package com.sihai.dom4jtest;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import com.sihai.entity.Book;
/**
 * Demonstrates reading an XML file with dom4j: every child element of the root is
 * treated as one book, and its attributes and child elements are printed to
 * standard output.
 */
public class DOM4JTest {
    // Declared by the example; nothing in this class reads or writes it.
    private static ArrayList<Book> bookList = new ArrayList<Book>();

    /**
     * Loads src/res/books.xml and prints each book's attributes and child elements.
     * A DocumentException raised while reading the file is reported via its stack trace.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SAXReader xmlReader = new SAXReader();
        Document doc;
        try {
            // Only the read call can raise DocumentException, so the try is kept narrow.
            doc = xmlReader.read(new File("src/res/books.xml"));
        } catch (DocumentException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
            return;
        }

        // The root element's direct children are the individual books.
        Element root = doc.getRootElement();
        for (Iterator books = root.elementIterator(); books.hasNext();) {
            System.out.println("=====开始遍历某一本书=====");
            Element current = (Element) books.next();
            printAttributes(current);
            printChildElements(current);
            System.out.println("=====结束遍历某一本书=====");
        }
    }

    /** Prints the name and value of every attribute of the given element. */
    private static void printAttributes(Element book) {
        List<Attribute> attributes = book.attributes();
        for (Attribute attribute : attributes) {
            System.out.println("属性名:" + attribute.getName() + "--属性值:"
                    + attribute.getValue());
        }
    }

    /** Prints the name and string value of every child element of the given element. */
    private static void printChildElements(Element book) {
        for (Iterator children = book.elementIterator(); children.hasNext();) {
            Element child = (Element) children.next();
            System.out.println("节点名:" + child.getName() + "--节点值:" + child.getStringValue());
        }
    }
}
2、jdom解析
package com.sihai.jdomtest;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import com.sihai.entity.Book;
/**
 * Demonstrates parsing an XML file with JDOM2: every child of the root element is
 * treated as one book, its attributes and child elements are printed, and the
 * recognised values are collected into {@code Book} objects.
 */
public class JDOMTest {
    private static ArrayList<Book> booksList = new ArrayList<Book>();

    /**
     * Parses src/res/books.xml, printing progress to standard output and appending one
     * {@code Book} per parsed element to {@code booksList}. Parsing and I/O failures are
     * reported via their stack traces. The input stream is always closed before returning.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // 1. Create the SAXBuilder that turns the input into a JDOM document.
        SAXBuilder saxBuilder = new SAXBuilder();
        InputStream in = null;
        try {
            // 2. Open the XML file and decode it as UTF-8.
            in = new FileInputStream("src/res/books.xml");
            InputStreamReader isr = new InputStreamReader(in, "UTF-8");
            // 3. Build the document from the reader.
            Document document = saxBuilder.build(isr);
            // 4. Fetch the root element of the document.
            Element rootElement = document.getRootElement();
            // 5. The root's direct children are the books.
            List<Element> bookList = rootElement.getChildren();

            // 1-based position of the current book; replaces a per-iteration indexOf scan.
            int bookNumber = 0;
            for (Element book : bookList) {
                bookNumber++;
                Book bookEntity = new Book();
                System.out.println("======开始解析第" + bookNumber
                        + "书======");

                // Walk all attributes, since their names and count are not assumed.
                // When the name is known, book.getAttributeValue("id") would also work.
                List<Attribute> attrList = book.getAttributes();
                for (Attribute attr : attrList) {
                    String attrName = attr.getName();
                    String attrValue = attr.getValue();
                    System.out.println("属性名:" + attrName + "----属性值:"
                            + attrValue);
                    if (attrName.equals("id")) {
                        bookEntity.setId(attrValue);
                    }
                }

                // Walk the child elements and copy the recognised ones onto the entity.
                List<Element> bookChilds = book.getChildren();
                for (Element child : bookChilds) {
                    String childName = child.getName();
                    String childValue = child.getValue();
                    System.out.println("节点名:" + childName + "----节点值:"
                            + childValue);
                    if (childName.equals("name")) {
                        bookEntity.setName(childValue);
                    } else if (childName.equals("author")) {
                        bookEntity.setAuthor(childValue);
                    } else if (childName.equals("year")) {
                        bookEntity.setYear(childValue);
                    } else if (childName.equals("price")) {
                        bookEntity.setPrice(childValue);
                    } else if (childName.equals("language")) {
                        bookEntity.setLanguage(childValue);
                    }
                }

                System.out.println("======结束解析第" + bookNumber
                        + "书======");
                booksList.add(bookEntity);
                // Progress output kept from the original example: list size so far and
                // the first collected book's id and name.
                System.out.println(booksList.size());
                System.out.println(booksList.get(0).getId());
                System.out.println(booksList.get(0).getName());
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (JDOMException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the file handle on success and on failure.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
3、sax方式解析
package com.sihai.test;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import com.sihai.entity.Book;
import com.sihai.handler.SAXParserHandler;
/**
 * Demonstrates SAX parsing: parses books.xml with a {@code SAXParserHandler} and then
 * prints the books the handler exposes through {@code getBookList()}.
 */
public class SAXTest {
    /**
     * Parses books.xml and prints the number of books followed by each book's fields.
     * Configuration, parsing and I/O failures are reported via their stack traces.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Obtain a SAXParserFactory instance.
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        try {
            // Obtain a SAXParser from the factory.
            SAXParser saxParser = parserFactory.newSAXParser();
            // Create the SAXParserHandler object that is handed to the parser.
            SAXParserHandler bookHandler = new SAXParserHandler();
            saxParser.parse("books.xml", bookHandler);

            System.out.println("~!~!~!共有" + bookHandler.getBookList().size()
                    + "本书");
            for (Book parsed : bookHandler.getBookList()) {
                printBook(parsed);
            }
        } catch (ParserConfigurationException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (SAXException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    /** Prints each field of the book on its own line, followed by a finish marker. */
    private static void printBook(Book book) {
        System.out.println(book.getId());
        System.out.println(book.getName());
        System.out.println(book.getAuthor());
        System.out.println(book.getYear());
        System.out.println(book.getPrice());
        System.out.println(book.getLanguage());
        System.out.println("----finish----");
    }
}
4、四种方式比较
package com.parser.test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.jdom2.Attribute;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.sihai.entity.Book;
import com.sihai.handler.SAXParserHandler;
public class ParseTest {
/**
 * Parses books.xml with the JAXP DOM API and builds one {@code Book} per
 * {@code <book>} element from its {@code id} attribute and its name, author, year,
 * price and language child elements. The resulting list is local to this method.
 * Configuration, parsing and I/O failures are reported via their stack traces.
 */
public void domXmlParser() {
    ArrayList<Book> bookLists = new ArrayList<Book>();
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    try {
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document document = db.parse("books.xml");
        NodeList bookList = document.getElementsByTagName("book");
        for (int i = 0; i < bookList.getLength(); i++) {
            Node book = bookList.item(i);
            Book bookEntity = new Book();

            // Look the id attribute up by name instead of scanning every attribute.
            NamedNodeMap attrs = book.getAttributes();
            Node idAttr = attrs.getNamedItem("id");
            if (idAttr != null) {
                bookEntity.setId(idAttr.getNodeValue());
            }

            NodeList childNodes = book.getChildNodes();
            for (int k = 0; k < childNodes.getLength(); k++) {
                Node child = childNodes.item(k);
                // Skip the whitespace text nodes that sit between the child elements.
                if (child.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                String name = child.getNodeName();
                // getTextContent() yields "" for an empty element such as <price/>,
                // where getFirstChild() would be null, and returns the full text
                // rather than only the first text node.
                String value = child.getTextContent();
                if (name.equals("name")) {
                    bookEntity.setName(value);
                } else if (name.equals("author")) {
                    bookEntity.setAuthor(value);
                } else if (name.equals("year")) {
                    bookEntity.setYear(value);
                } else if (name.equals("price")) {
                    bookEntity.setPrice(value);
                } else if (name.equals("language")) {
                    bookEntity.setLanguage(value);
                }
            }
            bookLists.add(bookEntity);
        }
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Parses books.xml with a JAXP SAX parser, passing a fresh {@code SAXParserHandler}
 * as the handler. Nothing is returned; configuration, parsing and I/O failures are
 * reported via their stack traces.
 */
public void saxXmlParser() {
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    try {
        // The parser is created first, then the handler, then the file is parsed.
        parserFactory.newSAXParser().parse("books.xml", new SAXParserHandler());
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Parses books.xml with JDOM2 and builds one {@code Book} per child of the root
 * element from its {@code id} attribute and its name, author, year, price and
 * language child elements. The resulting list is local to this method.
 * Parsing and I/O failures are reported via their stack traces, and the input
 * stream is always closed before the method returns.
 */
public void jdomXmlParser() {
    ArrayList<Book> booksList = new ArrayList<Book>();
    SAXBuilder saxBuilder = new SAXBuilder();
    InputStream in = null;
    try {
        in = new FileInputStream("books.xml");
        InputStreamReader isr = new InputStreamReader(in, "UTF-8");
        org.jdom2.Document document = saxBuilder.build(isr);
        org.jdom2.Element rootElement = document.getRootElement();
        List<org.jdom2.Element> bookList = rootElement.getChildren();
        for (org.jdom2.Element book : bookList) {
            Book bookEntity = new Book();

            List<Attribute> attrList = book.getAttributes();
            for (Attribute attr : attrList) {
                if (attr.getName().equals("id")) {
                    bookEntity.setId(attr.getValue());
                }
            }

            List<org.jdom2.Element> bookChilds = book.getChildren();
            for (org.jdom2.Element child : bookChilds) {
                String childName = child.getName();
                if (childName.equals("name")) {
                    bookEntity.setName(child.getValue());
                } else if (childName.equals("author")) {
                    bookEntity.setAuthor(child.getValue());
                } else if (childName.equals("year")) {
                    bookEntity.setYear(child.getValue());
                } else if (childName.equals("price")) {
                    bookEntity.setPrice(child.getValue());
                } else if (childName.equals("language")) {
                    bookEntity.setLanguage(child.getValue());
                }
            }
            booksList.add(bookEntity);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (JDOMException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the file handle on success and on failure.
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Parses books.xml with dom4j and builds one {@code Book} per child of the root
 * element from its {@code id} attribute and its name, author, year, price and
 * language child elements. The resulting list is local to this method; a
 * DocumentException is reported via its stack trace.
 */
public void dom4jXmlParser() {
    ArrayList<Book> parsedBooks = new ArrayList<Book>();
    SAXReader xmlReader = new SAXReader();
    try {
        org.dom4j.Document doc = xmlReader.read(new File("books.xml"));
        org.dom4j.Element root = doc.getRootElement();
        List<org.dom4j.Element> bookElements = root.elements();
        for (org.dom4j.Element bookElement : bookElements) {
            Book current = new Book();

            // Copy the id attribute, if the element carries one.
            List<org.dom4j.Attribute> attributes = bookElement.attributes();
            for (org.dom4j.Attribute attribute : attributes) {
                if (attribute.getName().equals("id")) {
                    current.setId(attribute.getValue());
                }
            }

            // Copy each recognised child element onto the matching Book property.
            List<org.dom4j.Element> fields = bookElement.elements();
            for (org.dom4j.Element field : fields) {
                String tag = field.getName();
                String text = field.getStringValue();
                if (tag.equals("name")) {
                    current.setName(text);
                    continue;
                }
                if (tag.equals("author")) {
                    current.setAuthor(text);
                    continue;
                }
                if (tag.equals("year")) {
                    current.setYear(text);
                    continue;
                }
                if (tag.equals("price")) {
                    current.setPrice(text);
                    continue;
                }
                if (tag.equals("language")) {
                    current.setLanguage(text);
                }
            }
            parsedBooks.add(current);
        }
    } catch (DocumentException e) {
        e.printStackTrace();
    }
}
/**
 * Runs the DOM, SAX, JDOM and DOM4J parsers once each, in that order, and prints
 * the wall-clock milliseconds each call took.
 */
@Test
public void testPerformance() throws Exception {
    System.out.println("性能测试:");

    // DOM timing.
    long domStart = System.currentTimeMillis();
    domXmlParser();
    long domElapsed = System.currentTimeMillis() - domStart;
    System.out.println("DOM:" + domElapsed);

    // SAX timing.
    long saxStart = System.currentTimeMillis();
    saxXmlParser();
    long saxElapsed = System.currentTimeMillis() - saxStart;
    System.out.println("SAX:" + saxElapsed);

    // JDOM timing.
    long jdomStart = System.currentTimeMillis();
    jdomXmlParser();
    long jdomElapsed = System.currentTimeMillis() - jdomStart;
    System.out.println("JDOM:" + jdomElapsed);

    // DOM4J timing.
    long dom4jStart = System.currentTimeMillis();
    dom4jXmlParser();
    long dom4jElapsed = System.currentTimeMillis() - dom4jStart;
    System.out.println("DOM4J:" + dom4jElapsed);
}
}
(编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与站长联系删除相关内容!