运用java解析XML

使用java解析XML

Java本身提供了三种API进行XML文本的解析。

1.DOM：DOM方式解析XML文本时，先分析整个XML，形成XML的DOM树，然后节点列表的方式遍历它。

package xmlHandler;

import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

public class DOMParserXml {

	/**
	 * @param args
	 */
	public static void main(String[] args) {
		// TODO Auto-generated method stub
        DOMParserXml domparser=new DOMParserXml();
        domparser.domParserForXML(System.getProperty("user.dir")+"/person.xml");
        domparser.XPathGetValue(System.getProperty("user.dir")+"/person.xml");
	}

	// DOM解析器生成tree，解析过程也必须严格按照tree结构写成循环解析
	public void domParserForXML(String xmlPath) {
		Person person = new Person();// 用于存放解析出的值
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();// 创建XML的DOM工厂
		
		try {
			DocumentBuilder builder = factory.newDocumentBuilder();
			File xmlFile = new File(xmlPath);// 可以用InputStream、URL、File来设置XML来源
			Document doc = builder.parse(xmlFile);// 读入文档,这是一个接口
			Element root = doc.getDocumentElement();// 获取根节点
			System.out.println("根节点名称：" + root.getTagName());
			NodeList children = root.getChildNodes();// 这里的child可以使文本、子元素、其他节点
			for (int i = 0; i < children.getLength(); i++) {
				Node child = children.item(i);
				//使用语句factory.setIgnoringElementContentWhitespace(true);//过滤空白字符，就不需要判断类型了。
				if (child instanceof Element) {// 解析器将标签之间的空格也认为是子元素，用instanceof过滤它们
					
					Element childElement = (Element) child;
					System.out.print("\t子节点名称" + childElement.getTagName());
					System.out.println("\t属性值："
							+ childElement.getAttribute("sex"));
					//可以通过getAttributes方法获取所有属性值的键值对。
					person.setSex(childElement.getAttribute("sex"));
					NodeList grandsons = childElement.getChildNodes();
					for (int j = 0; j < grandsons.getLength(); j++) {
						Node grandson = grandsons.item(j);
						if (grandson instanceof Element) {
							Element grandElement = (Element) grandson;
							System.out.print("\t\t孙节点名称:"
									+ grandElement.getTagName());
							Text textNode = (Text) grandElement.getFirstChild();
							System.out.println("\t孙节点文本值:"
									+ textNode.getData().trim());
							if (grandElement.getTagName().equals("name")) {
								person.setName(textNode.getData().trim());
							}
							if (grandElement.getTagName().equals("description")) {
								person
										.setDescription(textNode.getData()
												.trim());
							}
						}
					}
				}
			}
		} catch (ParserConfigurationException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}// 创建解析器
		catch (SAXException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		System.out.println("解析结果：：：" + person.toString());
	}

	
	public void XPathGetValue(String xmlFilePath){
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();// 创建XML的DOM工厂
		try {
			DocumentBuilder builder = factory.newDocumentBuilder();
			File file=new File(xmlFilePath);
			Document doc=builder.parse(file);
			//通过XPath表达式快速访问XML文件的节点及其文本值
			XPathFactory xpathFactory=XPathFactory.newInstance();
			XPath xpath=xpathFactory.newXPath();
			System.out.println("==============使用XPath快速访问XML节点值：");
			String query="/persons/person/name";
			System.out.println(xpath.evaluate(query, doc));
			
		} catch (ParserConfigurationException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (SAXException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (XPathExpressionException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
	}
}

2.SAXParser使用事件流驱动的方式进行XML文本的解析。

package xmlHandler;

import java.io.File;
import java.io.IOException;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

public class SAXParserXML {

	/**
	 * @param args
	 */
	public static void main(String[] args) {
		// TODO Auto-generated method stub
		SAXParserFactory factory = SAXParserFactory.newInstance();
		try {
			SAXParser parser = factory.newSAXParser();
			File file = new File(System.getProperty("user.dir") + "/person.xml");
			parser.parse(file, new PersonXMLHandler());// 此处的file可以使文件、输入流、或URL字符串
		} catch (ParserConfigurationException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (SAXException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}

	}

}

class PersonXMLHandler extends DefaultHandler {
	Person person;
	String temp = "";
	String currenttag = null;

	@Override
	public void characters(char[] ch, int start, int length)
			throws SAXException {
		// TODO Auto-generated method stub
		if (currenttag != null) {
			String value = new String(ch, start, length);
			if (currenttag.equals("name")) {
				person.setName(value);
			}
			if (currenttag.equals("description")) {
				temp += value;// 处理长文本
			}
		}
	}

	@Override
	public void endDocument() throws SAXException {
		// TODO Auto-generated method stub
		super.endDocument();
	}

	@Override
	public void endElement(String uri, String localName, String qName)
			throws SAXException {
		// TODO Auto-generated method stub
		currenttag=null;
		if (qName.endsWith("description")) {
			person.setDescription(temp);
			temp = "";
		}
		if (qName.endsWith("person"))
			System.out.println("当前解析出的结果：" + person.toString());
	}

	@Override
	public void startDocument() throws SAXException {
		// TODO Auto-generated method stub
		super.startDocument();
	}

	@Override
	public void startElement(String uri, String localName, String qName,
			Attributes attributes) throws SAXException {
		// TODO Auto-generated method stub
		if (qName.equals("person")) {
			person = new Person();
			person.setSex(attributes.getValue("sex"));
		}
		currenttag = qName;
	}

}

3，StAX同样也是事件流驱动的解析器，只是不再进行startElement、characters的区分。

package xmlHandler;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

public class StAXParserXML {

	/**
	 * @param args
	 */
	public static void main(String[] args) {
		// TODO Auto-generated method stub
        StAXParserXML parser=new StAXParserXML();
        parser.StAXParser(System.getProperty("user.dir") + "/person.xml");
	}

	// StAXParser 是一种“pull parser”，允许通过循环来迭代访问所有事件
	public void StAXParser(String xmlPath) {
		XMLInputFactory factory = XMLInputFactory.newInstance();
		Person person=new Person();
		File file = new File(xmlPath);
		try {
			//参数必须是InputStream类型，可以是File的包装器、URL打开的InputStream
			XMLStreamReader parser = factory
					.createXMLStreamReader(new FileInputStream(file));
			// parser遍历xml文档时产生一系列事件
			while (parser.hasNext()) {
				int event = parser.next();// 获取当前event
				if (event == XMLStreamConstants.START_ELEMENT) {//如果是element
					if (parser.getLocalName().equals("person")) {//判断标签值
						person.setSex(parser.getAttributeValue(0));//获取属性值
					}
					if(parser.getLocalName().equals("name")){
						person.setName(parser.getElementText());//获取元素文本
					}
					if(parser.getLocalName().equals("description")){
						person.setDescription(parser.getElementText());
					}
				}
			}
		} catch (FileNotFoundException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (XMLStreamException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}// 这里的参数只能是InputStream
		System.out.println(person.toString());
	}

}

具体的参见源代码就行了

相关推荐