应用SAX将特定格式的XML转成CSV文件
使用SAX将特定格式的XML转成CSV文件
直接贴代码
输入的XML文档:
输出的CSV文件:
直接贴代码
package org.autumn.kettle;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Command-line entry point that converts a row-oriented XML document into a
 * CSV file by streaming it through a SAX parser.
 */
public class LearnSAX {

    /**
     * Parses the input XML with {@link MyHandler} and prints the elapsed time.
     *
     * @param args optional overrides: {@code args[0]} is the XML input path
     *             (default {@code D:/hotel.xml}) and {@code args[1]} is the CSV
     *             output path (default {@code D:/xml2csv.csv})
     * @throws Exception if the parser cannot be created, the input cannot be
     *                   read or parsed, or the CSV file cannot be written
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : "D:/hotel.xml";
        String outputPath = args.length > 1 ? args[1] : MyHandler.DEFAULT_OUTPUT_PATH;

        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser parser = factory.newSAXParser();
        MyHandler handler = new MyHandler(outputPath);

        long before = System.currentTimeMillis();
        try {
            parser.parse(new File(inputPath), handler);
        } finally {
            // After a successful parse endDocument() has already closed the
            // writer, so this is a no-op. It only does work when parsing
            // aborted, and then the parse failure is the error to report.
            try {
                handler.close();
            } catch (IOException ignored) {
                // Intentionally ignored: see the comment above.
            }
        }
        long after = System.currentTimeMillis();
        System.out.println("it takes " + (after - before) + "ms");
    }
}

/**
 * SAX handler that writes one CSV line per {@code row} element (matched
 * case-insensitively on the qualified name).
 *
 * <p>Each direct child element of a row becomes one column. The text of a
 * child (including text of any elements nested inside it) is accumulated,
 * trimmed, and escaped before it is written, and an empty child yields an
 * empty column so that columns stay aligned across rows. Text that is not
 * inside a child element of a row is ignored.
 */
class MyHandler extends DefaultHandler {

    /** Output path used by the no-argument constructor. */
    static final String DEFAULT_OUTPUT_PATH = "D:/xml2csv.csv";

    private static final String OUTPUT_ENCODING = "GBK";
    private static final String ROW_ELEMENT = "row";

    /** Writer for the CSV file; non-null only between startDocument() and close(). */
    OutputStreamWriter out = null;

    private final String outputPath;
    private final List<String> fields = new ArrayList<String>();
    private final StringBuilder text = new StringBuilder();
    private boolean insideRow;
    /** 0 when not inside a column element; otherwise the nesting depth below the row. */
    private int fieldDepth;

    /** Creates a handler that writes to {@link #DEFAULT_OUTPUT_PATH}. */
    MyHandler() {
        this(DEFAULT_OUTPUT_PATH);
    }

    /**
     * Creates a handler that writes to the given file.
     *
     * @param outputPath path of the CSV file to create or overwrite
     */
    MyHandler(String outputPath) {
        this.outputPath = outputPath;
    }

    /**
     * Resets the parsing state and opens the CSV file.
     *
     * @throws SAXException if the file cannot be opened or the output encoding
     *                      is not supported, so that parsing stops immediately
     */
    @Override
    public void startDocument() throws SAXException {
        fields.clear();
        text.setLength(0);
        insideRow = false;
        fieldDepth = 0;

        FileOutputStream stream = null;
        try {
            stream = new FileOutputStream(outputPath);
            out = new OutputStreamWriter(stream, OUTPUT_ENCODING);
        } catch (IOException e) {
            // Do not leak the file handle if only the encoder creation failed.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // Intentionally ignored: the original failure is reported below.
                }
            }
            throw new SAXException("Cannot open CSV output file " + outputPath, e);
        }
    }

    /**
     * Flushes and closes the CSV file.
     *
     * @throws SAXException if closing the file fails
     */
    @Override
    public void endDocument() throws SAXException {
        try {
            close();
        } catch (IOException e) {
            throw new SAXException("Cannot close CSV output file " + outputPath, e);
        }
    }

    /** Tracks entry into a row and into the column elements inside it. */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        if (fieldDepth > 0) {
            // Element nested inside a column: its text is folded into that column.
            fieldDepth++;
        } else if (insideRow) {
            fieldDepth = 1;
            text.setLength(0);
        } else if (ROW_ELEMENT.equalsIgnoreCase(qName)) {
            insideRow = true;
            fields.clear();
        }
    }

    /**
     * Completes a column when its element closes and writes the CSV line when
     * the row closes.
     *
     * @throws SAXException if writing the line fails
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if (fieldDepth > 0) {
            fieldDepth--;
            if (fieldDepth == 0) {
                fields.add(text.toString().trim());
            }
        } else if (insideRow) {
            // With no column open, the only element that can close here is the row.
            writeRow();
            insideRow = false;
        }
    }

    /**
     * Accumulates character data for the current column. The parser may report
     * one text node in several calls, so nothing is written until the column's
     * end tag is seen.
     */
    @Override
    public void characters(char ch[], int start, int length) throws SAXException {
        if (fieldDepth > 0) {
            text.append(ch, start, length);
        }
    }

    /**
     * Closes the CSV file if it is open; calling it again has no effect.
     *
     * @throws IOException if flushing or closing the file fails
     */
    void close() throws IOException {
        OutputStreamWriter writer = out;
        out = null;
        if (writer != null) {
            writer.close();
        }
    }

    /** Writes the collected columns as one comma-separated line ending in CRLF. */
    private void writeRow() throws SAXException {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < fields.size(); i++) {
            if (i > 0) {
                line.append(',');
            }
            line.append(escape(fields.get(i)));
        }
        line.append("\r\n");
        try {
            out.write(line.toString());
        } catch (IOException e) {
            throw new SAXException("Cannot write to CSV output file " + outputPath, e);
        }
    }

    /** Quotes a value that contains a comma, a double quote or a line break. */
    private static String escape(String field) {
        boolean needsQuoting = field.indexOf(',') >= 0
                || field.indexOf('"') >= 0
                || field.indexOf('\n') >= 0
                || field.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return field;
        }
        return "\"" + field.replace("\"", "\"\"") + "\"";
    }
}
输入的XML文档:
<?xml version="1.0" encoding="UTF-8"?> <Rows> <Row><domain>YJ</domain> <hotelname>阳江猾令新另赂宾馆</hotelname> <hotelno>230016</hotelno> <lxr>邹垮龚</lxr> <tel>23238687</tel> <address>阳江市铁核估金伎区0号</address> <ssq>2</ssq> <bz/> </Row> <Row><domain>QY</domain> <hotelname>清远埔沃睬储酒店</hotelname> <hotelno>443171</hotelno> <lxr>柏弗蚊</lxr> <tel>21289491</tel> <address>清远市陨促醛映区49号</address> <ssq>3</ssq> <bz/> </Row> </Rows>
输出的CSV文件:
YJ,阳江猾令新另赂宾馆,230016,邹垮龚,23238687,阳江市铁核估金伎区0号,2, QY,清远埔沃睬储酒店,443171,柏弗蚊,21289491,清远市陨促醛映区49号,3,