java把Word文件转成html的字符串返回出去

1、需求是把前端上传的word文件解析出来,生成html的字符串返回给前端去展示,Word里面的图片可以忽略不显示,所以这段代码去掉了解析图片的代码

package com.lieni.core.util;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;

import com.itextpdf.text.log.Logger;
import com.itextpdf.text.log.LoggerFactory;

/**
 * Created by LTmei on 2018/10/10 10:00
 */
public class Word2HtmlUtil {

    /**
     * logger
     */
    private static final Logger logger = LoggerFactory.getLogger(Word2HtmlUtil.class);

    public static String Word2007ToHtml(MultipartFile file) throws IOException {

        if (file.isEmpty() || file.getSize() <= 0) {
            logger.error("Sorry File does not Exists!");
            return null;
        } else {
            if (file.getOriginalFilename().endsWith(".docx") || file.getOriginalFilename().endsWith(".DOCX")) {

                // 1) 加载word文档生成 XWPFDocument对象
                InputStream in = file.getInputStream();
                XWPFDocument document = new XWPFDocument(in);

                // 也可以使用字符数组流获取解析的内容
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                XHTMLConverter.getInstance().convert(document, baos, null);
                String content = baos.toString();
                baos.close();
                return content;
            } else {
                logger.error("Enter only MS Office 2007+ files");
                return null;
            }
        }
    }

    public static String Word2003ToHtml(MultipartFile file)
            throws IOException, ParserConfigurationException, TransformerException {

        if (file.isEmpty() || file.getSize() <= 0) {
            logger.error("Sorry File does not Exists!");
            return null;
        } else {
            if (file.getOriginalFilename().endsWith(".doc") || file.getOriginalFilename().endsWith(".DOC")) {
                InputStream input = file.getInputStream();
                HWPFDocument wordDocument = new HWPFDocument(input);
                WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                        DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

                // 解析word文档
                wordToHtmlConverter.processDocument(wordDocument);
                Document htmlDocument = wordToHtmlConverter.getDocument();

                // 也可以使用字符数组流获取解析的内容
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                DOMSource domSource = new DOMSource(htmlDocument);
                StreamResult streamResult = new StreamResult(baos);

                TransformerFactory factory = TransformerFactory.newInstance();
                Transformer serializer = factory.newTransformer();
                serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                serializer.setOutputProperty(OutputKeys.INDENT, "yes");
                serializer.setOutputProperty(OutputKeys.METHOD, "html");
                serializer.transform(domSource, streamResult);

                // 也可以使用字符数组流获取解析的内容
                String content = new String(baos.toByteArray());
                baos.close();
                return content;
            } else {
                logger.error("Enter only MS Office 2003 files");
                return null;
            }
        }

    }

}