失眠网,内容丰富有趣,生活中的好帮手!
失眠网 > Java如何将word转成html_[JavaWeb基础] 025.JAVA把word转换成html

Java如何将word转成html_[JavaWeb基础] 025.JAVA把word转换成html

时间:2018-09-24 12:07:09

相关推荐

Java如何将word转成html_[JavaWeb基础] 025.JAVA把word转换成html

使用第三方库 Apache POI(并借助 jsoup 整理输出)把 Word 文档转换成 HTML,下面直接上代码。

package com.babybus.sdteam.wordtopdf;

import java.io.BufferedWriter;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.OutputStreamWriter;

import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerConfigurationException;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.jsoup.Jsoup;

import org.w3c.dom.Document;

public class WordToHtml {

/**

* 转换word到html

*

* @param path

* @return

* @throws IOException

* @throws FileNotFoundException

* @throws ParserConfigurationException

* @throws TransformerException

*/

public static String convertWordToHtml(String path)

throws FileNotFoundException, IOException,

ParserConfigurationException, TransformerException {

// 转换的结果路径

String htmlPath = "D://test//1.html";

// 创建word文档

HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(path));

// 兼容 以上版本

// XSSFWorkbook xssfwork=new XSSFWorkbook(new FileInputStream(fileName));

// 创建一个转换器

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(

DocumentBuilderFactory.newInstance().newDocumentBuilder()

.newDocument());

// 设置图片管理器

wordToHtmlConverter.setPicturesManager(new PicturesManager() {

public String savePicture(byte[] content, PictureType pictureType,

String suggestedName, float widthInches, float heightInches) {

return "test/" + suggestedName;

}

});

// 处理word文档

wordToHtmlConverter.processDocument(wordDocument);

// 保存图片集合

List pics = wordDocument.getPicturesTable().getAllPictures();

if (pics != null) {

for (int i = 0; i < pics.size(); i++) {

Picture pic = (Picture) pics.get(i);

try {

pic.writeImageContent(new FileOutputStream("D:/test/"

+ pic.suggestFullFileName()));

} catch (FileNotFoundException e) {

e.printStackTrace();

}

}

}

// 取出转换的文档

Document htmlDocument = wordToHtmlConverter.getDocument();

// 创建输出流 和创建DOM源

ByteArrayOutputStream out = new ByteArrayOutputStream();

DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(out);

// 转换工厂

TransformerFactory tf = TransformerFactory.newInstance();

Transformer serializer = tf.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "HTML");

serializer.transform(domSource, streamResult);

out.close();

// 写入文件

writeFile(new String(out.toByteArray()), htmlPath);

return htmlPath;

}

/**

* 写入文件

*

* @param content

* @param path

*/

public static void writeFile(String content, String path) {

FileOutputStream fos = null;

BufferedWriter bw = null;

org.jsoup.nodes.Document doc = Jsoup.parse(content);

content = doc.html();

try {

File file = new File(path);

fos = new FileOutputStream(file);

bw = new BufferedWriter(new OutputStreamWriter(fos, "GB2312"));

bw.write(content);

} catch (FileNotFoundException fnfe) {

fnfe.printStackTrace();

} catch (IOException ioe) {

ioe.printStackTrace();

} finally {

try {

if (bw != null)

bw.close();

if (fos != null)

fos.close();

} catch (IOException ie) {

}

}

}

}

示例代码统一使用 GB2312 编码。如果改用 UTF-8,序列化 DOM、把字节转成字符串、写入文件这三处的编码必须保持一致,否则会出现乱码。

本站文章来自宝宝巴士。

如果觉得《Java如何将word转成html_[JavaWeb基础] 025.JAVA把word转换成html》对你有帮助,请点赞、收藏,并留下你的观点哦!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。