失眠网,内容丰富有趣,生活中的好帮手!
失眠网 > Java 将word文档转成html内容 输出到富文本

Java 将word文档转成html内容 输出到富文本

时间:2019-07-01 17:44:00

相关推荐

Java 将word文档转成html内容 输出到富文本

使用Java将word文档转成html内容,输出到富文本

上传word文档 解析到富文本

将 Word 文档解析成 HTML 的工具类

import mon.utils.DateUtil;import mon.utils.RandomUtil;import mons.fileupload.FileItem;import mons.fileupload.FileItemFactory;import mons.fileupload.disk.DiskFileItemFactory;import mons.io.FileUtils;import org.apache.poi.hwpf.HWPFDocument;import org.apache.poi.hwpf.converter.WordToHtmlConverter;import org.apache.poi.hwpf.usermodel.PictureType;import org.apache.poi.xwpf.converter.core.BasicURIResolver;import org.apache.poi.xwpf.converter.core.FileImageExtractor;import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;import org.apache.poi.xwpf.usermodel.XWPFDocument;import org.slf4j.Logger;import org.slf4j.LoggerFactory;import org.springframework.web.multipart.MultipartFile;import org.springframework.monsMultipartFile;import org.w3c.dom.Document;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.transform.OutputKeys;import javax.xml.transform.Transformer;import javax.xml.transform.TransformerFactory;import javax.xml.transform.dom.DOMSource;import javax.xml.transform.stream.StreamResult;import java.io.*;/*** @date * @description*/public class WordToHtmlUtil {private static final Logger logger = LoggerFactory.getLogger(WordToHtmlUtil.class);/*** 上传Word文档,返回解析后的Html*/public static String docToHtmlText(MultipartFile file) throws Exception {//使用字符数组流获取解析的内容ByteArrayOutputStream baos = new ByteArrayOutputStream();OutputStream outStream = new BufferedOutputStream(baos);try {//将上传的文件传入Document转换HWPFDocument wordDocument = new HWPFDocument(file.getInputStream());Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);//将读取到的图片上传并添加链接地址wordToHtmlConverter.setPicturesManager((imageStream, pictureType, name, width, height) -> {try {//首先要判断图片是否能识别if (pictureType.equals(PictureType.UNKNOWN)) {return "[不能识别的图片]";}//此处上传到自己的文件服务器 todoString qiNiuName = "";//文件名boolean upload = FileUtil.upload(new 
FileInputStream(fileImage), qiNiuName); return "上传后的图片地址";} catch (Exception e) {logger.info("upload exception", e);}return "[图片上传失败]";});// word文档转Html文档wordToHtmlConverter.processDocument(wordDocument);Document htmlDocument = wordToHtmlConverter.getDocument();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(outStream);TransformerFactory factory = TransformerFactory.newInstance();Transformer serializer = factory.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "html");serializer.transform(domSource, streamResult);String content = baos.toString();logger.info("docToHtmlText--->{}", content);return content;} catch (Exception e) {logger.error("docToHtmlText 异常", e);throw new AppRuntimeException(e);} finally {baos.close();outStream.close();}}/*** 上传docx文档,返回解析后的Html*/public static String docxToHtmlText(MultipartFile file) throws Exception {ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();try {// 将上传的文件传入Document转换XWPFDocument docxDocument = new XWPFDocument(file.getInputStream());XHTMLOptions options = XHTMLOptions.create();// 设置图片存储路径String path = System.getProperty("java.io.tmpdir");String firstImagePathStr = path + "/" + System.currentTimeMillis();options.setExtractor(new FileImageExtractor(new File(firstImagePathStr)));options.URIResolver(new BasicURIResolver(firstImagePathStr));// 转换htmldocxDocument.createNumbering();XHTMLConverter.getInstance().convert(docxDocument, htmlStream, options);String htmlStr = htmlStream.toString();String middleImageDirStr = "/word/media";String imageDirStr = firstImagePathStr + middleImageDirStr;File imageDir = new File(imageDirStr);String[] imageList = imageDir.list();if (imageList != null) {for (int i = 0; i < imageList.length; i++) {try {String oneImagePathStr = imageDirStr + "/" + imageList[i];File fileImage = new File(oneImagePathStr);if 
(fileImage.exists()) {String name = fileImage.getName();String suffix = name.substring(name.indexOf("."), name.length()).toLowerCase();//此处上传到自己的文件服务器 todoString qiNiuName = "";//文件名boolean upload = FileUtil.upload(new FileInputStream(fileImage), qiNiuName); if (!upload) {continue;} else {//修改文档中的图片信息htmlStr = htmlStr.replace(oneImagePathStr, "上传后的图片地址");}}} catch (Exception e) {logger.info("upload docxToHtmlText exception", e);}}}//删除图片路径File firstImagePath = new File(firstImagePathStr);FileUtils.deleteDirectory(firstImagePath);return htmlStr;} catch (Exception e) {logger.error("docxToHtmlText 解析异常", e);throw new AppRuntimeException(e);} finally {if (htmlStream != null) {htmlStream.close();}}}/* public static void main(String[] args) {try {String content = docxToHtmlText(getMulFileByPath("C:\\Users\\Administrator\\Desktop\\hah.docx"));System.out.println(content);} catch (Exception e) {e.printStackTrace();}}*//*** 获取MultipartFile文件** @param picPath* @return*/private static MultipartFile getMulFileByPath(String picPath) {FileItem fileItem = createFileItem(picPath);MultipartFile mfile = new CommonsMultipartFile(fileItem);return mfile;}private static FileItem createFileItem(String filePath) {FileItemFactory factory = new DiskFileItemFactory(16, null);String textFieldName = "textField";int num = filePath.lastIndexOf(".");String extFile = filePath.substring(num);FileItem item = factory.createItem(textFieldName, "text/plain", true,"MyFileName" + extFile);File newfile = new File(filePath);int bytesRead = 0;byte[] buffer = new byte[8192];try {FileInputStream fis = new FileInputStream(newfile);OutputStream os = item.getOutputStream();while ((bytesRead = fis.read(buffer, 0, 8192))!= -1) {os.write(buffer, 0, bytesRead);}os.close();fis.close();} catch (IOException e) {e.printStackTrace();}return item;}}

如果觉得《Java 将word文档转成html内容 输出到富文本》对你有帮助,请点赞、收藏,并留下你的观点哦!

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。