高防服务器

Java怎么实现Word/Pdf/TXT转html


Java怎么实现Word/Pdf/TXT转html

发布时间:2022-03-04 17:38:22 来源:高防服务器网 阅读:75 作者:iii 栏目:web开发

本篇内容介绍了“Java怎么实现Word/Pdf/TXT转html”的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!

一:Java实现将word转换为html

   1:引入依赖

 <dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>fr.opensagres.xdocreport.document</artifactId>
   <version>1.0.5</version>
 </dependency>
 <dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
   <version>1.0.5</version>
 </dependency>
 <dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi</artifactId>
   <version>3.12</version>
 </dependency>
 <dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi-scratchpad</artifactId>
   <version>3.12</version>
 </dependency>

  2:代码demo

  1 package com.svse.controller;    2     3 import javax.xml.parsers.DocumentBuilderFactory;    4 import javax.xml.parsers.ParserConfigurationException;    5 import javax.xml.transform.OutputKeys;    6 import javax.xml.transform.Transformer;    7 import javax.xml.transform.TransformerException;    8 import javax.xml.transform.TransformerFactory;    9 import javax.xml.transform.dom.DOMSource;   10 import javax.xml.transform.stream.StreamResult;   11    12 import org.apache.poi.hwpf.HWPFDocument;   13 import org.apache.poi.hwpf.converter.PicturesManager;   14 import org.apache.poi.hwpf.converter.WordToHtmlConverter;   15 import org.apache.poi.hwpf.usermodel.PictureType;   16 import org.apache.poi.xwpf.converter.core.BasicURIResolver;   17 import org.apache.poi.xwpf.converter.core.FileImageExtractor;   18 import org.apache.poi.xwpf.converter.core.FileURIResolver;   19 import org.apache.poi.xwpf.converter.core.IURIResolver;   20 import org.apache.poi.xwpf.converter.core.IXWPFConverter;   21 import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;   22 import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;   23 import org.apache.poi.xwpf.usermodel.XWPFDocument;   24 /**   25  * word 转换成html   26  */   27 public class TestWordToHtml {   28    29     public static  final String STORAGEPATH="C://works//files//";   30     public static  final String IP="192.168.30.222";   31     public static  final String PORT="8010";   32     public static void main(String[] args) throws IOException, TransformerException, ParserConfigurationException {   33         TestWordToHtml wt=new TestWordToHtml();   34         //wt.Word2003ToHtml("甲骨文考证.doc");   35         wt.Word2007ToHtml("甲骨文考证.docx");   36    37     }   38          39      /**   40      * 2003版本word转换成html   41      * @throws IOException   42      * @throws TransformerException   43      * @throws ParserConfigurationException   44      */   45     public void Word2003ToHtml(String fileName) throws IOException, 
TransformerException, ParserConfigurationException {   46           47         final String imagepath = STORAGEPATH+"fileImage/";//解析时候如果doc文件中有图片  图片会保存在此路径   48         final String strRanString=getRandomNum();   49         String filepath =STORAGEPATH;   50         String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2003.html";   51         final String file = filepath + fileName;   52         InputStream input = new FileInputStream(new File(file));   53         HWPFDocument wordDocument = new HWPFDocument(input);   54         WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());   55         //设置图片存放的位置   56         wordToHtmlConverter.setPicturesManager(new PicturesManager() {   57             public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {   58                 File imgPath = new File(imagepath);   59                 if(!imgPath.exists()){//图片目录不存在则创建   60                     imgPath.mkdirs();   61                 }   62                    63                 File file = new File(imagepath +strRanString+suggestedName);   64                 try {   65                     OutputStream os = new FileOutputStream(file);   66                     os.write(content);   67                     os.close();   68                 } catch (FileNotFoundException e) {   69                     e.printStackTrace();   70                 } catch (IOException e) {   71                     e.printStackTrace();   72                 }   73                    74                 return  "http://"+IP+":"+PORT+"//uploadFile/fileImage/"+strRanString+suggestedName;   75                // return imagepath +strRanString+suggestedName;   76             }   77         });   78            79         //解析word文档   80         wordToHtmlConverter.processDocument(wordDocument);   81         Document htmlDocument = 
wordToHtmlConverter.getDocument();   82            83         File htmlFile = new File(filepath +strRanString+htmlName);   84         OutputStream outStream = new FileOutputStream(htmlFile);   85            86    87         DOMSource domSource = new DOMSource(htmlDocument);   88         StreamResult streamResult = new StreamResult(outStream);   89    90         TransformerFactory factory = TransformerFactory.newInstance();   91         Transformer serializer = factory.newTransformer();   92         serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");   93         serializer.setOutputProperty(OutputKeys.INDENT, "yes");   94         serializer.setOutputProperty(OutputKeys.METHOD, "html");   95            96         serializer.transform(domSource, streamResult);   97         outStream.close();   98            99         System.out.println("生成html文件路径:"+ "http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);  100     }  101   102     /**  103      * 2007版本word转换成html  104      * @throws IOException  105      */  106     public void Word2007ToHtml(String fileName) throws IOException {  107           108        final String strRanString=getRandomNum();  109           110         String filepath = STORAGEPATH+strRanString;  111         String htmlName =fileName.substring(0, fileName.indexOf("."))+ "2007.html";  112         File f = new File(STORAGEPATH+fileName);    113         if (!f.exists()) {    114             System.out.println("Sorry File does not Exists!");    115         } else {    116             if (f.getName().endsWith(".docx") || f.getName().endsWith(".DOCX")) {    117                 try {  118                     // 1) 加载word文档生成 XWPFDocument对象    119                     InputStream in = new FileInputStream(f);    120                     XWPFDocument document = new XWPFDocument(in);    121         122                     // 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)    123                     File imageFolderFile = new File(filepath);    124  
                   XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));    125                     options.setExtractor(new FileImageExtractor(imageFolderFile));    126                     options.URIResolver(new IURIResolver() {  127                         public String resolve(String uri) {  128                             //http://192.168.30.222:8010//uploadFile/....  129                             return "http://"+IP+":"+PORT+"//uploadFile/"+strRanString +"/"+ uri;  130                         }  131                     });  132                       133                     options.setIgnoreStylesIfUnused(false);    134                     options.setFragment(true);    135                         136                     // 3) 将 XWPFDocument转换成XHTML    137                     OutputStream out = new FileOutputStream(new File(filepath + htmlName));    138                     IXWPFConverter<XHTMLOptions> converter = XHTMLConverter.getInstance();  139                     converter.convert(document,out, options);  140                     //XHTMLConverter.getInstance().convert(document, out, options);    141                     System.out.println("html路径:"+"http://"+IP+":"+PORT+"//uploadFile/"+strRanString+htmlName);  142                 } catch (Exception e) {  143                     e.printStackTrace();  144                 }  145               146             } else {    147                 System.out.println("Enter only MS Office 2007+ files");    148             }    149         }    150     }    151   152      /**  153      *功能说明:生成时间戳  154      *创建人:zsq  155      *创建时间:2019年12月7日 下午2:37:09  156      *  157      */  158      public static String getRandomNum(){  159          Date dt = new Date();  160          SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");    161          String str=sdf.format(dt);  162          return str;  163      }  164        165    }

二:Java实现将Pdf转换为html

  1: 引入依赖

 <dependency>
   <groupId>net.sf.cssbox</groupId>
   <artifactId>pdf2dom</artifactId>
   <version>1.7</version>
 </dependency>
 <dependency>
   <groupId>org.apache.pdfbox</groupId>
   <artifactId>pdfbox</artifactId>
   <version>2.0.12</version>
 </dependency>
 <dependency>
   <groupId>org.apache.pdfbox</groupId>
   <artifactId>pdfbox-tools</artifactId>
   <version>2.0.12</version>
 </dependency>

2:代码Demo

 1 public class PdfToHtml {   2    3   /*   4     pdf转换html   5      */   6     public void pdfToHtmlTest(String inPdfPath,String outputHtmlPath)  {   7        // String outputPath = "C:\works\files\ZSQ保密知识测试题库.html";   8     9        //try() 写在()里面会自动关闭流  10         try{  11             BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputHtmlPath)),"utf-8"));  12             //加载PDF文档  13             //PDDocument document = PDDocument.load(bytes);  14             PDDocument document = PDDocument.load(new File(inPdfPath));  15             PDFDomTree pdfDomTree = new PDFDomTree();  16             pdfDomTree.writeText(document,out);  17         } catch (Exception e) {  18             e.printStackTrace();  19         }  20     }  21   22     public static void main(String[] args) throws IOException {  23         PdfToHtml ph=new PdfToHtml();  24         String pdfPath="C:\works\files\武研中心行政考勤制度.pdf";  25         String outputPath="C:\works\files\武研中心行政考勤制度.html";  26         ph.pdfToHtmlTest(pdfPath,outputPath);  27   }  28   29 }

三:Java实现将TXT转换为html

 1  /*   2      * txt文档转html   3        filePath:txt原文件路径   4        htmlPosition:转化后生成的html路径   5     */   6     public static void txtToHtml(String filePath, String htmlPosition) {   7         try {   8             //String encoding = "GBK";   9             File file = new File(filePath);  10             if (file.isFile() && file.exists()) { // 判断文件是否存在  11                 InputStreamReader read = new InputStreamReader(new FileInputStream(file), "GBK");  12                 // 考虑到编码格式  13                 BufferedReader bufferedReader = new BufferedReader(read);  14                 // 写文件  15                 FileOutputStream fos = new FileOutputStream(new File(htmlPosition));  16                 OutputStreamWriter osw = new OutputStreamWriter(fos, "GBK");  17                 BufferedWriter bw = new BufferedWriter(osw);  18                 String lineTxt = null;  19                 while ((lineTxt = bufferedReader.readLine()) != null) {  20                     bw.write("&nbsp&nbsp&nbsp"+lineTxt + "</br>");  21                 }  22                 bw.close();  23                 osw.close();  24                 fos.close();  25                 read.close();  26             } else {  27                 System.out.println("找不到指定的文件");  28             }  29         } catch (Exception e) {  30             System.out.println("读取文件内容出错");  31             e.printStackTrace();  32         }  33     }

“Java怎么实现Word/Pdf/TXT转html”的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注高防服务器网网站,小编将为大家输出更多高质量的实用文章!

[微信提示:高防服务器能助您降低 IT 成本,提升运维效率,使您更专注于核心业务创新。]

[图文来源于网络,不代表本站立场,如有侵权,请联系高防服务器网删除]
[