Word转换PDF

yuyu888 于 2022-07-29 发布

xdocreport

maven 依赖

在pom.xml 中添加

		<dependency>
			<groupId>fr.opensagres.xdocreport</groupId>
			<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
			<version>1.0.6</version>
		</dependency>
		<dependency>
			<groupId>cn.lesper</groupId>
			<artifactId>iTextAsian</artifactId>
			<version>3.0</version>
		</dependency>

转换类


import com.lowagie.text.Font;
import com.lowagie.text.pdf.BaseFont;
import fr.opensagres.xdocreport.itext.extension.font.IFontProvider;
import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.awt.*;
import java.io.*;

public class Docx2PdfConversion {
    public void run(InputStream inputStream, String outPath) {
        try (
//            InputStream is = new FileInputStream(new File("/Users/test/Desktop/upload_202101_20210122164216.docx"));
                InputStream is = inputStream;
                OutputStream out = new FileOutputStream(new File(outPath));
            ) {
            long start = System.currentTimeMillis();
            // 1) Load DOCX into XWPFDocument
            XWPFDocument document = new XWPFDocument(is);
            // 2) Prepare Pdf options
            PdfOptions options = PdfOptions.create();
            options.fontProvider(new IFontProvider() {
                public com.lowagie.text.Font getFont(String familyName, String encoding, float size, int style, Color color) {
                    try {
                        BaseFont bfChinese = BaseFont.createFont("STSong-Light"
                                , "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
                        Font fontChinese = new Font(bfChinese,
                                size, style, color);
                        if (familyName != null)
                            fontChinese.setFamily(familyName);
                        return fontChinese;
                    } catch (Exception e) {
                        e.printStackTrace();
                        return null;
                    }
                }
            });
            // 3) Convert XWPFDocument to Pdf
            PdfConverter.getInstance().convert(document, out, options);
            System.out.println("rdtschools-Docx2PdfConversion-word-sample.docx was converted to a PDF file in :: "
                            + (System.currentTimeMillis() - start) + " milli seconds");
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }
}

说明

使用该方法有些格式word在转换过程中会丢格式,容易失真 但是xdocreport 可以是实现用word模版动态生成word:https://blog.csdn.net/liuxuli232/article/details/105580931

aspose-words

该方法需要破解,参考:https://blog.csdn.net/bibiboyx/article/details/121224466 https://www.cnblogs.com/huaixiaonian/p/14700007.html

这里使用引入jar包的方式, 源文件:https://github.com/yuyu888/yuyu888.github.io/blob/main/posts/2022/aspose-words-15.8.0-crack.jar

maven 依赖

新建一个src同级目录lib, 将jar包放入其中 在pom.xml 中添加

		<dependency>
			<groupId>com.aspose</groupId>
			<artifactId>aspose-words</artifactId>
			<version>15.8.0</version>
			<scope>system</scope>
			<systemPath>${pom.basedir}/lib/aspose-words-15.8.0-crack.jar</systemPath>
		</dependency>

转换类


import com.aspose.words.*;
import wordUtil.LicenseLoad;

import java.io.*;
import java.nio.charset.StandardCharsets;


public class WordUtil {

    public static boolean getLicense(String path)throws Exception {
        boolean result = false;
        InputStream is = null;
        try {
            is= new FileInputStream(path);
            //InputStream is = Word2PdfAsposeUtil.class.getClassLoader().getResourceAsStream("license.xml"); // license.xml应放在..\WebRoot\WEB-INF\classes路径下
            License aposeLic = new License();
            aposeLic.setLicense(is);
            result = true;
        } catch (Exception e) {
            e.printStackTrace();
        }finally {
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return result;
    }

    public static boolean doc2pdf(String inPath, String outPath,String license)throws Exception {
//        if (!getLicense(license)) {// 验证License 若不验证则转化出的pdf文档会有水印产生
//            return false;
//        }
        LicenseLoad.getLicense();

        FileOutputStream os = null;
        try {
            long old = System.currentTimeMillis();
            File file = new File(outPath);
            os = new FileOutputStream(file);
            Document doc = new Document(inPath); //Address是将要被转化的word文档
            doc.save(os, SaveFormat.PDF);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,
            // EPUB, XPS, SWF 相互转换
            long now = System.currentTimeMillis();
            System.out.println("pdf转换成功,共耗时:" + ((now - old) / 1000.0) + "秒"); // 转化用时
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }finally {
            if (os != null) {
                try {
                    os.flush();
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return true;
    }

    public static boolean docxToDoc(String inPath, String outPath,String license)throws Exception {
//        if (!getLicense(license)) {// 验证License 若不验证则转化出的pdf文档会有水印产生
//            return false;
//        }
        LicenseLoad.getLicense();

        FileOutputStream os = null;
        try {
            long old = System.currentTimeMillis();
            File file = new File(outPath);
            os = new FileOutputStream(file);
            Document doc = new Document(inPath); //Address是将要被转化的word文档
            doc.save(os, SaveFormat.DOC);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,
            // EPUB, XPS, SWF 相互转换
            long now = System.currentTimeMillis();
            System.out.println("pdf转换成功,共耗时:" + ((now - old) / 1000.0) + "秒"); // 转化用时
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }finally {
            if (os != null) {
                try {
                    os.flush();
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return true;
    }

    public static boolean docxToHtml(String inPath, String outPath,String license)throws Exception {
//        if (!getLicense(license)) {// 验证License 若不验证则转化出的pdf文档会有水印产生
//            return false;
//        }
        LicenseLoad.getLicense();
        FileOutputStream os = null;
        try {
            long old = System.currentTimeMillis();
            File file = new File(outPath);
            os = new FileOutputStream(file);
            Document doc = new Document(inPath); //Address是将要被转化的word文档
            doc.save(os, SaveFormat.HTML);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,
            // EPUB, XPS, SWF 相互转换
            long now = System.currentTimeMillis();
            System.out.println("pdf转换成功,共耗时:" + ((now - old) / 1000.0) + "秒"); // 转化用时
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }finally {
            if (os != null) {
                try {
                    os.flush();
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return true;
    }

    /**
     * 将word的内容转为html返回字符串,图片全部转为base64编码。
     * @param in
     * @return
     */
    public static String wordToHtml(InputStream in) {
        LicenseLoad.getLicense();// 验证License 若不验证则转化出的文件会有水印产生
        ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();
        String htmlText = "";
        try {
            Document doc = new Document(in);
            HtmlSaveOptions opts = new HtmlSaveOptions(SaveFormat.HTML);
            opts.setExportXhtmlTransitional(true);
            opts.setExportImagesAsBase64(true);
            opts.setExportPageSetup(true);
            doc.save(htmlStream,opts);
            htmlText = new String(htmlStream.toByteArray(), StandardCharsets.UTF_8);
            htmlStream.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return htmlText;
    }

    /**
     * html内容转word
     * @param html  html内容
     * @param wordPath  word保存路径
     * @return
     */
    public static boolean htmlToWord(String html,String wordPath) {
        LicenseLoad.getLicense();
        try {
            Document doc = new Document();
            DocumentBuilder builder = new DocumentBuilder(doc);
            builder.insertHtml(html);
            //生成doc文件
            doc.save(wordPath, SaveOptions.createSaveOptions(SaveFormat.DOC));
            return true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return false;
    }

}

说明

该方法可以进行很多中文件相互转换,但是在使用word转pdf时, 还原度比较高, 但是发现字体统一被转换成了 Arial Unicode MS,具体解决还需研究

字体问题解决办法

string userfontsfoloder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData)+ "\Microsoft\Windows\Fonts\";
        Console.WriteLine("userfontsfoloder:" + userfontsfoloder);
string outFilePdf = Path.GetDirectoryName(txtpicDir.Text) + "\" + Path.GetFileNameWithoutExtension(txtpicDir.Text) + ".pdf";
Aspose.Words.Document document = new Aspose.Words.Document(txtpicDir.Text);
ArrayList fontSources = new ArrayList(FontSettings.DefaultInstance.GetFontsSources());
//将用户目录字体添加到字体源中
Aspose.Words.Fonts.FolderFontSource folderFontSource = new Aspose.Words.Fonts.FolderFontSource(userfontsfoloder, true);
        fontSources.Add(folderFontSource);
Aspose.Words.Fonts.FontSourceBase[] updatedFontSources = (Aspose.Words.Fonts.FontSourceBase[])fontSources.ToArray(typeof(Aspose.Words.Fonts.FontSourceBase));
        FontSettings.DefaultInstance.SetFontsSources(updatedFontSources);