xdocreport
maven 依赖
在pom.xml 中添加
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>cn.lesper</groupId>
<artifactId>iTextAsian</artifactId>
<version>3.0</version>
</dependency>
转换类
import com.lowagie.text.Font;
import com.lowagie.text.pdf.BaseFont;
import fr.opensagres.xdocreport.itext.extension.font.IFontProvider;
import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.awt.*;
import java.io.*;
public class Docx2PdfConversion {
public void run(InputStream inputStream, String outPath) {
try (
// InputStream is = new FileInputStream(new File("/Users/test/Desktop/upload_202101_20210122164216.docx"));
InputStream is = inputStream;
OutputStream out = new FileOutputStream(new File(outPath));
) {
long start = System.currentTimeMillis();
// 1) Load DOCX into XWPFDocument
XWPFDocument document = new XWPFDocument(is);
// 2) Prepare Pdf options
PdfOptions options = PdfOptions.create();
options.fontProvider(new IFontProvider() {
public com.lowagie.text.Font getFont(String familyName, String encoding, float size, int style, Color color) {
try {
BaseFont bfChinese = BaseFont.createFont("STSong-Light"
, "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
Font fontChinese = new Font(bfChinese,
size, style, color);
if (familyName != null)
fontChinese.setFamily(familyName);
return fontChinese;
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
});
// 3) Convert XWPFDocument to Pdf
PdfConverter.getInstance().convert(document, out, options);
System.out.println("rdtschools-Docx2PdfConversion-word-sample.docx was converted to a PDF file in :: "
+ (System.currentTimeMillis() - start) + " milli seconds");
} catch (Throwable e) {
e.printStackTrace();
}
}
}
说明
使用该方法有些格式word在转换过程中会丢格式,容易失真 但是xdocreport 可以是实现用word模版动态生成word:https://blog.csdn.net/liuxuli232/article/details/105580931
aspose-words
该方法需要破解,参考:https://blog.csdn.net/bibiboyx/article/details/121224466 https://www.cnblogs.com/huaixiaonian/p/14700007.html
这里使用引入jar包的方式, 源文件:https://github.com/yuyu888/yuyu888.github.io/blob/main/posts/2022/aspose-words-15.8.0-crack.jar
maven 依赖
新建一个src同级目录lib, 将jar包放入其中 在pom.xml 中添加
<dependency>
<groupId>com.aspose</groupId>
<artifactId>aspose-words</artifactId>
<version>15.8.0</version>
<scope>system</scope>
<systemPath>${pom.basedir}/lib/aspose-words-15.8.0-crack.jar</systemPath>
</dependency>
转换类
import com.aspose.words.*;
import wordUtil.LicenseLoad;
import java.io.*;
import java.nio.charset.StandardCharsets;
public class WordUtil {
public static boolean getLicense(String path)throws Exception {
boolean result = false;
InputStream is = null;
try {
is= new FileInputStream(path);
//InputStream is = Word2PdfAsposeUtil.class.getClassLoader().getResourceAsStream("license.xml"); // license.xml应放在..\WebRoot\WEB-INF\classes路径下
License aposeLic = new License();
aposeLic.setLicense(is);
result = true;
} catch (Exception e) {
e.printStackTrace();
}finally {
if (is != null) {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
public static boolean doc2pdf(String inPath, String outPath,String license)throws Exception {
// if (!getLicense(license)) {// 验证License 若不验证则转化出的pdf文档会有水印产生
// return false;
// }
LicenseLoad.getLicense();
FileOutputStream os = null;
try {
long old = System.currentTimeMillis();
File file = new File(outPath);
os = new FileOutputStream(file);
Document doc = new Document(inPath); //Address是将要被转化的word文档
doc.save(os, SaveFormat.PDF);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,
// EPUB, XPS, SWF 相互转换
long now = System.currentTimeMillis();
System.out.println("pdf转换成功,共耗时:" + ((now - old) / 1000.0) + "秒"); // 转化用时
} catch (Exception e) {
e.printStackTrace();
return false;
}finally {
if (os != null) {
try {
os.flush();
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return true;
}
public static boolean docxToDoc(String inPath, String outPath,String license)throws Exception {
// if (!getLicense(license)) {// 验证License 若不验证则转化出的pdf文档会有水印产生
// return false;
// }
LicenseLoad.getLicense();
FileOutputStream os = null;
try {
long old = System.currentTimeMillis();
File file = new File(outPath);
os = new FileOutputStream(file);
Document doc = new Document(inPath); //Address是将要被转化的word文档
doc.save(os, SaveFormat.DOC);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,
// EPUB, XPS, SWF 相互转换
long now = System.currentTimeMillis();
System.out.println("pdf转换成功,共耗时:" + ((now - old) / 1000.0) + "秒"); // 转化用时
} catch (Exception e) {
e.printStackTrace();
return false;
}finally {
if (os != null) {
try {
os.flush();
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return true;
}
public static boolean docxToHtml(String inPath, String outPath,String license)throws Exception {
// if (!getLicense(license)) {// 验证License 若不验证则转化出的pdf文档会有水印产生
// return false;
// }
LicenseLoad.getLicense();
FileOutputStream os = null;
try {
long old = System.currentTimeMillis();
File file = new File(outPath);
os = new FileOutputStream(file);
Document doc = new Document(inPath); //Address是将要被转化的word文档
doc.save(os, SaveFormat.HTML);// 全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF,
// EPUB, XPS, SWF 相互转换
long now = System.currentTimeMillis();
System.out.println("pdf转换成功,共耗时:" + ((now - old) / 1000.0) + "秒"); // 转化用时
} catch (Exception e) {
e.printStackTrace();
return false;
}finally {
if (os != null) {
try {
os.flush();
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return true;
}
/**
* 将word的内容转为html返回字符串,图片全部转为base64编码。
* @param in
* @return
*/
public static String wordToHtml(InputStream in) {
LicenseLoad.getLicense();// 验证License 若不验证则转化出的文件会有水印产生
ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();
String htmlText = "";
try {
Document doc = new Document(in);
HtmlSaveOptions opts = new HtmlSaveOptions(SaveFormat.HTML);
opts.setExportXhtmlTransitional(true);
opts.setExportImagesAsBase64(true);
opts.setExportPageSetup(true);
doc.save(htmlStream,opts);
htmlText = new String(htmlStream.toByteArray(), StandardCharsets.UTF_8);
htmlStream.close();
} catch (Exception e) {
e.printStackTrace();
}
return htmlText;
}
/**
* html内容转word
* @param html html内容
* @param wordPath word保存路径
* @return
*/
public static boolean htmlToWord(String html,String wordPath) {
LicenseLoad.getLicense();
try {
Document doc = new Document();
DocumentBuilder builder = new DocumentBuilder(doc);
builder.insertHtml(html);
//生成doc文件
doc.save(wordPath, SaveOptions.createSaveOptions(SaveFormat.DOC));
return true;
} catch (Exception e) {
e.printStackTrace();
}
return false;
}
}
说明
该方法可以进行很多中文件相互转换,但是在使用word转pdf时, 还原度比较高, 但是发现字体统一被转换成了 Arial Unicode MS,具体解决还需研究
字体问题解决办法
string userfontsfoloder = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData)+ "\Microsoft\Windows\Fonts\";
Console.WriteLine("userfontsfoloder:" + userfontsfoloder);
string outFilePdf = Path.GetDirectoryName(txtpicDir.Text) + "\" + Path.GetFileNameWithoutExtension(txtpicDir.Text) + ".pdf";
Aspose.Words.Document document = new Aspose.Words.Document(txtpicDir.Text);
ArrayList fontSources = new ArrayList(FontSettings.DefaultInstance.GetFontsSources());
//将用户目录字体添加到字体源中
Aspose.Words.Fonts.FolderFontSource folderFontSource = new Aspose.Words.Fonts.FolderFontSource(userfontsfoloder, true);
fontSources.Add(folderFontSource);
Aspose.Words.Fonts.FontSourceBase[] updatedFontSources = (Aspose.Words.Fonts.FontSourceBase[])fontSources.ToArray(typeof(Aspose.Words.Fonts.FontSourceBase));
FontSettings.DefaultInstance.SetFontsSources(updatedFontSources);