我使用 docx4j 为已有文档(文档中包含图片、表格、复选框等非文字内容)生成目录和页码。虽然可以生成,但是耗时太长(需要 70 多秒),并且输出了很多 ERROR 和 WARN 日志,希望能优化代码、缩短耗时。
代码如下
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.toc.TocGenerator;
import java.io.File;
public class WordWithTOCDocx4j4 {
public static void main(String[] args) throws Exception {
// 指定DOCX文件路径
File docxFile = new File("D:\\projects\\test\\a.docx");
// 加载DOCX文件并创建WordprocessingMLPackage对象
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(docxFile);
TocGenerator tocGenerator = new TocGenerator(wordMLPackage);
tocGenerator.generateToc( 0, " TOC \\o \"1-3\" \\h \\z \\u ", false);
// 保存文档
wordMLPackage.save(new File("D:\\projects\\test\\aWithTOC222.docx"));
}
}
maven依赖包如下:
<!-- Single version property shared by every docx4j artifact declared below. -->
<properties>
<docx4j.version>11.4.8</docx4j.version>
</properties>
<!-- docx4j-core is declared directly here; the other docx4j artifacts below
     therefore exclude it from their own transitive dependencies. -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-core</artifactId>
<version>${docx4j.version}</version>
</dependency>
<!-- NOTE(review): presumably selects MOXy as the JAXB implementation used by docx4j; confirm. -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-JAXB-MOXy</artifactId>
<version>${docx4j.version}</version>
<exclusions>
<exclusion>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- NOTE(review): the Java code shown with this POM fragment does not reference any
     XHTML import classes; confirm whether this artifact is actually needed. -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-ImportXHTML</artifactId>
<version>${docx4j.version}</version>
<exclusions>
<exclusion>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- NOTE(review): presumably supplies the org.docx4j.convert.out.fo classes that appear
     in the TOC page-numbering stack trace; confirm against the artifact contents. -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-export-fo</artifactId>
<version>${docx4j.version}</version>
<exclusions>
<exclusion>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-core</artifactId>
</exclusion>
</exclusions>
</dependency>
通过日志分析,似乎是 docx4j 和 FOP(org.apache.fop.apps.FOUserAgent)无法获取可用的字体和格式,因此耗费了较多的时间并且出现了错误。我想通过给 FOP 增加字体配置来解决这个问题,但是没有找到相关的方法。请大家帮忙看看,谢谢。
ERROR日志如下:
ERROR org.docx4j.model.listnumbering.ListLevel Unhandled numFmt: CHINESE_COUNTING
ERROR org.docx4j.wml.Highlight Can't set w:highlight from 'none'
ERROR org.docx4j.model.images.WordXmlPictureE10 org.docx4j.vml.CTShape
ERROR org.docx4j.model.images.WordXmlPictureE10 Couldn't find shape!
ERROR org.docx4j.model.images.WordXmlPictureE10 WordXmlPictureE10 object was null!
ERROR org.docx4j.convert.out.fo.FOPAreaTreeHelper For @bpda,
<block bap="0 0 0 0" bpd="0" ipd="511755" ipda="511755" visibility="visible"/>
ERROR org.docx4j.convert.out.fo.FOPAreaTreeHelper For input string: ""
java.lang.NumberFormatException: For input string: ""
at java.base/java.lang.NumberFormatException.forInputString(NumberFormatException.java:67)
at java.base/java.lang.Integer.parseInt(Integer.java:672)
at java.base/java.lang.Integer.parseInt(Integer.java:778)
at org.docx4j.convert.out.fo.FOPAreaTreeHelper.calculateHFExtents(FOPAreaTreeHelper.java:368)
at org.docx4j.convert.out.fo.LayoutMasterSetBuilder.fixExtents(LayoutMasterSetBuilder.java:146)
at org.docx4j.convert.out.fo.LayoutMasterSetBuilder.getLayoutMasterSetFragment(LayoutMasterSetBuilder.java:97)
at org.docx4j.convert.out.fo.XsltFOFunctions.getLayoutMasterSetFragment(XsltFOFunctions.java:85)
at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
at java.base/java.lang.reflect.Method.invoke(Method.java:580)
at org.docx4j.org.apache.xalan.extensions.ExtensionHandlerJavaPackage.callFunction(ExtensionHandlerJavaPackage.java:343)
at org.docx4j.org.apache.xalan.extensions.ExtensionHandlerJavaPackage.callFunction(ExtensionHandlerJavaPackage.java:440)
at org.docx4j.org.apache.xalan.extensions.ExtensionsTable.extFunction(ExtensionsTable.java:226)
at org.docx4j.org.apache.xalan.transformer.TransformerImpl.extFunction(TransformerImpl.java:491)
at org.docx4j.org.apache.xpath.functions.FuncExtFunction.execute(FuncExtFunction.java:208)
at org.docx4j.org.apache.xpath.XPath.execute(XPath.java:342)
at org.docx4j.org.apache.xalan.templates.ElemCopyOf.execute(ElemCopyOf.java:134)
at org.docx4j.org.apache.xalan.transformer.TransformerImpl.executeChildTemplates(TransformerImpl.java:2418)
at org.docx4j.org.apache.xalan.templates.ElemLiteralResult.execute(ElemLiteralResult.java:1376)
at org.docx4j.org.apache.xalan.templates.ElemApplyTemplates.transformSelectedNodes(ElemApplyTemplates.java:395)
at org.docx4j.org.apache.xalan.templates.ElemApplyTemplates.execute(ElemApplyTemplates.java:178)
at org.docx4j.org.apache.xalan.transformer.TransformerImpl.executeChildTemplates(TransformerImpl.java:2418)
at org.docx4j.org.apache.xalan.transformer.TransformerImpl.applyTemplateToNode(TransformerImpl.java:2288)
at org.docx4j.org.apache.xalan.transformer.TransformerImpl.transformNode(TransformerImpl.java:1374)
at org.docx4j.org.apache.xalan.transformer.TransformerImpl.transform(TransformerImpl.java:727)
at org.docx4j.org.apache.xalan.transformer.TransformerImpl.transform(TransformerImpl.java:1291)
at org.docx4j.org.apache.xalan.transformer.TransformerImpl.transform(TransformerImpl.java:1269)
at org.docx4j.XmlUtils.transform(XmlUtils.java:1518)
at org.docx4j.XmlUtils.transform(XmlUtils.java:1337)
at org.docx4j.convert.out.common.AbstractXsltExporterDelegate.process(AbstractXsltExporterDelegate.java:66)
at org.docx4j.convert.out.common.AbstractWmlExporter.process(AbstractWmlExporter.java:82)
at org.docx4j.convert.out.common.AbstractWmlExporter.process(AbstractWmlExporter.java:32)
at org.docx4j.convert.out.common.AbstractExporter.export(AbstractExporter.java:80)
at org.docx4j.Docx4J.toFO(Docx4J.java:711)
at org.docx4j.toc.TocGenerator.getPageNumbersMapViaFOP(TocGenerator.java:767)
at org.docx4j.toc.TocGenerator.getPageNumbersMap(TocGenerator.java:652)
at org.docx4j.toc.TocGenerator.populateToc(TocGenerator.java:399)
at org.docx4j.toc.TocGenerator.generateToc(TocGenerator.java:263)
at org.docx4j.toc.TocGenerator.generateToc(TocGenerator.java:231)
at org.docx4j.toc.TocGenerator.generateToc(TocGenerator.java:191)
at com.anan.test.poi.tl.WordWithTOCDocx4j4.main(WordWithTOCDocx4j4.java:22)
WARN日志如下:
WARN org.docx4j.fonts.GlyphCheck Couldn't get font 宋体
WARN org.docx4j.fonts.RunFontSelector TODO: how to handle char '“' in range c>='\u2000' && c<='\u2EFF'?
WARN org.docx4j.fonts.IdentityPlusMapper - - No physical font for: 黑体
WARN org.docx4j.fonts.fop.util.FopConfigUtil Document font 黑体 is not mapped to a physical font!
WARN org.docx4j.fonts.PhysicalFonts No entry in MicrosoftFontsRegistry for: Calibri Light
WARN org.docx4j.model.styles.StyleUtil TODO: implementation is incomplete
WARN org.docx4j.model.properties.paragraph.Indent Only left/first-line indentation is handled at present
WARN org.docx4j.convert.out.common.AbstractConversionContext NOT IMPLEMENTED: support for w:tblPrEx;
WARN org.apache.fop.apps.FOUserAgent Font "Symbol,normal,700" not found. Substituting with "Symbol,normal,400".
WARN org.apache.fop.apps.FOUserAgent Font "ZapfDingbats,normal,700" not found. Substituting with "ZapfDingbats,normal,400".
WARN org.apache.fop.apps.FOUserAgent Font "Times New Roman,normal,400" not found. Substituting with "any,normal,400".
WARN org.apache.fop.apps.FOUserAgent Font "Calibri Light,normal,700" not found. Substituting with "any,normal,700".
WARN org.apache.fop.apps.FOUserAgent Font "SimSun,normal,700" not found. Substituting with "any,normal,700".
WARN org.apache.fop.apps.FOUserAgent Font "SimSun,normal,400" not found. Substituting with "any,normal,400".
WARN org.apache.fop.apps.FOUserAgent Glyph "第" (0x7b2c) not available in font "Times-Roman".
WARN org.apache.fop.apps.FOUserAgent Glyph "哈" (0x54c8) not available in font "Times-Bold".