Page 1 of 1

java word to pdf converter

PostPosted: Mon Feb 05, 2018 6:37 pm
by Asttle
I am using docx4j to convert a Word document to PDF. My Word document contains both Arabic and English characters, but in the output PDF the Arabic characters are messed up.
Why?
I have attached the Word document and the text of the PDF document.


Code: Select all
void createPDF() throws FileNotFoundException, Docx4JException, Exception {
         //To change body of generated methods, choose Tools | Templates.
          FOSettings foSettings = Docx4J.createFOSettings();
      InputStream is = new FileInputStream(new File("Documents\\Sampledoc.docx"));
      WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(is);

      //Print all available physical fonts
      PhysicalFonts.discoverPhysicalFonts();
      Map<String, PhysicalFont> physicalFonts = PhysicalFonts.getPhysicalFonts();
      Iterator<Entry<String, PhysicalFont>> availableFonts = physicalFonts.entrySet().iterator();
      while(availableFonts.hasNext()) {
         Entry<String, PhysicalFont> font = availableFonts.next();
         String key = font.getKey();
         PhysicalFont pFont = font.getValue();
         System.out.println("Key is " + key + ";; Name " + pFont.getName());
      }
      Mapper fontMapper = new IdentityPlusMapper();
      PhysicalFont font  = PhysicalFonts.get("Arial Unicode MS");
      fontMapper.put("Arial", font);
      wordMLPackage.setFontMapper(fontMapper);
      foSettings.setWmlPackage(wordMLPackage);
      OutputStream pdfOutputStream = new FileOutputStream("Documents\\output.pdf");
      System.out.println(foSettings.getSettings());
      Docx4J.toFO(foSettings, pdfOutputStream, Docx4J.FLAG_EXPORT_PREFER_XSL);
      System.out.println(" Done !!!!");
    }






When loading some other Word files (Arabic + English + Chinese), the following exceptions are thrown:




Exceptions:
Exception in thread "main" org.docx4j.openpackaging.exceptions.Docx4JException: Exception exporting package
at org.docx4j.convert.out.common.AbstractExporter.export(AbstractExporter.java:109)
at org.docx4j.Docx4J.toFO(Docx4J.java:597)
at convertoutpdfviaxslfo.Conversion1.createPDF(Conversion1.java:55)
at convertoutpdfviaxslfo.ExampleFO2PDF.main(ExampleFO2PDF.java:19)
Caused by: org.docx4j.openpackaging.exceptions.Docx4JException: Exception writing Document to OutputStream: For TrueType collection you must specify which font to select (-ttcname)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:50)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:14)
at org.docx4j.convert.out.fo.renderers.FORendererApacheFOP.render(FORendererApacheFOP.java:209)
at org.docx4j.convert.out.fo.renderers.FORendererApacheFOP.render(FORendererApacheFOP.java:159)
at org.docx4j.convert.out.fo.AbstractFOExporter.postprocess(AbstractFOExporter.java:168)
at org.docx4j.convert.out.fo.AbstractFOExporter.postprocess(AbstractFOExporter.java:47)
at org.docx4j.convert.out.common.AbstractExporter.export(AbstractExporter.java:82)
... 3 more
Caused by: java.lang.IllegalArgumentException: For TrueType collection you must specify which font to select (-ttcname)
at org.apache.fop.fonts.truetype.TTFFile.readFont(TTFFile.java:476)
at org.apache.fop.fonts.truetype.TTFFontLoader.read(TTFFontLoader.java:58)
at org.apache.fop.fonts.FontLoader.getFont(FontLoader.java:164)
at org.apache.fop.fonts.FontLoader.loadFont(FontLoader.java:113)
at org.apache.fop.fonts.LazyFont.load(LazyFont.java:126)
at org.apache.fop.fonts.LazyFont.getAscender(LazyFont.java:233)
at org.apache.fop.fonts.Font.getAscender(Font.java:96)
at org.apache.fop.layoutmgr.inline.AlignmentContext.<init>(AlignmentContext.java:127)
at org.apache.fop.layoutmgr.inline.InlineLayoutManager.getNextKnuthElements(InlineLayoutManager.java:255)
at org.apache.fop.layoutmgr.inline.InlineLayoutManager.getNextKnuthElements(InlineLayoutManager.java:315)
at org.apache.fop.layoutmgr.inline.LineLayoutManager.collectInlineKnuthElements(LineLayoutManager.java:658)
at org.apache.fop.layoutmgr.inline.LineLayoutManager.getNextKnuthElements(LineLayoutManager.java:594)
at org.apache.fop.layoutmgr.BlockStackingLayoutManager.getNextKnuthElements(BlockStackingLayoutManager.java:294)
at org.apache.fop.layoutmgr.BlockLayoutManager.getNextKnuthElements(BlockLayoutManager.java:116)
at org.apache.fop.layoutmgr.FlowLayoutManager.getNextKnuthElements(FlowLayoutManager.java:107)
at org.apache.fop.layoutmgr.PageBreaker.getNextKnuthElements(PageBreaker.java:145)
at org.apache.fop.layoutmgr.AbstractBreaker.getNextBlockList(AbstractBreaker.java:552)
at org.apache.fop.layoutmgr.PageBreaker.getNextBlockList(PageBreaker.java:137)
at org.apache.fop.layoutmgr.AbstractBreaker.doLayout(AbstractBreaker.java:302)
at org.apache.fop.layoutmgr.AbstractBreaker.doLayout(AbstractBreaker.java:264)
at org.apache.fop.layoutmgr.PageSequenceLayoutManager.activateLayout(PageSequenceLayoutManager.java:106)
at org.apache.fop.area.AreaTreeHandler.endPageSequence(AreaTreeHandler.java:234)
at org.apache.fop.fo.pagination.PageSequence.endOfNode(PageSequence.java:123)
at org.apache.fop.fo.FOTreeBuilder$MainFOHandler.endElement(FOTreeBuilder.java:340)
at org.apache.fop.fo.FOTreeBuilder.endElement(FOTreeBuilder.java:169)
at org.docx4j.org.apache.xalan.transformer.TransformerIdentityImpl.endElement(TransformerIdentityImpl.java:1106)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.endElement(AbstractSAXParser.java:609)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanEndElement(XMLDocumentFragmentScannerImpl.java:1782)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl$FragmentContentDriver.next(XMLDocumentFragmentScannerImpl.java:2973)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:606)
at com.sun.org.apache.xerces.internal.impl.XMLNSDocumentScannerImpl.next(XMLNSDocumentScannerImpl.java:117)
at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:510)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:848)
at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:777)
at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213)
at org.docx4j.org.apache.xalan.transformer.TransformerIdentityImpl.transform(TransformerIdentityImpl.java:489)
at org.docx4j.utils.XmlSerializerUtil.serialize(XmlSerializerUtil.java:47)
... 9 more

Re: java word to pdf converter

PostPosted: Tue Feb 06, 2018 8:43 pm
by jason
Could you please attach the source docx?