Changeset 321 for trunk/docx4j/src/main/java/org/docx4j/openpackaging/packages/WordprocessingMLPackage.java
- Timestamp: 04/11/08 15:26:10 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/docx4j/src/main/java/org/docx4j/openpackaging/packages/WordprocessingMLPackage.java
r299 r321 23 23 24 24 import java.io.File; 25 import java.io.FileInputStream; 25 26 import java.io.OutputStream; 26 27 import java.util.Iterator; … … 30 31 import javax.xml.bind.JAXBElement; 31 32 import javax.xml.bind.Marshaller; 33 import javax.xml.bind.Unmarshaller; 32 34 import javax.xml.parsers.DocumentBuilderFactory; 33 35 … … 50 52 import org.docx4j.openpackaging.parts.WordprocessingML.GlossaryDocumentPart; 51 53 import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart; 54 import org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart; 52 55 import org.docx4j.openpackaging.parts.relationships.Namespaces; 53 56 … … 60 63 61 64 65 /** 66 * @author jharrop 67 * 68 */ 62 69 public class WordprocessingMLPackage extends Package { 63 70 … … 167 174 168 175 176 177 /* Output in pck:package/pck:part format, as emitted by Word 2007. 178 * 179 */ 180 public org.docx4j.wml.Package exportPkgXml() { 181 // so, put the 2 parts together into a single document 182 // The JAXB object org.docx4j.wml.Package is 183 // custom built for this purpose. 184 185 // Create a org.docx4j.wml.Package object 186 org.docx4j.wml.ObjectFactory factory = new org.docx4j.wml.ObjectFactory(); 187 org.docx4j.wml.Package pkg = factory.createPackage(); 188 189 // Set its parts 190 191 // .. the main document part 192 org.docx4j.wml.Package.Part pkgPartDocument = factory.createPackagePart(); 193 194 MainDocumentPart documentPart = getMainDocumentPart(); 195 196 pkgPartDocument.setName(documentPart.getPartName().getName()); 197 pkgPartDocument.setContentType(documentPart.getContentType() ); 198 199 org.docx4j.wml.Package.Part.XmlData XmlDataDoc = factory.createPackagePartXmlData(); 200 201 org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document)documentPart.getJaxbElement(); 202 203 XmlDataDoc.setDocument(wmlDocumentEl); 204 pkgPartDocument.setXmlData(XmlDataDoc); 205 pkg.getPart().add(pkgPartDocument); 206 207 // .. 
the style part 208 org.docx4j.wml.Package.Part pkgPartStyles = factory.createPackagePart(); 209 210 org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart stylesPart = documentPart.getStyleDefinitionsPart(); 211 212 pkgPartDocument.setName(stylesPart.getPartName().getName()); 213 pkgPartDocument.setContentType(stylesPart.getContentType() ); 214 215 org.docx4j.wml.Package.Part.XmlData XmlDataStyles = factory.createPackagePartXmlData(); 216 217 org.docx4j.wml.Styles styles = (org.docx4j.wml.Styles)stylesPart.getJaxbElement(); 218 219 XmlDataStyles.setStyles(styles); 220 pkgPartStyles.setXmlData(XmlDataStyles); 221 pkg.getPart().add(pkgPartStyles); 222 223 return pkg; 224 225 } 226 227 228 /** 229 * Use an XSLT to alter the contents of this package. 230 * The output of the transformation must be valid 231 * pck:package/pck:part format, as emitted by Word 2007. 232 * 233 * @param xslt 234 * @param transformParameters 235 * @throws Exception 236 */ 237 public void transform(java.io.InputStream xslt, 238 Map<String, Object> transformParameters) throws Exception { 239 240 // Prepare in the input document 241 org.docx4j.wml.Package pkg = exportPkgXml(); 242 JAXBContext jc = Context.jc; 243 Marshaller marshaller=jc.createMarshaller(); 244 org.w3c.dom.Document doc = org.docx4j.XmlUtils.neww3cDomDocument(); 245 marshaller.marshal(pkg, doc); 246 247 javax.xml.bind.util.JAXBResult result = new javax.xml.bind.util.JAXBResult(jc ); 248 249 // Perform the transformation 250 org.docx4j.XmlUtils.transform(doc, xslt, transformParameters, result); 251 252 253 org.docx4j.wml.Package wmlPackageEl = (org.docx4j.wml.Package)result.getResult(); 254 255 org.docx4j.wml.Document wmlDocument = null; 256 org.docx4j.wml.Styles wmlStyles = null; 257 for (org.docx4j.wml.Package.Part p : wmlPackageEl.getPart() ) { 258 259 if (p.getXmlData().getDocument()!= null) { 260 wmlDocument = p.getXmlData().getDocument(); 261 } 262 if (p.getXmlData().getStyles()!= null) { 263 wmlStyles = 
p.getXmlData().getStyles(); 264 } 265 } 266 267 // TODO - delete existing main document part 268 269 // Create main document part 270 MainDocumentPart wordDocumentPart = new MainDocumentPart(); 271 // Put the content in the part 272 wordDocumentPart.setJaxbElement(wmlDocument); 273 // Add the main document part to the package relationships 274 // (creating it if necessary) 275 this.addTargetPart(wordDocumentPart); 276 277 278 // TODO - delete existing style part 279 280 281 // That handled the Main Document Part; now set the Style part. 282 StyleDefinitionsPart stylesPart = new StyleDefinitionsPart(); 283 stylesPart.setJaxbElement(wmlStyles); 284 // Add the styles part to the main document part relationships 285 // (creating it if necessary) 286 wordDocumentPart.addTargetPart(stylesPart); // NB - add it to main doc part, not package! 287 288 289 } 290 291 public void filter( FilterSettings filterSettings ) throws Exception { 292 293 java.io.InputStream xslt 294 = org.docx4j.utils.ResourceUtils.getResource( 295 "org/docx4j/openpackaging/packages/filter.xslt"); 296 297 transform(xslt, filterSettings.getSettings() ); 298 299 } 300 169 301 /** Create an html version of the document, using CSS font family 170 302 * stacks. This is appropriate if the HTML is intended for … … 181 313 } 182 314 315 183 316 /** Create an html version of the document. 184 317 * … … 190 323 191 324 /* 192 * Given that word2html.xsl is freely available, we use the second 193 * approach. 194 * 195 * The question then is how the stylesheet is made to work with 196 * our main document and style definition parts. 197 * 198 * I've adapted the stylesheet to process the 325 * Given that word2html.xsl is freely available, use a 326 * version of it adapted to process the 199 327 * pck:package/pck:part stuff emitted by Word 2007. 200 328 * 201 */ 202 203 // so, put the 2 parts together into a single document 204 // The JAXB object org.docx4j.wml.Package is 205 // custom built for this purpose. 
206 207 // Create a org.docx4j.wml.Package object 208 org.docx4j.wml.ObjectFactory factory = new org.docx4j.wml.ObjectFactory(); 209 org.docx4j.wml.Package pkg = factory.createPackage(); 210 211 // Set its parts 212 213 // .. the main document part 214 org.docx4j.wml.Package.Part pkgPartDocument = factory.createPackagePart(); 329 */ 330 org.docx4j.wml.Package pkg = exportPkgXml(); 215 331 216 MainDocumentPart documentPart = getMainDocumentPart();217 218 pkgPartDocument.setName(documentPart.getPartName().getName());219 pkgPartDocument.setContentType(documentPart.getContentType() );220 221 org.docx4j.wml.Package.Part.XmlData XmlDataDoc = factory.createPackagePartXmlData();222 223 org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document)documentPart.getJaxbElement();224 225 XmlDataDoc.setDocument(wmlDocumentEl);226 pkgPartDocument.setXmlData(XmlDataDoc);227 pkg.getPart().add(pkgPartDocument);228 229 // .. the style part230 org.docx4j.wml.Package.Part pkgPartStyles = factory.createPackagePart();231 232 org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart stylesPart = documentPart.getStyleDefinitionsPart();233 234 pkgPartDocument.setName(stylesPart.getPartName().getName());235 pkgPartDocument.setContentType(stylesPart.getContentType() );236 237 org.docx4j.wml.Package.Part.XmlData XmlDataStyles = factory.createPackagePartXmlData();238 239 org.docx4j.wml.Styles styles = (org.docx4j.wml.Styles)stylesPart.getJaxbElement();240 241 XmlDataStyles.setStyles(styles);242 pkgPartStyles.setXmlData(XmlDataStyles);243 pkg.getPart().add(pkgPartStyles);244 332 245 333 // Now marshall it … … 252 340 log.info("wordDocument created for PDF rendering!"); 253 341 254 /* 255 * We want to use plain old Xalan J, not xsltc 256 * 257 * Following would not be necessary provided Xalan is on the classpath 258 * 259 System.setProperty("javax.xml.transform.TransformerFactory", "FQCN"); 260 261 examples of FQCN: 262 263 org.apache.xalan.processor.TransformerFactoryImpl (this is 
the one we want) 264 com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl 265 org.apache.xalan.xsltc.trax.TransformerFactoryImpl 266 net.sf.saxon.TransformerFactoryImpl 267 268 HOWEVER, docx4all encounters http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6396599 269 270 java.util.prefs.FileSystemPreferences syncWorld 271 WARNING: Couldn't flush user prefs: java.util.prefs.BackingStoreException: java.lang.IllegalArgumentException: Not supported: indent-number 272 273 every 30 seconds 274 275 The workaround implemented is to remove META-INF/services from the xalan jar 276 to prevent xalan being picked up as the default provider for jaxp transform, 277 so we have to use it explicitly. 278 279 .. which means 280 281 System.setProperty("javax.xml.transform.TransformerFactory", "org.apache.xalan.processor.TransformerFactoryImpl"); 282 283 (unfortunately, there is no com.sun.org.apache.xalan.processor.TransformerFactoryImpl, 284 so we have to bundle xalan jar, which is 2.7 MB 285 286 But we can make it smaller: 287 288 org/apache/xalan/lib$ rm sql -rf 289 org/apache/xalan$ rm xsltc -rf 290 291 That gets us from 2.7 MB to 1.85 MB. 292 293 Sun already has: 294 295 com.sun.org.apache.xpath; 296 com.sun.org.apache.xml.internal.dtm; 297 com.sun.org.apache.xalan.internal.extensions|lib|res 298 299 so you might think we can refactor Xalan to point to those, and them out of our jar. 300 301 well, it turns out that its too messy leaving out org.apache.xpath or xalan.extensions 302 303 so you have to keep xalan.extensions, processor, serialize, trace, transformer 304 305 leaving out just org.apache.xalan.resources and org.apache.xpath.resources 306 only gets us down to 1.5 MB. 
(and that's with just jar cvf xalan-minimal.jar org/apache/xalan org/apache/xpath 307 - we'd still need to include org/apache/xml) 308 309 ie once you include the whole of org.apache.xpath, you may as well just go with the 1.85 MB :( 310 311 */ 312 313 javax.xml.transform.TransformerFactory tfactory = javax.xml.transform.TransformerFactory.newInstance(); 314 String originalFactory = tfactory.getClass().getName(); 315 System.out.println("original TransformerFactory: " + originalFactory); 316 // com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl resolves the syncWorld problem 317 // net.sf.saxon.TransformerFactoryImpl is no good. 318 319 System.setProperty("javax.xml.transform.TransformerFactory", "org.apache.xalan.processor.TransformerFactoryImpl"); 320 321 // Now transform this into XHTML 322 tfactory = javax.xml.transform.TransformerFactory.newInstance(); 323 javax.xml.transform.dom.DOMSource domSource = new javax.xml.transform.dom.DOMSource(doc); 324 325 // Get the xslt file 326 java.io.InputStream is = null; 327 // Works in Eclipse - note absence of leading '/' 328 is = org.docx4j.utils.ResourceUtils.getResource("org/docx4j/openpackaging/packages/wordml2html-2007.xslt"); 329 330 // Use the factory to create a template containing the xsl file 331 javax.xml.transform.Templates template = tfactory.newTemplates( 332 new javax.xml.transform.stream.StreamSource(is)); 333 // Use the template to create a transformer 334 javax.xml.transform.Transformer xformer = template.newTransformer(); 335 336 337 // Finished with the factory, so set it back again! 338 // The "Not supported: indent-number" problem will only occur if a user creates 339 // a new document during the time between these 2 calls to setProperty 340 // (and syncWorld is called?) 
341 System.setProperty("javax.xml.transform.TransformerFactory", originalFactory); 342 343 if (!xformer.getClass().getName().equals("org.apache.xalan.transformer.TransformerImpl")) { 344 log.error("Detected " + xformer.getClass().getName() 345 + ", but require org.apache.xalan.transformer.TransformerImpl. " + 346 "Ensure Xalan 2.7.0 is on your classpath!" ); 347 } 348 // com.sun.org.apache.xalan.internal.xsltc.trax.TransformerImpl won't work 349 // with our extension function. 350 351 352 // 3. Ensure that the font names in the XHTML have been mapped to these matches 342 343 // Get the xslt file - Works in Eclipse - note absence of leading '/' 344 java.io.InputStream xslt = org.docx4j.utils.ResourceUtils.getResource("org/docx4j/openpackaging/packages/wordml2html-2007.xslt"); 345 346 // Prep parameters 347 Map<String, Object> transformParameters = new java.util.HashMap<String,Object>(); 348 // ..Ensure that the font names in the XHTML have been mapped to these matches 353 349 // possibly via an extension function in the XSLT 354 350 if (fontSubstituter==null) { … … 358 354 log.debug("Using existing Substituter."); 359 355 } 360 xformer.setParameter("substituterInstance", fontSubstituter); 361 xformer.setParameter("fontFamilyStack", fontFamilyStack); 362 363 //DEBUGGING 364 // use the identity transform if you want to send wordDocument; 365 // otherwise you'll get the XHTML 366 //javax.xml.transform.Transformer xformer = tfactory.newTransformer(); 367 368 xformer.transform(domSource, result); 369 356 transformParameters.put("substituterInstance", fontSubstituter); 357 transformParameters.put("fontFamilyStack", fontFamilyStack); 358 359 360 // Now do the transformation 361 org.docx4j.XmlUtils.transform(doc, xslt, transformParameters, result); 362 370 363 log.info("wordDocument transformed to xhtml .."); 371 364 372 365 } 366 367 368 373 369 374 370 public void setFontSubstituter(Substituter fs) throws Exception { … … 598 594 } 599 595 596 public static class 
FilterSettings { 597 598 Boolean removeProofErrors = Boolean.FALSE; 599 public void setRemoveProofErrors(boolean val) { 600 removeProofErrors = new Boolean(val); 601 } 602 603 Map<String, Object> getSettings() { 604 Map<String, Object> settings = new java.util.HashMap<String, Object>(); 605 606 settings.put("removeProofErrors", removeProofErrors); 607 608 return settings; 609 } 610 611 612 } 613 600 614 }
Note: See TracChangeset for help on using the changeset viewer.
