| 1 | /* |
|---|
| 2 | * Copyright 2007-2008, Plutext Pty Ltd. |
|---|
| 3 | * |
|---|
| 4 | * This file is part of docx4j. |
|---|
| 5 | |
|---|
| 6 | docx4j is licensed under the Apache License, Version 2.0 (the "License"); |
|---|
| 7 | you may not use this file except in compliance with the License. |
|---|
| 8 | |
|---|
| 9 | You may obtain a copy of the License at |
|---|
| 10 | |
|---|
| 11 | http://www.apache.org/licenses/LICENSE-2.0 |
|---|
| 12 | |
|---|
| 13 | Unless required by applicable law or agreed to in writing, software |
|---|
| 14 | distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 16 | See the License for the specific language governing permissions and |
|---|
| 17 | limitations under the License. |
|---|
| 18 | |
|---|
| 19 | */ |
|---|
| 20 | package org.docx4j.openpackaging.parts; |
|---|
| 21 | |
|---|
| 22 | |
|---|
| 23 | import java.io.ByteArrayOutputStream; |
|---|
| 24 | |
|---|
| 25 | import javax.xml.bind.JAXBContext; |
|---|
| 26 | import javax.xml.bind.JAXBException; |
|---|
| 27 | import javax.xml.bind.Marshaller; |
|---|
| 28 | import javax.xml.bind.UnmarshalException; |
|---|
| 29 | import javax.xml.bind.Unmarshaller; |
|---|
| 30 | import javax.xml.bind.util.JAXBResult; |
|---|
| 31 | import javax.xml.transform.Templates; |
|---|
| 32 | import javax.xml.transform.stream.StreamSource; |
|---|
| 33 | |
|---|
| 34 | import org.apache.log4j.Logger; |
|---|
| 35 | import org.docx4j.XmlUtils; |
|---|
| 36 | import org.docx4j.jaxb.Context; |
|---|
| 37 | import org.docx4j.jaxb.JaxbValidationEventHandler; |
|---|
| 38 | import org.docx4j.jaxb.NamespacePrefixMapperUtils; |
|---|
| 39 | import org.docx4j.openpackaging.exceptions.Docx4JException; |
|---|
| 40 | import org.docx4j.openpackaging.exceptions.InvalidFormatException; |
|---|
| 41 | import org.docx4j.wml.Numbering; |
|---|
| 42 | |
|---|
| 43 | /** OPC Parts are either XML, or binary (or text) documents. |
|---|
| 44 | * |
|---|
| 45 | * Most are XML documents. |
|---|
| 46 | * |
|---|
| 47 | * docx4j aims to represent XML parts using JAXB. |
|---|
| 48 | * |
|---|
| 49 | * Any XML Part for which we have a JAXB representation (eg the main |
|---|
| 50 | * document part) should extend this Part. |
|---|
| 51 | * |
|---|
| 52 | * This class provides only one of the methods for serializing (marshalling) the |
|---|
| 53 | * Java content tree back into XML data found in |
|---|
| 54 | * javax.xml.bind.Marshaller interface. You can always use |
|---|
| 55 | * any of the others by getting the jaxbElement required by those |
|---|
| 56 | * methods. |
|---|
| 57 | * |
|---|
| 58 | * Insofar as unmarshalling is concerned, at present it doesn't |
|---|
| 59 | * contain all the methods in javax.xml.bind.unmarshaller interface. |
|---|
| 60 | * This is because the content always comes from the same place |
|---|
| 61 | * (ie from a zip file or JCR via org.docx4j.io.*). |
|---|
| 62 | * TODO - what is the best thing to unmarshall from? |
|---|
| 63 | * |
|---|
| 64 | * @param <E> type of the content tree object |
|---|
| 65 | * */ |
|---|
| 66 | public abstract class JaxbXmlPart<E> extends Part { |
|---|
| 67 | |
|---|
| 68 | protected static Logger log = Logger.getLogger(JaxbXmlPart.class); |
|---|
| 69 | |
|---|
| 70 | // This class is abstract |
|---|
| 71 | // Most applications ought to be able to instantiate |
|---|
| 72 | // any part as the relevant subclass. |
|---|
| 73 | // If it was not abstract, users would have to |
|---|
| 74 | // take care to set its content type and |
|---|
| 75 | // relationship type when adding the part. |
|---|
| 76 | |
|---|
| 77 | public JaxbXmlPart(PartName partName) throws InvalidFormatException { |
|---|
| 78 | super(partName); |
|---|
| 79 | setJAXBContext(Context.jc); |
|---|
| 80 | } |
|---|
| 81 | |
|---|
| 82 | public JaxbXmlPart(PartName partName, JAXBContext jc) throws InvalidFormatException { |
|---|
| 83 | super(partName); |
|---|
| 84 | setJAXBContext(jc); |
|---|
| 85 | } |
|---|
| 86 | |
|---|
| 87 | protected JAXBContext jc; |
|---|
| 88 | public void setJAXBContext(JAXBContext jc) { |
|---|
| 89 | this.jc = jc; |
|---|
| 90 | } |
|---|
| 91 | /** |
|---|
| 92 | * @since 2.7 |
|---|
| 93 | */ |
|---|
| 94 | public JAXBContext getJAXBContext() { |
|---|
| 95 | return jc; |
|---|
| 96 | } |
|---|
| 97 | |
|---|
| 98 | |
|---|
| 99 | /** The content tree (ie JAXB representation of the Part) */ |
|---|
| 100 | protected E jaxbElement = null; |
|---|
| 101 | |
|---|
| 102 | public E getJaxbElement() { |
|---|
| 103 | return jaxbElement; |
|---|
| 104 | } |
|---|
| 105 | |
|---|
| 106 | public void setJaxbElement(E jaxbElement) { |
|---|
| 107 | this.jaxbElement = jaxbElement; |
|---|
| 108 | } |
|---|
| 109 | |
|---|
| 110 | public void setJaxbElement(JAXBResult result) throws JAXBException { |
|---|
| 111 | |
|---|
| 112 | setJaxbElement((E)result.getResult()); |
|---|
| 113 | } |
|---|
| 114 | |
|---|
| 115 | |
|---|
| 116 | /** |
|---|
| 117 | * Marshal the content tree rooted at <tt>jaxbElement</tt> into a DOM tree. |
|---|
| 118 | * |
|---|
| 119 | * @param node |
|---|
| 120 | * DOM nodes will be added as children of this node. |
|---|
| 121 | * This parameter must be a Node that accepts children |
|---|
| 122 | * ({@link org.w3c.dom.Document}, |
|---|
| 123 | * {@link org.w3c.dom.DocumentFragment}, or |
|---|
| 124 | * {@link org.w3c.dom.Element}) |
|---|
| 125 | * |
|---|
| 126 | * @throws JAXBException |
|---|
| 127 | * If any unexpected problem occurs during the marshalling. |
|---|
| 128 | */ |
|---|
| 129 | public void marshal(org.w3c.dom.Node node) throws JAXBException { |
|---|
| 130 | |
|---|
| 131 | marshal(node, NamespacePrefixMapperUtils.getPrefixMapper() ); |
|---|
| 132 | |
|---|
| 133 | } |
|---|
| 134 | |
|---|
| 135 | /** |
|---|
| 136 | * Marshal the content tree rooted at <tt>jaxbElement</tt> into a DOM tree. |
|---|
| 137 | * |
|---|
| 138 | * @param node |
|---|
| 139 | * DOM nodes will be added as children of this node. |
|---|
| 140 | * This parameter must be a Node that accepts children |
|---|
| 141 | * ({@link org.w3c.dom.Document}, |
|---|
| 142 | * {@link org.w3c.dom.DocumentFragment}, or |
|---|
| 143 | * {@link org.w3c.dom.Element}) |
|---|
| 144 | * |
|---|
| 145 | * @throws JAXBException |
|---|
| 146 | * If any unexpected problem occurs during the marshalling. |
|---|
| 147 | */ |
|---|
| 148 | public void marshal(org.w3c.dom.Node node, |
|---|
| 149 | Object namespacePrefixMapper) throws JAXBException { |
|---|
| 150 | |
|---|
| 151 | try { |
|---|
| 152 | Marshaller marshaller = jc.createMarshaller(); |
|---|
| 153 | NamespacePrefixMapperUtils.setProperty(marshaller, namespacePrefixMapper); |
|---|
| 154 | marshaller.marshal(jaxbElement, node); |
|---|
| 155 | |
|---|
| 156 | } catch (JAXBException e) { |
|---|
| 157 | // e.printStackTrace(); |
|---|
| 158 | log.error(e); |
|---|
| 159 | throw e; |
|---|
| 160 | } |
|---|
| 161 | } |
|---|
| 162 | |
|---|
| 163 | /** |
|---|
| 164 | * Marshal the content tree rooted at <tt>jaxbElement</tt> into an output |
|---|
| 165 | * stream, using org.docx4j.jaxb.NamespacePrefixMapper. |
|---|
| 166 | * |
|---|
| 167 | * @param os |
|---|
| 168 | * XML will be added to this stream. |
|---|
| 169 | * |
|---|
| 170 | * @throws JAXBException |
|---|
| 171 | * If any unexpected problem occurs during the marshalling. |
|---|
| 172 | */ |
|---|
| 173 | public void marshal(java.io.OutputStream os) throws JAXBException { |
|---|
| 174 | |
|---|
| 175 | marshal(os, NamespacePrefixMapperUtils.getPrefixMapper() ); |
|---|
| 176 | } |
|---|
| 177 | |
|---|
| 178 | /** |
|---|
| 179 | * Marshal the content tree rooted at <tt>jaxbElement</tt> into an output |
|---|
| 180 | * stream |
|---|
| 181 | * |
|---|
| 182 | * @param os |
|---|
| 183 | * XML will be added to this stream. |
|---|
| 184 | * @param namespacePrefixMapper |
|---|
| 185 | * namespacePrefixMapper |
|---|
| 186 | * |
|---|
| 187 | * @throws JAXBException |
|---|
| 188 | * If any unexpected problem occurs during the marshalling. |
|---|
| 189 | */ |
|---|
| 190 | public void marshal(java.io.OutputStream os, Object namespacePrefixMapper) throws JAXBException { |
|---|
| 191 | |
|---|
| 192 | try { |
|---|
| 193 | Marshaller marshaller = jc.createMarshaller(); |
|---|
| 194 | NamespacePrefixMapperUtils.setProperty(marshaller, namespacePrefixMapper); |
|---|
| 195 | |
|---|
| 196 | log.info("marshalling " + this.getClass().getName() ); |
|---|
| 197 | marshaller.marshal(jaxbElement, os); |
|---|
| 198 | |
|---|
| 199 | } catch (JAXBException e) { |
|---|
| 200 | //e.printStackTrace(); |
|---|
| 201 | log.error(e); |
|---|
| 202 | throw e; |
|---|
| 203 | } |
|---|
| 204 | } |
|---|
| 205 | |
|---|
| 206 | /** |
|---|
| 207 | * Unmarshal XML data from the specified InputStream and return the |
|---|
| 208 | * resulting content tree. Validation event location information may be |
|---|
| 209 | * incomplete when using this form of the unmarshal API. |
|---|
| 210 | * |
|---|
| 211 | * <p> |
|---|
| 212 | * Implements <a href="#unmarshalGlobal">Unmarshal Global Root Element</a>. |
|---|
| 213 | * |
|---|
| 214 | * @param is |
|---|
| 215 | * the InputStream to unmarshal XML data from |
|---|
| 216 | * @return the newly created root object of the java content tree |
|---|
| 217 | * |
|---|
| 218 | * @throws JAXBException |
|---|
| 219 | * If any unexpected errors occur while unmarshalling |
|---|
| 220 | */ |
|---|
| 221 | public E unmarshal( java.io.InputStream is ) throws JAXBException { |
|---|
| 222 | |
|---|
| 223 | try { |
|---|
| 224 | |
|---|
| 225 | Unmarshaller u = jc.createUnmarshaller(); |
|---|
| 226 | |
|---|
| 227 | JaxbValidationEventHandler eventHandler = new JaxbValidationEventHandler(); |
|---|
| 228 | if (is.markSupported()) { |
|---|
| 229 | // Only fail hard if we know we can restart |
|---|
| 230 | eventHandler.setContinue(false); |
|---|
| 231 | } |
|---|
| 232 | u.setEventHandler(eventHandler); |
|---|
| 233 | |
|---|
| 234 | try { |
|---|
| 235 | jaxbElement = (E) XmlUtils.unwrap( |
|---|
| 236 | u.unmarshal( is )); |
|---|
| 237 | } catch (UnmarshalException ue) { |
|---|
| 238 | |
|---|
| 239 | if (is.markSupported() ) { |
|---|
| 240 | // When reading from zip, we use a ByteArrayInputStream, |
|---|
| 241 | // which does support this. |
|---|
| 242 | |
|---|
| 243 | log.info("encountered unexpected content; pre-processing"); |
|---|
| 244 | eventHandler.setContinue(true); |
|---|
| 245 | |
|---|
| 246 | try { |
|---|
| 247 | Templates mcPreprocessorXslt = JaxbValidationEventHandler.getMcPreprocessor(); |
|---|
| 248 | is.reset(); |
|---|
| 249 | JAXBResult result = XmlUtils.prepareJAXBResult(Context.jc); |
|---|
| 250 | XmlUtils.transform(new StreamSource(is), |
|---|
| 251 | mcPreprocessorXslt, null, result); |
|---|
| 252 | jaxbElement = (E) XmlUtils.unwrap( |
|---|
| 253 | result.getResult() ); |
|---|
| 254 | } catch (Exception e) { |
|---|
| 255 | throw new JAXBException("Preprocessing exception", e); |
|---|
| 256 | } |
|---|
| 257 | |
|---|
| 258 | } else { |
|---|
| 259 | log.error(ue); |
|---|
| 260 | log.error(".. and mark not supported"); |
|---|
| 261 | throw ue; |
|---|
| 262 | } |
|---|
| 263 | } |
|---|
| 264 | |
|---|
| 265 | |
|---|
| 266 | } catch (JAXBException e ) { |
|---|
| 267 | log.error(e); |
|---|
| 268 | throw e; |
|---|
| 269 | } |
|---|
| 270 | |
|---|
| 271 | return jaxbElement; |
|---|
| 272 | |
|---|
| 273 | } |
|---|
| 274 | |
|---|
| 275 | public E unmarshal(org.w3c.dom.Element el) throws JAXBException { |
|---|
| 276 | |
|---|
| 277 | try { |
|---|
| 278 | |
|---|
| 279 | Unmarshaller u = jc.createUnmarshaller(); |
|---|
| 280 | JaxbValidationEventHandler eventHandler = new JaxbValidationEventHandler(); |
|---|
| 281 | eventHandler.setContinue(false); |
|---|
| 282 | u.setEventHandler(eventHandler); |
|---|
| 283 | |
|---|
| 284 | try { |
|---|
| 285 | jaxbElement = (E) XmlUtils.unwrap( |
|---|
| 286 | u.unmarshal( el ) ); |
|---|
| 287 | } catch (UnmarshalException ue) { |
|---|
| 288 | log.info("encountered unexpected content; pre-processing"); |
|---|
| 289 | try { |
|---|
| 290 | org.w3c.dom.Document doc; |
|---|
| 291 | if (el instanceof org.w3c.dom.Document) { |
|---|
| 292 | doc = (org.w3c.dom.Document) el; |
|---|
| 293 | } else { |
|---|
| 294 | // Hope for the best. Dodgy though; what if this is |
|---|
| 295 | // being used on something deep in the tree? |
|---|
| 296 | // TODO: revisit |
|---|
| 297 | doc = el.getOwnerDocument(); |
|---|
| 298 | } |
|---|
| 299 | eventHandler.setContinue(true); |
|---|
| 300 | JAXBResult result = XmlUtils.prepareJAXBResult(Context.jc); |
|---|
| 301 | Templates mcPreprocessorXslt = JaxbValidationEventHandler |
|---|
| 302 | .getMcPreprocessor(); |
|---|
| 303 | XmlUtils.transform(doc, mcPreprocessorXslt, null, result); |
|---|
| 304 | jaxbElement = (E) XmlUtils.unwrap( |
|---|
| 305 | result.getResult() ); |
|---|
| 306 | } catch (Exception e) { |
|---|
| 307 | throw new JAXBException("Preprocessing exception", e); |
|---|
| 308 | } |
|---|
| 309 | } |
|---|
| 310 | return jaxbElement; |
|---|
| 311 | |
|---|
| 312 | } catch (JAXBException e) { |
|---|
| 313 | log.error(e); |
|---|
| 314 | throw e; |
|---|
| 315 | } |
|---|
| 316 | } |
|---|
| 317 | |
|---|
| 318 | |
|---|
| 319 | public boolean isContentEqual(Part other) throws Docx4JException { |
|---|
| 320 | |
|---|
| 321 | log.debug("Comparing " + getPartName().getName() + " : " + other.getPartName().getName() ); |
|---|
| 322 | |
|---|
| 323 | if (!(other instanceof JaxbXmlPart)) { |
|---|
| 324 | log.debug(other.getPartName().getName() + " is not a JaxbXmlPart"); |
|---|
| 325 | return false; |
|---|
| 326 | } |
|---|
| 327 | |
|---|
| 328 | /* Implementation notes: Either we implement |
|---|
| 329 | * a notion of equality for all content trees |
|---|
| 330 | * (ie wml, dml etc), or we marshal to something |
|---|
| 331 | * and compare that. |
|---|
| 332 | * |
|---|
| 333 | * Let's take the marshal approach. |
|---|
| 334 | * |
|---|
| 335 | * Question becomes, what is it most efficient |
|---|
| 336 | * to marshal to? |
|---|
| 337 | * |
|---|
| 338 | * Looking at JAXB, probably SAX or a stream. |
|---|
| 339 | * |
|---|
| 340 | * Then we want an equality test for one of those, |
|---|
| 341 | * which returns as soon as inequality is established. |
|---|
| 342 | * |
|---|
| 343 | * diffx contains a method boolean equivalent(InputStream xml1, InputStream xml2) |
|---|
| 344 | * which will do what we want. |
|---|
| 345 | * |
|---|
| 346 | * We marshal to an output stream, then need a |
|---|
| 347 | * way to get an input stream from that. |
|---|
| 348 | * |
|---|
| 349 | * Since for now I'm just going to use a byte array for that |
|---|
| 350 | * (though pipes would be more efficient and possibly worth it for large |
|---|
| 351 | * MainDocumentPart - calling code could just assume that part is different |
|---|
| 352 | * though?), |
|---|
| 353 | * I'll just test the equality of the byte arrays |
|---|
| 354 | * and be done with it. |
|---|
| 355 | */ |
|---|
| 356 | ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
|---|
| 357 | ByteArrayOutputStream baos2 = new ByteArrayOutputStream(); |
|---|
| 358 | try { |
|---|
| 359 | marshal(baos); |
|---|
| 360 | ((JaxbXmlPart)other).marshal(baos2); |
|---|
| 361 | } catch (JAXBException e) { |
|---|
| 362 | throw new Docx4JException("Error marshalling parts", e); |
|---|
| 363 | } |
|---|
| 364 | |
|---|
| 365 | return java.util.Arrays.equals(baos.toByteArray(), baos2.toByteArray()); |
|---|
| 366 | |
|---|
| 367 | } |
|---|
| 368 | |
|---|
| 369 | } |
|---|