| 1 | /* |
|---|
| 2 | * Copyright 2007-2008, Plutext Pty Ltd. |
|---|
| 3 | * |
|---|
| 4 | * This file is part of docx4j. |
|---|
| 5 | |
|---|
| 6 | docx4j is licensed under the Apache License, Version 2.0 (the "License"); |
|---|
| 7 | you may not use this file except in compliance with the License. |
|---|
| 8 | |
|---|
| 9 | You may obtain a copy of the License at |
|---|
| 10 | |
|---|
| 11 | http://www.apache.org/licenses/LICENSE-2.0 |
|---|
| 12 | |
|---|
| 13 | Unless required by applicable law or agreed to in writing, software |
|---|
| 14 | distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 16 | See the License for the specific language governing permissions and |
|---|
| 17 | limitations under the License. |
|---|
| 18 | |
|---|
| 19 | */ |
|---|
| 20 | |
|---|
| 21 | package org.docx4j.openpackaging.io; |
|---|
| 22 | |
|---|
| 23 | import java.io.InputStream; |
|---|
| 24 | import java.net.URISyntaxException; |
|---|
| 25 | import java.util.Collection; |
|---|
| 26 | import java.util.HashMap; |
|---|
| 27 | import java.util.Iterator; |
|---|
| 28 | |
|---|
| 29 | import javax.xml.bind.JAXBElement; |
|---|
| 30 | import javax.xml.bind.Unmarshaller; |
|---|
| 31 | |
|---|
| 32 | import org.apache.log4j.Logger; |
|---|
| 33 | import org.docx4j.XmlUtils; |
|---|
| 34 | import org.docx4j.jaxb.Context; |
|---|
| 35 | import org.docx4j.model.datastorage.BindingHandler; |
|---|
| 36 | import org.docx4j.model.datastorage.CustomXmlDataStorage; |
|---|
| 37 | import org.docx4j.model.datastorage.CustomXmlDataStorageImpl; |
|---|
| 38 | import org.docx4j.openpackaging.contenttype.ContentTypeManager; |
|---|
| 39 | import org.docx4j.openpackaging.exceptions.Docx4JException; |
|---|
| 40 | import org.docx4j.openpackaging.exceptions.InvalidFormatException; |
|---|
| 41 | import org.docx4j.openpackaging.exceptions.PartUnrecognisedException; |
|---|
| 42 | import org.docx4j.openpackaging.packages.OpcPackage; |
|---|
| 43 | import org.docx4j.openpackaging.parts.CustomXmlDataStoragePart; |
|---|
| 44 | import org.docx4j.openpackaging.parts.CustomXmlDataStoragePropertiesPart; |
|---|
| 45 | import org.docx4j.openpackaging.parts.Part; |
|---|
| 46 | import org.docx4j.openpackaging.parts.PartName; |
|---|
| 47 | import org.docx4j.openpackaging.parts.WordprocessingML.BibliographyPart; |
|---|
| 48 | import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart; |
|---|
| 49 | import org.docx4j.openpackaging.parts.opendope.ComponentsPart; |
|---|
| 50 | import org.docx4j.openpackaging.parts.opendope.ConditionsPart; |
|---|
| 51 | import org.docx4j.openpackaging.parts.opendope.QuestionsPart; |
|---|
| 52 | import org.docx4j.openpackaging.parts.opendope.XPathsPart; |
|---|
| 53 | import org.docx4j.openpackaging.parts.relationships.Namespaces; |
|---|
| 54 | import org.docx4j.relationships.Relationship; |
|---|
| 55 | |
|---|
| 56 | |
|---|
| 57 | public class Load { |
|---|
| 58 | |
|---|
| 59 | private static Logger log = Logger.getLogger(Load.class); |
|---|
| 60 | |
|---|
| 61 | |
|---|
| 62 | |
|---|
| 63 | public Load() { |
|---|
| 64 | super(); |
|---|
| 65 | } |
|---|
| 66 | |
|---|
| 67 | //public ContentTypeManager ctm; |
|---|
| 68 | |
|---|
| 69 | protected boolean loadExternalTargets = false; |
|---|
| 70 | public void loadExternalTargets(boolean loadExternalTargets) { |
|---|
| 71 | this.loadExternalTargets = loadExternalTargets; |
|---|
| 72 | } |
|---|
| 73 | |
|---|
| 74 | |
|---|
| 75 | /** |
|---|
| 76 | * TODO. I'd prefer this not to be static, but it needs to be, |
|---|
| 77 | * given that getRawPart is. Maybe its not such a big deal, |
|---|
| 78 | * because its reasonable to assume that most people using docx4j |
|---|
| 79 | * will standardise on a single implementation of CustomXmlDataStorage? |
|---|
| 80 | */ |
|---|
| 81 | static protected CustomXmlDataStorage customXmlDataStorageClass = null; |
|---|
| 82 | /** |
|---|
| 83 | * Set your preferred implementation of the CustomXmlDataStorage |
|---|
| 84 | * interface. Its factory method will be used to create new instances. |
|---|
| 85 | * |
|---|
| 86 | * @param customXmlDataStorageClass the customXmlDataStorageClass to set |
|---|
| 87 | */ |
|---|
| 88 | static public void setCustomXmlDataStorageClass( |
|---|
| 89 | CustomXmlDataStorage customXmlDataStorageClassVal) { |
|---|
| 90 | customXmlDataStorageClass = customXmlDataStorageClassVal; |
|---|
| 91 | } |
|---|
| 92 | |
|---|
| 93 | /** |
|---|
| 94 | * @return the customXmlDataStorageClass |
|---|
| 95 | */ |
|---|
| 96 | static public CustomXmlDataStorage getCustomXmlDataStorageClass() { |
|---|
| 97 | try { |
|---|
| 98 | if (customXmlDataStorageClass==null) { |
|---|
| 99 | customXmlDataStorageClass = new CustomXmlDataStorageImpl(); |
|---|
| 100 | } |
|---|
| 101 | return customXmlDataStorageClass; |
|---|
| 102 | } catch (InvalidFormatException e) { |
|---|
| 103 | // TODO Auto-generated catch block |
|---|
| 104 | e.printStackTrace(); |
|---|
| 105 | return null; |
|---|
| 106 | } |
|---|
| 107 | } |
|---|
| 108 | |
|---|
| 109 | |
|---|
| 110 | /** |
|---|
| 111 | * Get a Part (except a relationships part), but not its relationships part |
|---|
| 112 | * or related parts. Useful if you need quick access to just this part, |
|---|
| 113 | * or if you wish to add a foreign part (ie a part from some other package). |
|---|
| 114 | * This can be called directly from outside the library, in which case |
|---|
| 115 | * the Part will not be owned by a Package until the calling code makes it so. |
|---|
| 116 | * @see To get a Part and all its related parts, and add all to a package, use |
|---|
| 117 | * getPart. |
|---|
| 118 | * @param is |
|---|
| 119 | * @param ctm the ContentTypeManager associated with the foreign package |
|---|
| 120 | * @param resolvedPartUri the part name |
|---|
| 121 | * @return |
|---|
| 122 | * |
|---|
| 123 | * @throws URISyntaxException |
|---|
| 124 | * @throws InvalidFormatException |
|---|
| 125 | */ |
|---|
| 126 | public static Part getRawPart(InputStream is, ContentTypeManager ctm, String resolvedPartUri, Relationship rel) |
|---|
| 127 | throws Docx4JException { |
|---|
| 128 | |
|---|
| 129 | Part part = null; |
|---|
| 130 | |
|---|
| 131 | try { |
|---|
| 132 | |
|---|
| 133 | try { |
|---|
| 134 | |
|---|
| 135 | part = ctm.getPart("/" + resolvedPartUri, rel); |
|---|
| 136 | |
|---|
| 137 | |
|---|
| 138 | if (part instanceof org.docx4j.openpackaging.parts.ThemePart) { |
|---|
| 139 | |
|---|
| 140 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcThemePart); |
|---|
| 141 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 142 | |
|---|
| 143 | } else if (part instanceof org.docx4j.openpackaging.parts.DocPropsCorePart ) { |
|---|
| 144 | |
|---|
| 145 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcDocPropsCore); |
|---|
| 146 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 147 | |
|---|
| 148 | } else if (part instanceof org.docx4j.openpackaging.parts.DocPropsCustomPart ) { |
|---|
| 149 | |
|---|
| 150 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcDocPropsCustom); |
|---|
| 151 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 152 | |
|---|
| 153 | } else if (part instanceof org.docx4j.openpackaging.parts.DocPropsExtendedPart ) { |
|---|
| 154 | |
|---|
| 155 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcDocPropsExtended); |
|---|
| 156 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 157 | |
|---|
| 158 | } else if (part instanceof org.docx4j.openpackaging.parts.CustomXmlDataStoragePropertiesPart ) { |
|---|
| 159 | |
|---|
| 160 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcCustomXmlProperties); |
|---|
| 161 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 162 | |
|---|
| 163 | } else if (part instanceof org.docx4j.openpackaging.parts.JaxbXmlPart) { |
|---|
| 164 | |
|---|
| 165 | // MainDocument part, Styles part, Font part etc |
|---|
| 166 | |
|---|
| 167 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jc); |
|---|
| 168 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 169 | |
|---|
| 170 | } else if (part instanceof org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart) { |
|---|
| 171 | |
|---|
| 172 | log.debug("Detected BinaryPart " + part.getClass().getName() ); |
|---|
| 173 | ((BinaryPart)part).setBinaryData(is); |
|---|
| 174 | |
|---|
| 175 | } else if (part instanceof org.docx4j.openpackaging.parts.CustomXmlDataStoragePart ) { |
|---|
| 176 | |
|---|
| 177 | // Is it a part we know? |
|---|
| 178 | try { |
|---|
| 179 | Unmarshaller u = Context.jc.createUnmarshaller(); |
|---|
| 180 | Object o = u.unmarshal( is ); |
|---|
| 181 | log.debug(o.getClass().getName()); |
|---|
| 182 | |
|---|
| 183 | PartName name = part.getPartName(); |
|---|
| 184 | |
|---|
| 185 | if (o instanceof org.opendope.conditions.Conditions) { |
|---|
| 186 | |
|---|
| 187 | part = new ConditionsPart(name); |
|---|
| 188 | ((ConditionsPart)part).setJaxbElement( |
|---|
| 189 | (org.opendope.conditions.Conditions)o); |
|---|
| 190 | |
|---|
| 191 | } else if (o instanceof org.opendope.xpaths.Xpaths) { |
|---|
| 192 | |
|---|
| 193 | part = new XPathsPart(name); |
|---|
| 194 | ((XPathsPart)part).setJaxbElement( |
|---|
| 195 | (org.opendope.xpaths.Xpaths)o); |
|---|
| 196 | |
|---|
| 197 | } else if (o instanceof org.opendope.questions.Questionnaire) { |
|---|
| 198 | |
|---|
| 199 | part = new QuestionsPart(name); |
|---|
| 200 | ((QuestionsPart)part).setJaxbElement( |
|---|
| 201 | (org.opendope.questions.Questionnaire)o); |
|---|
| 202 | |
|---|
| 203 | } else if (o instanceof org.opendope.components.Components) { |
|---|
| 204 | |
|---|
| 205 | part = new ComponentsPart(name); |
|---|
| 206 | ((ComponentsPart)part).setJaxbElement( |
|---|
| 207 | (org.opendope.components.Components)o); |
|---|
| 208 | |
|---|
| 209 | } else if (o instanceof JAXBElement<?> |
|---|
| 210 | && XmlUtils.unwrap(o) instanceof org.docx4j.bibliography.CTSources) { |
|---|
| 211 | part = new BibliographyPart(name); |
|---|
| 212 | ((BibliographyPart) part) |
|---|
| 213 | .setJaxbElement((JAXBElement<org.docx4j.bibliography.CTSources>)o); |
|---|
| 214 | |
|---|
| 215 | } else { |
|---|
| 216 | |
|---|
| 217 | log.warn("No known part after all for CustomXmlPart " + o.getClass().getName()); |
|---|
| 218 | |
|---|
| 219 | CustomXmlDataStorage data = getCustomXmlDataStorageClass().factory(); |
|---|
| 220 | is.reset(); |
|---|
| 221 | data.setDocument(is); // Not necessarily JAXB, that's just our method name |
|---|
| 222 | ((org.docx4j.openpackaging.parts.CustomXmlDataStoragePart)part).setData(data); |
|---|
| 223 | |
|---|
| 224 | } |
|---|
| 225 | |
|---|
| 226 | } catch (javax.xml.bind.UnmarshalException ue) { |
|---|
| 227 | |
|---|
| 228 | // No ... |
|---|
| 229 | CustomXmlDataStorage data = getCustomXmlDataStorageClass().factory(); |
|---|
| 230 | is.reset(); |
|---|
| 231 | data.setDocument(is); // Not necessarily JAXB, that's just our method name |
|---|
| 232 | ((org.docx4j.openpackaging.parts.CustomXmlDataStoragePart)part).setData(data); |
|---|
| 233 | } |
|---|
| 234 | |
|---|
| 235 | } else { |
|---|
| 236 | // Shouldn't happen, since ContentTypeManagerImpl should |
|---|
| 237 | // return an instance of one of the above, or throw an |
|---|
| 238 | // Exception. |
|---|
| 239 | |
|---|
| 240 | log.error("No suitable part found for: " + resolvedPartUri); |
|---|
| 241 | return null; |
|---|
| 242 | } |
|---|
| 243 | } catch (PartUnrecognisedException e) { |
|---|
| 244 | |
|---|
| 245 | // Try to get it as a binary part |
|---|
| 246 | part = new BinaryPart(new PartName("/" + resolvedPartUri)); |
|---|
| 247 | ((BinaryPart) part).setBinaryData(is); |
|---|
| 248 | |
|---|
| 249 | } |
|---|
| 250 | } catch (Exception ex) { |
|---|
| 251 | // PathNotFoundException, ValueFormatException, RepositoryException, URISyntaxException |
|---|
| 252 | ex.printStackTrace(); |
|---|
| 253 | throw new Docx4JException("Failed to getPart", ex); |
|---|
| 254 | } |
|---|
| 255 | return part; |
|---|
| 256 | } |
|---|
| 257 | |
|---|
| 258 | |
|---|
| 259 | /** |
|---|
| 260 | * Find any /customXml/itemN.xml which have a props part |
|---|
| 261 | * which specifies a data store item ID. |
|---|
| 262 | * |
|---|
| 263 | * Register such parts. |
|---|
| 264 | * |
|---|
| 265 | * @param p |
|---|
| 266 | */ |
|---|
| 267 | public static void registerCustomXmlDataStorageParts(OpcPackage pkg) { |
|---|
| 268 | |
|---|
| 269 | HashMap<PartName, Part> parts = pkg.getParts().getParts(); |
|---|
| 270 | |
|---|
| 271 | // Strictly speaking, we're only interested in CustomXmlDataStorageParts |
|---|
| 272 | // which are referred to in document.xml.rels ? |
|---|
| 273 | // But it doesn't do much harm to register a CustomXmlDataStoragePart |
|---|
| 274 | // which has a data store item ID, even if it isn't in document.xml.rels |
|---|
| 275 | |
|---|
| 276 | //Iterator iterator = parts.entrySet().iterator(); |
|---|
| 277 | Collection col = parts.values(); |
|---|
| 278 | Iterator iterator = col.iterator(); |
|---|
| 279 | while( iterator.hasNext() ) { |
|---|
| 280 | Part entry = (Part)iterator.next(); |
|---|
| 281 | |
|---|
| 282 | if (entry instanceof org.docx4j.openpackaging.parts.CustomXmlDataStoragePart) { |
|---|
| 283 | log.debug("Found a CustomXmlDataStoragePart, named " + entry.getPartName().getName() ); |
|---|
| 284 | String itemId = null; |
|---|
| 285 | if (entry.getRelationshipsPart()==null) { |
|---|
| 286 | continue; |
|---|
| 287 | } else { |
|---|
| 288 | log.debug(".. it has a rels part"); |
|---|
| 289 | // Look in its rels for rel of @Type customXmlProps (eg @Target="itemProps1.xml") |
|---|
| 290 | Relationship r = entry.getRelationshipsPart().getRelationshipByType( |
|---|
| 291 | Namespaces.CUSTOM_XML_DATA_STORAGE_PROPERTIES); |
|---|
| 292 | if (r==null) { |
|---|
| 293 | log.debug(".. but that doesn't point to a customXmlProps part"); |
|---|
| 294 | continue; |
|---|
| 295 | } |
|---|
| 296 | CustomXmlDataStoragePropertiesPart customXmlProps = |
|---|
| 297 | (CustomXmlDataStoragePropertiesPart)entry.getRelationshipsPart().getPart(r); |
|---|
| 298 | if (customXmlProps==null) { |
|---|
| 299 | log.error(".. but the target seems to be missing?"); |
|---|
| 300 | |
|---|
| 301 | try { |
|---|
| 302 | org.w3c.dom.Document document = ((CustomXmlDataStoragePart)entry).getData().getDocument(); |
|---|
| 303 | String localName = document.getDocumentElement().getLocalName(); |
|---|
| 304 | log.debug(localName); |
|---|
| 305 | if (document.getDocumentElement().isDefaultNamespace("http://schemas.microsoft.com/?office/?2006/?coverPageProps") |
|---|
| 306 | || localName.equals("CoverPageProperties" ) ) { |
|---|
| 307 | // Special case: CoverPageProperties |
|---|
| 308 | // See "Office Well Defined Custom XML Parts"; see documentinteropinitiative.org/additionalinfo/IS29500/sect5.aspx |
|---|
| 309 | // Has a rels part, but sometimes no target? Sometimes it definitely does ... |
|---|
| 310 | // Give it the store item id, Word 2007 seems to consistently allocate |
|---|
| 311 | itemId = BindingHandler.COVERPAGE_PROPERTIES_STOREITEMID.toLowerCase(); |
|---|
| 312 | } else { |
|---|
| 313 | continue; |
|---|
| 314 | } |
|---|
| 315 | } catch (Docx4JException e) { |
|---|
| 316 | e.printStackTrace(); |
|---|
| 317 | continue; |
|---|
| 318 | } |
|---|
| 319 | } else { |
|---|
| 320 | itemId = customXmlProps.getItemId().toLowerCase(); |
|---|
| 321 | } |
|---|
| 322 | } |
|---|
| 323 | log.info("Identified/registered ds:itemId " + itemId); |
|---|
| 324 | if (pkg.getCustomXmlDataStorageParts().get(itemId.toLowerCase())!=null) { |
|---|
| 325 | log.warn("Duplicate CustomXML itemId " + itemId + "; check your source docx!"); |
|---|
| 326 | } |
|---|
| 327 | pkg.getCustomXmlDataStorageParts().put(itemId, |
|---|
| 328 | (org.docx4j.openpackaging.parts.CustomXmlDataStoragePart)entry ); |
|---|
| 329 | } |
|---|
| 330 | } |
|---|
| 331 | |
|---|
| 332 | |
|---|
| 333 | |
|---|
| 334 | |
|---|
| 335 | } |
|---|
| 336 | |
|---|
| 337 | } |
|---|