| 1 | /* |
|---|
| 2 | * Copyright 2007-2009, Plutext Pty Ltd. |
|---|
| 3 | * |
|---|
| 4 | * This file is part of docx4j. |
|---|
| 5 | |
|---|
| 6 | docx4j is licensed under the Apache License, Version 2.0 (the "License"); |
|---|
| 7 | you may not use this file except in compliance with the License. |
|---|
| 8 | |
|---|
| 9 | You may obtain a copy of the License at |
|---|
| 10 | |
|---|
| 11 | http://www.apache.org/licenses/LICENSE-2.0 |
|---|
| 12 | |
|---|
| 13 | Unless required by applicable law or agreed to in writing, software |
|---|
| 14 | distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 16 | See the License for the specific language governing permissions and |
|---|
| 17 | limitations under the License. |
|---|
| 18 | |
|---|
| 19 | */ |
|---|
| 20 | |
|---|
| 21 | package org.docx4j.openpackaging.io; |
|---|
| 22 | |
|---|
| 23 | |
|---|
| 24 | |
|---|
| 25 | import java.io.BufferedInputStream; |
|---|
| 26 | import java.io.BufferedOutputStream; |
|---|
| 27 | import java.io.ByteArrayInputStream; |
|---|
| 28 | import java.io.ByteArrayOutputStream; |
|---|
| 29 | import java.io.File; |
|---|
| 30 | import java.io.IOException; |
|---|
| 31 | import java.io.InputStream; |
|---|
| 32 | import java.io.Serializable; |
|---|
| 33 | import java.net.URI; |
|---|
| 34 | import java.net.URISyntaxException; |
|---|
| 35 | import java.nio.ByteBuffer; |
|---|
| 36 | import java.util.Enumeration; |
|---|
| 37 | import java.util.HashMap; |
|---|
| 38 | import java.util.Iterator; |
|---|
| 39 | import java.util.zip.ZipEntry; |
|---|
| 40 | import java.util.zip.ZipFile; |
|---|
| 41 | import java.util.zip.ZipInputStream; |
|---|
| 42 | |
|---|
| 43 | import javax.xml.bind.JAXBElement; |
|---|
| 44 | import javax.xml.bind.Unmarshaller; |
|---|
| 45 | |
|---|
| 46 | import org.apache.log4j.Logger; |
|---|
| 47 | import org.docx4j.XmlUtils; |
|---|
| 48 | import org.docx4j.jaxb.Context; |
|---|
| 49 | import org.docx4j.model.datastorage.CustomXmlDataStorage; |
|---|
| 50 | import org.docx4j.openpackaging.Base; |
|---|
| 51 | import org.docx4j.openpackaging.URIHelper; |
|---|
| 52 | import org.docx4j.openpackaging.contenttype.ContentType; |
|---|
| 53 | import org.docx4j.openpackaging.contenttype.ContentTypeManager; |
|---|
| 54 | import org.docx4j.openpackaging.exceptions.Docx4JException; |
|---|
| 55 | import org.docx4j.openpackaging.exceptions.InvalidFormatException; |
|---|
| 56 | import org.docx4j.openpackaging.exceptions.PartUnrecognisedException; |
|---|
| 57 | import org.docx4j.openpackaging.packages.OpcPackage; |
|---|
| 58 | import org.docx4j.openpackaging.parts.DefaultXmlPart; |
|---|
| 59 | import org.docx4j.openpackaging.parts.Part; |
|---|
| 60 | import org.docx4j.openpackaging.parts.PartName; |
|---|
| 61 | import org.docx4j.openpackaging.parts.XmlPart; |
|---|
| 62 | import org.docx4j.openpackaging.parts.WordprocessingML.BibliographyPart; |
|---|
| 63 | import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart; |
|---|
| 64 | import org.docx4j.openpackaging.parts.opendope.ComponentsPart; |
|---|
| 65 | import org.docx4j.openpackaging.parts.opendope.ConditionsPart; |
|---|
| 66 | import org.docx4j.openpackaging.parts.opendope.QuestionsPart; |
|---|
| 67 | import org.docx4j.openpackaging.parts.opendope.XPathsPart; |
|---|
| 68 | import org.docx4j.openpackaging.parts.relationships.Namespaces; |
|---|
| 69 | import org.docx4j.openpackaging.parts.relationships.RelationshipsPart; |
|---|
| 70 | import org.docx4j.relationships.Relationships; |
|---|
| 71 | import org.docx4j.relationships.Relationship; |
|---|
| 72 | |
|---|
| 73 | |
|---|
| 74 | /** |
|---|
| 75 | * Create a Package object from a Zip file or input stream. |
|---|
| 76 | * |
|---|
| 77 | * This class is a refactoring of LoadFromZipFile, which |
|---|
| 78 | * couldn't read from an input stream |
|---|
| 79 | * |
|---|
| 80 | * @author jharrop |
|---|
| 81 | * |
|---|
| 82 | */ |
|---|
| 83 | public class LoadFromZipNG extends Load { |
|---|
| 84 | |
|---|
| 85 | //public HashMap<String, ByteArray> partByteArrays = new HashMap<String, ByteArray>(); |
|---|
| 86 | |
|---|
| 87 | private static Logger log = Logger.getLogger(LoadFromZipNG.class); |
|---|
| 88 | |
|---|
| 89 | // Testing |
|---|
| 90 | public static void main(String[] args) throws Exception { |
|---|
| 91 | String filepath = System.getProperty("user.dir") + "/sample-docs/FontEmbedded.docx"; |
|---|
| 92 | log.info("Path: " + filepath ); |
|---|
| 93 | LoadFromZipNG loader = new LoadFromZipNG(); |
|---|
| 94 | loader.get(filepath); |
|---|
| 95 | } |
|---|
| 96 | |
|---|
| 97 | // HashMap containing the names of all the zip entries, |
|---|
| 98 | // so we can tell whether there are any orphans |
|---|
| 99 | // public HashMap unusedZipEntries = new HashMap(); |
|---|
| 100 | |
|---|
| 101 | |
|---|
/**
 * Creates a loader. Nothing is initialised here: the ContentTypeManager
 * is created afresh for each package that gets loaded.
 */
public LoadFromZipNG() {
    super();
}
|---|
| 105 | |
|---|
| 106 | // public LoadFromZipNG(ContentTypeManager ctm) { |
|---|
| 107 | // this.ctm = ctm; |
|---|
| 108 | // } |
|---|
| 109 | |
|---|
| 110 | |
|---|
| 111 | public OpcPackage get(String filepath) throws Docx4JException { |
|---|
| 112 | return get(new File(filepath)); |
|---|
| 113 | } |
|---|
| 114 | |
|---|
| 115 | public static byte[] getBytesFromInputStream(InputStream is) |
|---|
| 116 | throws Exception { |
|---|
| 117 | |
|---|
| 118 | BufferedInputStream bufIn = new BufferedInputStream(is); |
|---|
| 119 | ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
|---|
| 120 | BufferedOutputStream bos = new BufferedOutputStream(baos); |
|---|
| 121 | int c = bufIn.read(); |
|---|
| 122 | while (c != -1) { |
|---|
| 123 | bos.write(c); |
|---|
| 124 | c = bufIn.read(); |
|---|
| 125 | } |
|---|
| 126 | bos.flush(); |
|---|
| 127 | baos.flush(); |
|---|
| 128 | //bufIn.close(); //don't do that, since it closes the ZipInputStream after we've read an entry! |
|---|
| 129 | bos.close(); |
|---|
| 130 | return baos.toByteArray(); |
|---|
| 131 | } |
|---|
| 132 | |
|---|
| 133 | public OpcPackage get(File f) throws Docx4JException { |
|---|
| 134 | log.info("Filepath = " + f.getPath() ); |
|---|
| 135 | |
|---|
| 136 | ZipFile zf = null; |
|---|
| 137 | try { |
|---|
| 138 | if (!f.exists()) { |
|---|
| 139 | log.info( "Couldn't find " + f.getPath() ); |
|---|
| 140 | } |
|---|
| 141 | zf = new ZipFile(f); |
|---|
| 142 | } catch (IOException ioe) { |
|---|
| 143 | ioe.printStackTrace() ; |
|---|
| 144 | throw new Docx4JException("Couldn't get ZipFile", ioe); |
|---|
| 145 | } |
|---|
| 146 | |
|---|
| 147 | HashMap<String, ByteArray> partByteArrays = new HashMap<String, ByteArray>(); |
|---|
| 148 | Enumeration entries = zf.entries(); |
|---|
| 149 | while (entries.hasMoreElements()) { |
|---|
| 150 | ZipEntry entry = (ZipEntry) entries.nextElement(); |
|---|
| 151 | log.info( "\n\n" + entry.getName() + "\n" ); |
|---|
| 152 | InputStream in = null; |
|---|
| 153 | try { |
|---|
| 154 | byte[] bytes = getBytesFromInputStream( zf.getInputStream(entry) ); |
|---|
| 155 | partByteArrays.put(entry.getName(), new ByteArray(bytes) ); |
|---|
| 156 | } catch (Exception e) { |
|---|
| 157 | e.printStackTrace() ; |
|---|
| 158 | } |
|---|
| 159 | } |
|---|
| 160 | // At this point, we've finished with the zip file |
|---|
| 161 | try { |
|---|
| 162 | zf.close(); |
|---|
| 163 | } catch (IOException exc) { |
|---|
| 164 | exc.printStackTrace(); |
|---|
| 165 | } |
|---|
| 166 | |
|---|
| 167 | |
|---|
| 168 | return process(partByteArrays); |
|---|
| 169 | } |
|---|
| 170 | |
|---|
| 171 | public OpcPackage get(InputStream is) throws Docx4JException { |
|---|
| 172 | |
|---|
| 173 | HashMap<String, ByteArray> partByteArrays = new HashMap<String, ByteArray>(); |
|---|
| 174 | try { |
|---|
| 175 | ZipInputStream zis = new ZipInputStream(is); |
|---|
| 176 | ZipEntry entry = null; |
|---|
| 177 | while ((entry = zis.getNextEntry()) != null) { |
|---|
| 178 | byte[] bytes = getBytesFromInputStream( zis ); |
|---|
| 179 | //log.debug("Extracting " + entry.getName()); |
|---|
| 180 | partByteArrays.put(entry.getName(), new ByteArray(bytes) ); |
|---|
| 181 | } |
|---|
| 182 | zis.close(); |
|---|
| 183 | } catch (Exception e) { |
|---|
| 184 | log.error(e.getMessage()); |
|---|
| 185 | throw new Docx4JException("Error processing zip file (is it a zip file?)", e); |
|---|
| 186 | } |
|---|
| 187 | |
|---|
| 188 | // At this point, we're finished with the zip input stream |
|---|
| 189 | // TODO, so many of the below methods could be renamed. |
|---|
| 190 | // If performance is ok, LoadFromJCR could be refactored to |
|---|
| 191 | // work the same way |
|---|
| 192 | |
|---|
| 193 | return process(partByteArrays); |
|---|
| 194 | } |
|---|
| 195 | |
|---|
| 196 | private OpcPackage process(HashMap<String, ByteArray> partByteArrays) throws Docx4JException { |
|---|
| 197 | |
|---|
| 198 | // 2. Create a new Package |
|---|
| 199 | // Eventually, you'll also be able to create an Excel package etc |
|---|
| 200 | // but only the WordML package exists at present |
|---|
| 201 | |
|---|
| 202 | ContentTypeManager ctm = new ContentTypeManager(); |
|---|
| 203 | |
|---|
| 204 | try { |
|---|
| 205 | InputStream is = getInputStreamFromZippedPart( partByteArrays, "[Content_Types].xml"); |
|---|
| 206 | ctm.parseContentTypesFile(is); |
|---|
| 207 | } catch (IOException e) { |
|---|
| 208 | throw new Docx4JException("Couldn't get [Content_Types].xml from ZipFile", e); |
|---|
| 209 | } |
|---|
| 210 | |
|---|
| 211 | OpcPackage p = ctm.createPackage(); |
|---|
| 212 | |
|---|
| 213 | // 3. Get [Content_Types].xml |
|---|
| 214 | // Once we've got this, then we can look up the content type for |
|---|
| 215 | // each PartName, and use it in the Part constructor. |
|---|
| 216 | // p.setContentTypeManager(ctm); - 20080111 - done by ctm.createPackage(); |
|---|
| 217 | |
|---|
| 218 | // unusedZipEntries.put("[Content_Types].xml", new Boolean(false)); |
|---|
| 219 | |
|---|
| 220 | // 4. Start with _rels/.rels |
|---|
| 221 | |
|---|
| 222 | // <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"> |
|---|
| 223 | // <Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties" Target="docProps/app.xml"/> |
|---|
| 224 | // <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties" Target="docProps/core.xml"/> |
|---|
| 225 | // <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/> |
|---|
| 226 | // </Relationships> |
|---|
| 227 | |
|---|
| 228 | String partName = "_rels/.rels"; |
|---|
| 229 | RelationshipsPart rp = getRelationshipsPartFromZip(p, partByteArrays, partName); |
|---|
| 230 | p.setRelationships(rp); |
|---|
| 231 | //rp.setPackageRelationshipPart(true); |
|---|
| 232 | |
|---|
| 233 | // unusedZipEntries.put(partName, new Boolean(false)); |
|---|
| 234 | |
|---|
| 235 | |
|---|
| 236 | log.debug( "Object created for: " + partName); |
|---|
| 237 | |
|---|
| 238 | // 5. Now recursively |
|---|
| 239 | // (i) create new Parts for each thing listed |
|---|
| 240 | // in the relationships |
|---|
| 241 | // (ii) add the new Part to the package |
|---|
| 242 | // (iii) cross the PartName off unusedZipEntries |
|---|
| 243 | addPartsFromRelationships(partByteArrays, p, rp, ctm ); |
|---|
| 244 | |
|---|
| 245 | |
|---|
| 246 | // 6. Check unusedZipEntries is empty |
|---|
| 247 | // if (log.isDebugEnabled()) { |
|---|
| 248 | // Iterator myVeryOwnIterator = unusedZipEntries.keySet().iterator(); |
|---|
| 249 | // while(myVeryOwnIterator.hasNext()) { |
|---|
| 250 | // String key = (String)myVeryOwnIterator.next(); |
|---|
| 251 | // log.info( key + " " + unusedZipEntries.get(key)); |
|---|
| 252 | // } |
|---|
| 253 | // } |
|---|
| 254 | |
|---|
| 255 | registerCustomXmlDataStorageParts(p); |
|---|
| 256 | |
|---|
| 257 | return p; |
|---|
| 258 | } |
|---|
| 259 | |
|---|
| 260 | //private RelationshipsPart getRelationshipsPartFromZip(Base p, ZipFile zf, String partName) |
|---|
| 261 | private RelationshipsPart getRelationshipsPartFromZip(Base p, HashMap<String, ByteArray> partByteArrays, String partName) |
|---|
| 262 | throws Docx4JException { |
|---|
| 263 | // Document contents = null; |
|---|
| 264 | // try { |
|---|
| 265 | // contents = getDocumentFromZippedPart( zf, partName); |
|---|
| 266 | // } catch (Exception e) { |
|---|
| 267 | // e.printStackTrace(); |
|---|
| 268 | // throw new Docx4JException("Error getting document from Zipped Part", e); |
|---|
| 269 | // |
|---|
| 270 | // } |
|---|
| 271 | // // debugPrint(contents); |
|---|
| 272 | // // TODO - why don't any of the part names in this document start with "/"? |
|---|
| 273 | // return new RelationshipsPart( p, new PartName("/" + partName), contents ); |
|---|
| 274 | |
|---|
| 275 | RelationshipsPart rp = null; |
|---|
| 276 | |
|---|
| 277 | InputStream is = null; |
|---|
| 278 | try { |
|---|
| 279 | is = getInputStreamFromZippedPart( partByteArrays, partName); |
|---|
| 280 | //thePart = new RelationshipsPart( p, new PartName("/" + partName), is ); |
|---|
| 281 | rp = new RelationshipsPart(new PartName("/" + partName) ); |
|---|
| 282 | rp.setSourceP(p); |
|---|
| 283 | rp.unmarshal(is); |
|---|
| 284 | |
|---|
| 285 | } catch (Exception e) { |
|---|
| 286 | e.printStackTrace(); |
|---|
| 287 | throw new Docx4JException("Error getting document from Zipped Part:" + partName, e); |
|---|
| 288 | |
|---|
| 289 | } finally { |
|---|
| 290 | if (is != null) { |
|---|
| 291 | try { |
|---|
| 292 | is.close(); |
|---|
| 293 | } catch (IOException exc) { |
|---|
| 294 | exc.printStackTrace(); |
|---|
| 295 | } |
|---|
| 296 | } |
|---|
| 297 | } |
|---|
| 298 | |
|---|
| 299 | return rp; |
|---|
| 300 | // debugPrint(contents); |
|---|
| 301 | // TODO - why don't any of the part names in this document start with "/"? |
|---|
| 302 | } |
|---|
| 303 | |
|---|
| 304 | private static InputStream getInputStreamFromZippedPart(HashMap<String, ByteArray> partByteArrays, |
|---|
| 305 | String partName) throws IOException { |
|---|
| 306 | |
|---|
| 307 | ByteArray bytes = partByteArrays.get(partName); |
|---|
| 308 | if (bytes == null) throw new IOException("part '" + partName + "' not found"); |
|---|
| 309 | return bytes.getInputStream(); |
|---|
| 310 | } |
|---|
| 311 | |
|---|
| 312 | |
|---|
| 313 | /* recursively |
|---|
| 314 | (i) create new Parts for each thing listed |
|---|
| 315 | in the relationships |
|---|
| 316 | (ii) add the new Part to the package |
|---|
| 317 | (iii) cross the PartName off unusedZipEntries |
|---|
| 318 | */ |
|---|
| 319 | //private void addPartsFromRelationships(ZipFile zf, Base source, RelationshipsPart rp) |
|---|
| 320 | private void addPartsFromRelationships(HashMap<String, ByteArray> partByteArrays, |
|---|
| 321 | Base source, RelationshipsPart rp, ContentTypeManager ctm) |
|---|
| 322 | throws Docx4JException { |
|---|
| 323 | |
|---|
| 324 | OpcPackage pkg = source.getPackage(); |
|---|
| 325 | |
|---|
| 326 | // for (Iterator it = rp.iterator(); it.hasNext(); ) { |
|---|
| 327 | // Relationship r = (Relationship)it.next(); |
|---|
| 328 | // log.info("For Relationship Id=" + r.getId() + " Source is " |
|---|
| 329 | // + r.getSource().getPartName() |
|---|
| 330 | // + ", Target is " + r.getTargetURI() ); |
|---|
| 331 | // try { |
|---|
| 332 | // |
|---|
| 333 | // getPart(zf, pkg, rp, r); |
|---|
| 334 | // |
|---|
| 335 | // } catch (Exception e) { |
|---|
| 336 | // throw new Docx4JException("Failed to add parts from relationships", e); |
|---|
| 337 | // } |
|---|
| 338 | // } |
|---|
| 339 | |
|---|
| 340 | for ( Relationship r : rp.getRelationships().getRelationship() ) { |
|---|
| 341 | |
|---|
| 342 | log.debug("\n For Relationship Id=" + r.getId() |
|---|
| 343 | + " Source is " + rp.getSourceP().getPartName() |
|---|
| 344 | + ", Target is " + r.getTarget() |
|---|
| 345 | + ", type: " + r.getType() ); |
|---|
| 346 | |
|---|
| 347 | // This is usually the first logged comment for |
|---|
| 348 | // a part, so start with a line break. |
|---|
| 349 | try { |
|---|
| 350 | getPart(partByteArrays, pkg, rp, r, ctm); |
|---|
| 351 | } catch (Exception e) { |
|---|
| 352 | throw new Docx4JException("Failed to add parts from relationships", e); |
|---|
| 353 | } |
|---|
| 354 | } |
|---|
| 355 | |
|---|
| 356 | |
|---|
| 357 | } |
|---|
| 358 | |
|---|
| 359 | /** |
|---|
| 360 | * Get a Part (except a relationships part), and all its related parts. |
|---|
| 361 | * This can be called directly from outside the library, in which case |
|---|
| 362 | * the Part will not be owned by a Package until the calling code makes it so. |
|---|
| 363 | * |
|---|
| 364 | * @param zf |
|---|
| 365 | * @param source |
|---|
| 366 | * @param unusedZipEntries |
|---|
| 367 | * @param pkg |
|---|
| 368 | * @param r |
|---|
| 369 | * @param resolvedPartUri |
|---|
| 370 | * @throws Docx4JException |
|---|
| 371 | * @throws InvalidFormatException |
|---|
| 372 | */ |
|---|
| 373 | //private void getPart(ZipFile zf, Package pkg, RelationshipsPart rp, Relationship r) |
|---|
| 374 | private void getPart(HashMap<String, ByteArray> partByteArrays, OpcPackage pkg, RelationshipsPart rp, |
|---|
| 375 | Relationship r, ContentTypeManager ctm) |
|---|
| 376 | throws Docx4JException, InvalidFormatException, URISyntaxException { |
|---|
| 377 | |
|---|
| 378 | Base source = null; |
|---|
| 379 | String resolvedPartUri = null; |
|---|
| 380 | |
|---|
| 381 | if (r.getType().equals(Namespaces.HYPERLINK)) { |
|---|
| 382 | // Could be Internal or External |
|---|
| 383 | // Example of Internal is w:drawing/wp:inline/wp:docPr/a:hlinkClick |
|---|
| 384 | log.info("Encountered (but not loading) hyperlink " + r.getTarget() ); |
|---|
| 385 | return; |
|---|
| 386 | } else |
|---|
| 387 | if (r.getTargetMode() == null |
|---|
| 388 | || !r.getTargetMode().equals("External") ) { |
|---|
| 389 | |
|---|
| 390 | // Usual case |
|---|
| 391 | |
|---|
| 392 | source = rp.getSourceP(); |
|---|
| 393 | resolvedPartUri = URIHelper.resolvePartUri(rp.getSourceURI(), new URI(r.getTarget() ) ).toString(); |
|---|
| 394 | |
|---|
| 395 | // Now drop leading "/' |
|---|
| 396 | resolvedPartUri = resolvedPartUri.substring(1); |
|---|
| 397 | |
|---|
| 398 | // Now normalise it .. ie abc/def/../ghi |
|---|
| 399 | // becomes abc/ghi |
|---|
| 400 | // Maybe this isn't necessary with a zip file, |
|---|
| 401 | // - ZipFile class may be smart enough to do it. |
|---|
| 402 | // But it is certainly necessary in the JCR case. |
|---|
| 403 | // resolvedPartUri = (new java.net.URI(resolvedPartUri)).normalize().toString(); |
|---|
| 404 | // log.info("Normalised, it is " + resolvedPartUri ); |
|---|
| 405 | |
|---|
| 406 | } else { |
|---|
| 407 | // EXTERNAL |
|---|
| 408 | if (loadExternalTargets && |
|---|
| 409 | r.getType().equals( Namespaces.IMAGE ) ) { |
|---|
| 410 | // It could instead be, for example, of type hyperlink, |
|---|
| 411 | // and we don't want to try to fetch that |
|---|
| 412 | log.info("Loading external resource " + r.getTarget() |
|---|
| 413 | + " of type " + r.getType() ); |
|---|
| 414 | BinaryPart bp = ExternalResourceUtils.getExternalResource(r.getTarget()); |
|---|
| 415 | pkg.getExternalResources().put(bp.getExternalTarget(), bp); |
|---|
| 416 | } else { |
|---|
| 417 | log.info("Encountered (but not loading) external resource " + r.getTarget() |
|---|
| 418 | + " of type " + r.getType() ); |
|---|
| 419 | } |
|---|
| 420 | return; |
|---|
| 421 | } |
|---|
| 422 | |
|---|
| 423 | if (pkg.handled.get(resolvedPartUri)!=null) return; |
|---|
| 424 | |
|---|
| 425 | String relationshipType = r.getType(); |
|---|
| 426 | |
|---|
| 427 | Part part = getRawPart(partByteArrays, ctm, resolvedPartUri, r); // will throw exception if null |
|---|
| 428 | |
|---|
| 429 | if (part instanceof BinaryPart |
|---|
| 430 | || part instanceof DefaultXmlPart) { |
|---|
| 431 | // The constructors of other parts should take care of this... |
|---|
| 432 | part.setRelationshipType(relationshipType); |
|---|
| 433 | } |
|---|
| 434 | rp.loadPart(part, r); |
|---|
| 435 | pkg.handled.put(resolvedPartUri, resolvedPartUri); |
|---|
| 436 | |
|---|
| 437 | // The source Part (or Package) might have a convenience |
|---|
| 438 | // method for this |
|---|
| 439 | if (source.setPartShortcut(part, relationshipType ) ) { |
|---|
| 440 | log.debug("Convenience method established from " + source.getPartName() |
|---|
| 441 | + " to " + part.getPartName()); |
|---|
| 442 | } |
|---|
| 443 | |
|---|
| 444 | // unusedZipEntries.put(resolvedPartUri, new Boolean(false)); |
|---|
| 445 | |
|---|
| 446 | RelationshipsPart rrp = getRelationshipsPart(partByteArrays, part); |
|---|
| 447 | if (rrp!=null) { |
|---|
| 448 | // recurse via this parts relationships, if it has any |
|---|
| 449 | addPartsFromRelationships(partByteArrays, part, rrp, ctm ); |
|---|
| 450 | String relPart = PartName.getRelationshipsPartName( |
|---|
| 451 | part.getPartName().getName().substring(1) ); |
|---|
| 452 | // unusedZipEntries.put(relPart, new Boolean(false)); |
|---|
| 453 | } |
|---|
| 454 | } |
|---|
| 455 | |
|---|
| 456 | /** |
|---|
| 457 | * Get the Relationships Part (if there is one) for a given Part. |
|---|
| 458 | * Otherwise return null. |
|---|
| 459 | * |
|---|
| 460 | * @param zf |
|---|
| 461 | * @param part |
|---|
| 462 | * @return |
|---|
| 463 | * @throws InvalidFormatException |
|---|
| 464 | */ |
|---|
| 465 | //public RelationshipsPart getRelationshipsPart(ZipFile zf, Part part) |
|---|
| 466 | public RelationshipsPart getRelationshipsPart(HashMap<String, ByteArray> partByteArrays, |
|---|
| 467 | Part part) |
|---|
| 468 | throws Docx4JException, InvalidFormatException { |
|---|
| 469 | |
|---|
| 470 | RelationshipsPart rrp = null; |
|---|
| 471 | // recurse via this parts relationships, if it has any |
|---|
| 472 | //String relPart = PartName.getRelationshipsPartName(target); |
|---|
| 473 | String relPart = PartName.getRelationshipsPartName( |
|---|
| 474 | part.getPartName().getName().substring(1) ); |
|---|
| 475 | |
|---|
| 476 | if (partByteArrays.get(relPart) !=null ) { |
|---|
| 477 | log.debug("Found relationships " + relPart ); |
|---|
| 478 | rrp = getRelationshipsPartFromZip(part, partByteArrays, relPart); |
|---|
| 479 | part.setRelationships(rrp); |
|---|
| 480 | } else { |
|---|
| 481 | log.debug("No relationships " + relPart ); |
|---|
| 482 | return null; |
|---|
| 483 | } |
|---|
| 484 | return rrp; |
|---|
| 485 | } |
|---|
| 486 | |
|---|
| 487 | |
|---|
| 488 | |
|---|
| 489 | /** |
|---|
| 490 | * Get a Part (except a relationships part), but not its relationships part |
|---|
| 491 | * or related parts. Useful if you need quick access to just this part. |
|---|
| 492 | * This can be called directly from outside the library, in which case |
|---|
| 493 | * the Part will not be owned by a Package until the calling code makes it so. |
|---|
| 494 | * @see To get a Part and all its related parts, and add all to a package, use |
|---|
| 495 | * getPart. |
|---|
| 496 | * @param partByteArrays |
|---|
| 497 | * @param ctm |
|---|
| 498 | * @param resolvedPartUri |
|---|
| 499 | * @param rel |
|---|
| 500 | * @return |
|---|
| 501 | * @throws Docx4JException including if result is null |
|---|
| 502 | */ |
|---|
| 503 | public static Part getRawPart(HashMap<String, ByteArray> partByteArrays, |
|---|
| 504 | ContentTypeManager ctm, String resolvedPartUri, Relationship rel) |
|---|
| 505 | throws Docx4JException { |
|---|
| 506 | |
|---|
| 507 | Part part = null; |
|---|
| 508 | |
|---|
| 509 | InputStream is = null; |
|---|
| 510 | try { |
|---|
| 511 | try { |
|---|
| 512 | log.debug("resolved uri: " + resolvedPartUri); |
|---|
| 513 | is = getInputStreamFromZippedPart( partByteArrays, resolvedPartUri); |
|---|
| 514 | |
|---|
| 515 | // Get a subclass of Part appropriate for this content type |
|---|
| 516 | // This will throw UnrecognisedPartException in the absence of |
|---|
| 517 | // specific knowledge. Hence it is important to get the is |
|---|
| 518 | // first, as we do above. |
|---|
| 519 | part = ctm.getPart("/" + resolvedPartUri, rel); |
|---|
| 520 | |
|---|
| 521 | if (part instanceof org.docx4j.openpackaging.parts.ThemePart) { |
|---|
| 522 | |
|---|
| 523 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcThemePart); |
|---|
| 524 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 525 | |
|---|
| 526 | } else if (part instanceof org.docx4j.openpackaging.parts.DocPropsCorePart ) { |
|---|
| 527 | |
|---|
| 528 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcDocPropsCore); |
|---|
| 529 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 530 | |
|---|
| 531 | } else if (part instanceof org.docx4j.openpackaging.parts.DocPropsCustomPart ) { |
|---|
| 532 | |
|---|
| 533 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcDocPropsCustom); |
|---|
| 534 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 535 | |
|---|
| 536 | } else if (part instanceof org.docx4j.openpackaging.parts.DocPropsExtendedPart ) { |
|---|
| 537 | |
|---|
| 538 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcDocPropsExtended); |
|---|
| 539 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 540 | |
|---|
| 541 | } else if (part instanceof org.docx4j.openpackaging.parts.CustomXmlDataStoragePropertiesPart ) { |
|---|
| 542 | |
|---|
| 543 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcCustomXmlProperties); |
|---|
| 544 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 545 | |
|---|
| 546 | } else if (part instanceof org.docx4j.openpackaging.parts.digitalsignature.XmlSignaturePart ) { |
|---|
| 547 | |
|---|
| 548 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jcXmlDSig); |
|---|
| 549 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 550 | |
|---|
| 551 | } else if (part instanceof org.docx4j.openpackaging.parts.JaxbXmlPart) { |
|---|
| 552 | |
|---|
| 553 | // MainDocument part, Styles part, Font part etc |
|---|
| 554 | |
|---|
| 555 | //((org.docx4j.openpackaging.parts.JaxbXmlPart)part).setJAXBContext(Context.jc); |
|---|
| 556 | ((org.docx4j.openpackaging.parts.JaxbXmlPart)part).unmarshal( is ); |
|---|
| 557 | |
|---|
| 558 | } else if (part instanceof org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart) { |
|---|
| 559 | |
|---|
| 560 | log.debug("Detected BinaryPart " + part.getClass().getName() ); |
|---|
| 561 | ((BinaryPart)part).setBinaryData(is); |
|---|
| 562 | |
|---|
| 563 | } else if (part instanceof org.docx4j.openpackaging.parts.CustomXmlDataStoragePart ) { |
|---|
| 564 | |
|---|
| 565 | // Is it a part we know? |
|---|
| 566 | try { |
|---|
| 567 | Unmarshaller u = Context.jc.createUnmarshaller(); |
|---|
| 568 | Object o = u.unmarshal( is ); |
|---|
| 569 | log.debug(o.getClass().getName()); |
|---|
| 570 | |
|---|
| 571 | PartName name = part.getPartName(); |
|---|
| 572 | |
|---|
| 573 | if (o instanceof org.opendope.conditions.Conditions) { |
|---|
| 574 | |
|---|
| 575 | part = new ConditionsPart(name); |
|---|
| 576 | ((ConditionsPart)part).setJaxbElement( |
|---|
| 577 | (org.opendope.conditions.Conditions)o); |
|---|
| 578 | |
|---|
| 579 | |
|---|
| 580 | } else if (o instanceof org.opendope.xpaths.Xpaths) { |
|---|
| 581 | |
|---|
| 582 | part = new XPathsPart(name); |
|---|
| 583 | ((XPathsPart)part).setJaxbElement( |
|---|
| 584 | (org.opendope.xpaths.Xpaths)o); |
|---|
| 585 | |
|---|
| 586 | } else if (o instanceof org.opendope.questions.Questionnaire) { |
|---|
| 587 | |
|---|
| 588 | part = new QuestionsPart(name); |
|---|
| 589 | ((QuestionsPart)part).setJaxbElement( |
|---|
| 590 | (org.opendope.questions.Questionnaire)o); |
|---|
| 591 | |
|---|
| 592 | } else if (o instanceof org.opendope.components.Components) { |
|---|
| 593 | |
|---|
| 594 | part = new ComponentsPart(name); |
|---|
| 595 | ((ComponentsPart)part).setJaxbElement( |
|---|
| 596 | (org.opendope.components.Components)o); |
|---|
| 597 | |
|---|
| 598 | } else if (o instanceof JAXBElement<?> |
|---|
| 599 | && XmlUtils.unwrap(o) instanceof org.docx4j.bibliography.CTSources) { |
|---|
| 600 | part = new BibliographyPart(name); |
|---|
| 601 | ((BibliographyPart)part).setJaxbElement( |
|---|
| 602 | (JAXBElement<org.docx4j.bibliography.CTSources>)o); |
|---|
| 603 | |
|---|
| 604 | } else { |
|---|
| 605 | |
|---|
| 606 | log.warn("No known part after all for CustomXmlPart " + o.getClass().getName()); |
|---|
| 607 | |
|---|
| 608 | CustomXmlDataStorage data = getCustomXmlDataStorageClass().factory(); |
|---|
| 609 | is.reset(); |
|---|
| 610 | data.setDocument(is); // Not necessarily JAXB, that's just our method name |
|---|
| 611 | ((org.docx4j.openpackaging.parts.CustomXmlDataStoragePart)part).setData(data); |
|---|
| 612 | |
|---|
| 613 | } |
|---|
| 614 | |
|---|
| 615 | } catch (javax.xml.bind.UnmarshalException ue) { |
|---|
| 616 | |
|---|
| 617 | // No ... |
|---|
| 618 | CustomXmlDataStorage data = getCustomXmlDataStorageClass().factory(); |
|---|
| 619 | is.reset(); |
|---|
| 620 | data.setDocument(is); // Not necessarily JAXB, that's just our method name |
|---|
| 621 | ((org.docx4j.openpackaging.parts.CustomXmlDataStoragePart)part).setData(data); |
|---|
| 622 | } |
|---|
| 623 | |
|---|
| 624 | } else if (part instanceof org.docx4j.openpackaging.parts.XmlPart ) { |
|---|
| 625 | |
|---|
| 626 | // try { |
|---|
| 627 | ((XmlPart)part).setDocument(is); |
|---|
| 628 | |
|---|
| 629 | // Experimental 22/6/2011; don't fall back to binary (which we used to) |
|---|
| 630 | |
|---|
| 631 | // } catch (Docx4JException d) { |
|---|
| 632 | // // This isn't an XML part after all, |
|---|
| 633 | // // even though ContentTypeManager detected it as such |
|---|
| 634 | // // So get it as a binary part |
|---|
| 635 | // part = getBinaryPart(partByteArrays, ctm, resolvedPartUri); |
|---|
| 636 | // log.warn("Could not parse as XML, so using BinaryPart for " |
|---|
| 637 | // + resolvedPartUri); |
|---|
| 638 | // ((BinaryPart)part).setBinaryData(is); |
|---|
| 639 | // } |
|---|
| 640 | |
|---|
| 641 | } else { |
|---|
| 642 | // Shouldn't happen, since ContentTypeManagerImpl should |
|---|
| 643 | // return an instance of one of the above, or throw an |
|---|
| 644 | // Exception. |
|---|
| 645 | |
|---|
| 646 | log.error("No suitable part found for: " + resolvedPartUri); |
|---|
| 647 | part = null; |
|---|
| 648 | } |
|---|
| 649 | |
|---|
| 650 | } catch (PartUnrecognisedException e) { |
|---|
| 651 | log.error("PartUnrecognisedException shouldn't happen anymore!", e); |
|---|
| 652 | // Try to get it as a binary part |
|---|
| 653 | part = getBinaryPart(partByteArrays, ctm, resolvedPartUri); |
|---|
| 654 | log.warn("Using BinaryPart for " + resolvedPartUri); |
|---|
| 655 | |
|---|
| 656 | ((BinaryPart)part).setBinaryData(is); |
|---|
| 657 | } |
|---|
| 658 | } catch (Exception ex) { |
|---|
| 659 | // IOException, URISyntaxException |
|---|
| 660 | ex.printStackTrace(); |
|---|
| 661 | throw new Docx4JException("Failed to getPart", ex); |
|---|
| 662 | |
|---|
| 663 | } finally { |
|---|
| 664 | if (is != null) { |
|---|
| 665 | try { |
|---|
| 666 | is.close(); |
|---|
| 667 | } catch (IOException exc) { |
|---|
| 668 | exc.printStackTrace(); |
|---|
| 669 | } |
|---|
| 670 | } |
|---|
| 671 | } |
|---|
| 672 | |
|---|
| 673 | if (part == null) { |
|---|
| 674 | throw new Docx4JException("cannot find part " + resolvedPartUri + " from rel "+ rel.getId() + "=" + rel.getTarget()); |
|---|
| 675 | } |
|---|
| 676 | |
|---|
| 677 | return part; |
|---|
| 678 | } |
|---|
| 679 | |
|---|
| 680 | //public static Part getBinaryPart(ZipFile zf, ContentTypeManager ctm, String resolvedPartUri) |
|---|
| 681 | public static Part getBinaryPart(HashMap<String, ByteArray> partByteArrays, |
|---|
| 682 | ContentTypeManager ctm, String resolvedPartUri) |
|---|
| 683 | throws Docx4JException { |
|---|
| 684 | |
|---|
| 685 | Part part = null; |
|---|
| 686 | InputStream in = null; |
|---|
| 687 | try { |
|---|
| 688 | //in = zf.getInputStream( zf.getEntry(resolvedPartUri ) ); |
|---|
| 689 | in = partByteArrays.get(resolvedPartUri).getInputStream(); |
|---|
| 690 | part = new BinaryPart( new PartName("/" + resolvedPartUri)); |
|---|
| 691 | |
|---|
| 692 | // Set content type |
|---|
| 693 | part.setContentType( |
|---|
| 694 | new ContentType( |
|---|
| 695 | ctm.getContentType(new PartName("/" + resolvedPartUri)) ) ); |
|---|
| 696 | |
|---|
| 697 | ((BinaryPart)part).setBinaryData(in); |
|---|
| 698 | log.info("Stored as BinaryData" ); |
|---|
| 699 | |
|---|
| 700 | } catch (Exception ioe) { |
|---|
| 701 | ioe.printStackTrace() ; |
|---|
| 702 | } finally { |
|---|
| 703 | if (in != null) { |
|---|
| 704 | try { |
|---|
| 705 | in.close(); |
|---|
| 706 | } catch (IOException exc) { |
|---|
| 707 | exc.printStackTrace(); |
|---|
| 708 | } |
|---|
| 709 | } |
|---|
| 710 | } |
|---|
| 711 | return part; |
|---|
| 712 | } |
|---|
| 713 | |
|---|
| 714 | public static class ByteArray implements Serializable { |
|---|
| 715 | |
|---|
| 716 | private static final long serialVersionUID = -784146312250361899L; |
|---|
| 717 | // 4469266984448028582L; |
|---|
| 718 | |
|---|
| 719 | private byte[] bytes; |
|---|
| 720 | public byte[] getBytes() { |
|---|
| 721 | return bytes; |
|---|
| 722 | } |
|---|
| 723 | |
|---|
| 724 | private String mimetype; |
|---|
| 725 | public String getMimetype() { |
|---|
| 726 | return mimetype; |
|---|
| 727 | } |
|---|
| 728 | |
|---|
| 729 | public ByteArray(byte[] bytes) { |
|---|
| 730 | this.bytes = bytes; |
|---|
| 731 | //log.info("Added " + bytes.length ); |
|---|
| 732 | } |
|---|
| 733 | |
|---|
| 734 | |
|---|
| 735 | public ByteArray(ByteBuffer bb, String mimetype ) { |
|---|
| 736 | |
|---|
| 737 | bb.clear(); |
|---|
| 738 | bytes = new byte[bb.capacity()]; |
|---|
| 739 | bb.get(bytes, 0, bytes.length); |
|---|
| 740 | |
|---|
| 741 | this.mimetype = mimetype; |
|---|
| 742 | } |
|---|
| 743 | |
|---|
| 744 | |
|---|
| 745 | public InputStream getInputStream() { |
|---|
| 746 | return new ByteArrayInputStream(bytes); |
|---|
| 747 | } |
|---|
| 748 | |
|---|
| 749 | public int getLength() { |
|---|
| 750 | return bytes.length; |
|---|
| 751 | } |
|---|
| 752 | |
|---|
| 753 | } |
|---|
| 754 | |
|---|
| 755 | |
|---|
| 756 | |
|---|
| 757 | } |
|---|