| 1 | /* |
|---|
| 2 | * Copyright 2007-2008, Plutext Pty Ltd. |
|---|
| 3 | * |
|---|
| 4 | * This file is part of docx4j. |
|---|
| 5 | |
|---|
| 6 | docx4j is licensed under the Apache License, Version 2.0 (the "License"); |
|---|
| 7 | you may not use this file except in compliance with the License. |
|---|
| 8 | |
|---|
| 9 | You may obtain a copy of the License at |
|---|
| 10 | |
|---|
| 11 | http://www.apache.org/licenses/LICENSE-2.0 |
|---|
| 12 | |
|---|
| 13 | Unless required by applicable law or agreed to in writing, software |
|---|
| 14 | distributed under the License is distributed on an "AS IS" BASIS, |
|---|
| 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|---|
| 16 | See the License for the specific language governing permissions and |
|---|
| 17 | limitations under the License. |
|---|
| 18 | |
|---|
| 19 | */ |
|---|
| 20 | |
|---|
| 21 | package org.docx4j.samples; |
|---|
| 22 | |
|---|
| 23 | |
|---|
| 24 | import java.io.File; |
|---|
| 25 | import java.net.URI; |
|---|
| 26 | import java.util.ArrayList; |
|---|
| 27 | import java.util.List; |
|---|
| 28 | |
|---|
| 29 | import org.apache.log4j.Logger; |
|---|
| 30 | import org.docx4j.openpackaging.URIHelper; |
|---|
| 31 | import org.docx4j.openpackaging.exceptions.Docx4JException; |
|---|
| 32 | import org.docx4j.openpackaging.packages.WordprocessingMLPackage; |
|---|
| 33 | import org.docx4j.openpackaging.parts.Part; |
|---|
| 34 | import org.docx4j.openpackaging.parts.PartName; |
|---|
| 35 | import org.docx4j.openpackaging.parts.relationships.RelationshipsPart; |
|---|
| 36 | import org.docx4j.relationships.Relationship; |
|---|
| 37 | |
|---|
| 38 | |
|---|
| 39 | public class StripParts { |
|---|
| 40 | |
|---|
| 41 | |
|---|
| 42 | static boolean save = true; |
|---|
| 43 | static boolean flatOpcXmlOutput = false; |
|---|
| 44 | static boolean overwriteInputFile = true; |
|---|
| 45 | |
|---|
| 46 | static String dir = System.getProperty("user.dir") + "/src/test/resources/AlteredParts/"; |
|---|
| 47 | static String file = "blagh"; // set to null to process all docx in dir |
|---|
| 48 | |
|---|
| 49 | static boolean stripPropertiesParts = true; |
|---|
| 50 | static boolean keepStyles = true; |
|---|
| 51 | static boolean defaultToDelete = false; |
|---|
| 52 | |
|---|
| 53 | private static Logger log = Logger.getLogger(StripParts.class); |
|---|
| 54 | |
|---|
| 55 | /** |
|---|
| 56 | * @param args |
|---|
| 57 | */ |
|---|
| 58 | public static void main(String[] args) throws Exception { |
|---|
| 59 | |
|---|
| 60 | if (file==null) { |
|---|
| 61 | |
|---|
| 62 | List<File> filesToProcess = new ArrayList<File>(); |
|---|
| 63 | File[] filesAndDirs = new File(dir).listFiles(); |
|---|
| 64 | for (File file : filesAndDirs) { |
|---|
| 65 | if (file.isFile() |
|---|
| 66 | && file.getName().endsWith(".docx") ) { |
|---|
| 67 | filesToProcess.add(file); |
|---|
| 68 | } |
|---|
| 69 | } |
|---|
| 70 | |
|---|
| 71 | for (File file : filesToProcess) { |
|---|
| 72 | String outputfilepath = null; |
|---|
| 73 | if (save && overwriteInputFile) { |
|---|
| 74 | outputfilepath = file.getAbsolutePath(); |
|---|
| 75 | } |
|---|
| 76 | |
|---|
| 77 | processFile(file, outputfilepath); |
|---|
| 78 | |
|---|
| 79 | } |
|---|
| 80 | |
|---|
| 81 | } else { |
|---|
| 82 | String inputfilepath = dir + file + ".docx"; |
|---|
| 83 | |
|---|
| 84 | // If so, whereto? |
|---|
| 85 | String outputfilepath = null; |
|---|
| 86 | if (save) { |
|---|
| 87 | if (overwriteInputFile) { |
|---|
| 88 | outputfilepath = inputfilepath; |
|---|
| 89 | } else if (flatOpcXmlOutput) { |
|---|
| 90 | outputfilepath = dir + file + "_OUT.xml"; |
|---|
| 91 | } else { |
|---|
| 92 | outputfilepath = dir + file + "_OUT.docx"; |
|---|
| 93 | } |
|---|
| 94 | } |
|---|
| 95 | |
|---|
| 96 | processFile(new java.io.File(inputfilepath), outputfilepath); |
|---|
| 97 | } |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | private static void processFile(File inputfile, String outputfilepath) |
|---|
| 101 | throws Docx4JException { |
|---|
| 102 | |
|---|
| 103 | // Open a document from the file system |
|---|
| 104 | // 1. Load the Package - .docx or Flat OPC .xml |
|---|
| 105 | WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(inputfile); |
|---|
| 106 | |
|---|
| 107 | // List the parts by walking the rels tree |
|---|
| 108 | RelationshipsPart rp = wordMLPackage.getRelationshipsPart(); |
|---|
| 109 | StringBuilder sb = new StringBuilder(); |
|---|
| 110 | printInfo(rp, sb, ""); |
|---|
| 111 | traverseRelationships(wordMLPackage, rp, sb, " "); |
|---|
| 112 | |
|---|
| 113 | System.out.println(sb.toString()); |
|---|
| 114 | |
|---|
| 115 | if (save) { |
|---|
| 116 | wordMLPackage.save(new java.io.File(outputfilepath)); |
|---|
| 117 | System.out.println("Saved stripped to " + outputfilepath); |
|---|
| 118 | } else { |
|---|
| 119 | System.out.println("Stripped parts from " + inputfile.getName() ); |
|---|
| 120 | } |
|---|
| 121 | } |
|---|
| 122 | |
|---|
| 123 | public static void printInfo(Part p, StringBuilder sb, String indent) { |
|---|
| 124 | sb.append("\n" + indent + p.getPartName() + " [" + p.getClass().getName() + "] " ); |
|---|
| 125 | } |
|---|
| 126 | |
|---|
| 127 | public static void traverseRelationships(WordprocessingMLPackage wordMLPackage, |
|---|
| 128 | RelationshipsPart rp, |
|---|
| 129 | StringBuilder sb, String indent) |
|---|
| 130 | throws Docx4JException { |
|---|
| 131 | |
|---|
| 132 | List<Relationship> deletions = new ArrayList<Relationship>(); |
|---|
| 133 | |
|---|
| 134 | for ( Relationship r : rp.getRelationships().getRelationship() ) { |
|---|
| 135 | |
|---|
| 136 | log.info("For Relationship Id=" + r.getId() |
|---|
| 137 | + " Source is " + rp.getSourceP().getPartName() |
|---|
| 138 | + ", Target is " + r.getTarget() ); |
|---|
| 139 | |
|---|
| 140 | if (r.getTargetMode() != null |
|---|
| 141 | && r.getTargetMode().equals("External") ) { |
|---|
| 142 | |
|---|
| 143 | sb.append("\n" + indent + "external resource " + r.getTarget() |
|---|
| 144 | + " of type " + r.getType() ); |
|---|
| 145 | continue; |
|---|
| 146 | } |
|---|
| 147 | |
|---|
| 148 | try { |
|---|
| 149 | String resolvedPartUri = URIHelper.resolvePartUri(rp.getSourceURI(), new URI(r.getTarget() ) ).toString(); |
|---|
| 150 | resolvedPartUri = resolvedPartUri.substring(1); |
|---|
| 151 | |
|---|
| 152 | Part part = wordMLPackage.getParts().get(new PartName("/" + resolvedPartUri)); |
|---|
| 153 | |
|---|
| 154 | // Or could just have done: |
|---|
| 155 | // Part p = rp.getPart(r); |
|---|
| 156 | |
|---|
| 157 | // TEMP |
|---|
| 158 | // if () |
|---|
| 159 | |
|---|
| 160 | if (part!=null) { |
|---|
| 161 | printInfo(part, sb, indent); |
|---|
| 162 | } |
|---|
| 163 | |
|---|
| 164 | if (part==null) { |
|---|
| 165 | sb.append("Part " + resolvedPartUri + " not found! \n"); |
|---|
| 166 | } else if ( part instanceof org.docx4j.openpackaging.parts.ThemePart |
|---|
| 167 | || part instanceof org.docx4j.openpackaging.parts.WordprocessingML.NumberingDefinitionsPart |
|---|
| 168 | || part instanceof org.docx4j.openpackaging.parts.WordprocessingML.FontTablePart) { |
|---|
| 169 | deletions.add(r ); |
|---|
| 170 | sb.append(".. DELETED" ); |
|---|
| 171 | } else if (part instanceof org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart) { |
|---|
| 172 | |
|---|
| 173 | if (!keepStyles) { |
|---|
| 174 | deletions.add(r ); |
|---|
| 175 | sb.append(".. DELETED" ); |
|---|
| 176 | } else { |
|---|
| 177 | sb.append(".. KEEPING" ); |
|---|
| 178 | } |
|---|
| 179 | |
|---|
| 180 | } else { |
|---|
| 181 | if (stripPropertiesParts |
|---|
| 182 | && ( part instanceof org.docx4j.openpackaging.parts.DocPropsExtendedPart |
|---|
| 183 | || part instanceof org.docx4j.openpackaging.parts.DocPropsCorePart |
|---|
| 184 | || part instanceof org.docx4j.openpackaging.parts.DocPropsCustomPart |
|---|
| 185 | || part instanceof org.docx4j.openpackaging.parts.WordprocessingML.WebSettingsPart |
|---|
| 186 | || part instanceof org.docx4j.openpackaging.parts.WordprocessingML.DocumentSettingsPart)) { |
|---|
| 187 | |
|---|
| 188 | deletions.add(r ); |
|---|
| 189 | sb.append(".. DELETED" ); |
|---|
| 190 | |
|---|
| 191 | } else if ( part instanceof org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart |
|---|
| 192 | || part instanceof org.docx4j.openpackaging.parts.WordprocessingML.StyleDefinitionsPart |
|---|
| 193 | || part instanceof org.docx4j.openpackaging.parts.WordprocessingML.HeaderPart) { |
|---|
| 194 | sb.append(".. KEEPING" ); |
|---|
| 195 | if (part.getRelationshipsPart()==null) { |
|---|
| 196 | sb.append(".. no rels" ); |
|---|
| 197 | } else { |
|---|
| 198 | traverseRelationships(wordMLPackage, part.getRelationshipsPart(), sb, indent + " "); |
|---|
| 199 | } |
|---|
| 200 | } |
|---|
| 201 | else if (defaultToDelete) { |
|---|
| 202 | // Delete it |
|---|
| 203 | deletions.add(r ); |
|---|
| 204 | sb.append(".. DELETED" ); |
|---|
| 205 | } |
|---|
| 206 | } |
|---|
| 207 | |
|---|
| 208 | } catch (Exception e) { |
|---|
| 209 | throw new Docx4JException("Failed to add parts from relationships", e); |
|---|
| 210 | } |
|---|
| 211 | |
|---|
| 212 | } |
|---|
| 213 | |
|---|
| 214 | for ( Relationship r : deletions) { |
|---|
| 215 | rp.removeRelationship(r); |
|---|
| 216 | } |
|---|
| 217 | |
|---|
| 218 | } |
|---|
| 219 | |
|---|
| 220 | |
|---|
| 221 | } |
|---|