import java.io.File; import java.io.FileNotFoundException; import java.io.OutputStream; import java.io.StringWriter; import java.util.ArrayList; import java.util.Calendar; import java.util.Hashtable; import java.util.List; import java.util.StringTokenizer; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import org.docx4j.XmlUtils; import org.docx4j.convert.out.Containerization; import org.docx4j.convert.out.html.AbstractHtmlExporter; import org.docx4j.convert.out.html.HtmlExporterNG2; import org.docx4j.convert.out.html.SdtWriter; import org.docx4j.convert.out.html.TagSingleBox; import org.docx4j.convert.out.html.AbstractHtmlExporter.HtmlSettings; import org.docx4j.diff.Differencer; import org.docx4j.fonts.IdentityPlusMapper; import org.docx4j.fonts.Mapper; import org.docx4j.fonts.PhysicalFont; import org.docx4j.fonts.PhysicalFonts; import org.docx4j.openpackaging.exceptions.Docx4JException; import org.docx4j.openpackaging.io.LoadFromZipFile; import org.docx4j.openpackaging.io.SaveToZipFile; import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart; import org.docx4j.openpackaging.parts.relationships.Namespaces; import org.docx4j.openpackaging.parts.relationships.RelationshipsPart; import org.docx4j.relationships.Relationship; import org.docx4j.wml.Body; import org.docx4j.wml.Document; import org.docx4j.wml.SdtContentBlock; import org.apache.avalon.framework.configuration.Configuration; import org.apache.avalon.framework.configuration.DefaultConfigurationBuilder; import org.apache.fop.apps.FopFactory; import org.apache.log4j.ConsoleAppender; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.SimpleLayout; import org.docx4j.convert.out.pdf.viaXSLFO.*; public class CompareDocument { public static JAXBContext context = org.docx4j.jaxb.Context.jc; private static Logger logger = Logger.getLogger(org.docx4j.convert.out.html.HtmlExporterNG2.class); private static Logger pdfLogger = Logger.getLogger(org.docx4j.convert.out.pdf.viaXSLFO.Conversion.class); public static Hashtable compareWordDoc(String newerfilepath, String olderfilepath) throws Exception { Hashtable strResult = new Hashtable(); logger.setLevel(Level.INFO); pdfLogger.setLevel(Level.INFO); // try { // 1. Load the Packages WordprocessingMLPackage newerPackage = WordprocessingMLPackage .load(new java.io.File(newerfilepath)); WordprocessingMLPackage olderPackage = WordprocessingMLPackage .load(new java.io.File(olderfilepath)); int count = getWordDocCharCount(olderPackage); AbstractHtmlExporter exporter = new HtmlExporterNG2(); HtmlSettings htmlSettings = new HtmlSettings(); htmlSettings.setImageDirPath(newerfilepath + "_files"); /* * // Set up font mapper Mapper fontMapper = new IdentityPlusMapper(); * newerPackage.setFontMapper(fontMapper); // Example of mapping missing * font Algerian to installed font Comic Sans MS PhysicalFont font = * PhysicalFonts.getPhysicalFonts().get("Arial"); * fontMapper.getFontMappings().put("Arial", font); */ Mapper fontMapper = new IdentityPlusMapper(); PhysicalFont font = PhysicalFonts.getPhysicalFonts().get("Arial"); fontMapper.getFontMappings().put("Arial", font); newerPackage.setFontMapper(fontMapper); // Example of mapping missing Body newerBody = ((Document) newerPackage.getMainDocumentPart() .getJaxbElement()).getBody(); Body olderBody = ((Document) olderPackage.getMainDocumentPart() .getJaxbElement()).getBody(); /* java.io.StringWriter sw = new java.io.StringWriter(); Docx4jDriver.diff( XmlUtils.marshaltoW3CDomDocument(newerBody).getDocumentElement(), XmlUtils.marshaltoW3CDomDocument(olderBody).getDocumentElement(), sw); */ // 2. Do the differencing java.io.StringWriter sw = new java.io.StringWriter(); javax.xml.transform.stream.StreamResult result = new javax.xml.transform.stream.StreamResult( sw); Calendar changeDate = null; Differencer pd = new Differencer(); pd.setRelsDiffIdentifier("blagh"); // not necessary in this case pd.diff(newerBody, olderBody, result, "someone", changeDate, newerPackage.getMainDocumentPart().getRelationshipsPart(), olderPackage.getMainDocumentPart().getRelationshipsPart()); // 3. Get the result String contentStr = sw.toString(); System.out.println("Result: \n\n " + contentStr); Body newBody = (Body) org.docx4j.XmlUtils.unmarshalString(contentStr); // 4. Display the result as a PDF // To do this, we'll replace the body in the newer document ((Document) newerPackage.getMainDocumentPart().getJaxbElement()) .setBody(newBody); //RelationshipsPart rp = newerPackage.getMainDocumentPart() // .getRelationshipsPart(); //handleRels(pd, rp); //newerPackage.setFontMapper(new IdentityPlusMapper()); //OutputStream os = new java.io.FileOutputStream("c:\\testPDF_New_New"+ ".pdf"); org.docx4j.convert.out.pdf.PdfConversion c = new org.docx4j.convert.out.pdf.viaXSLFO.Conversion( newerPackage); //c.output(os); SdtWriter.registerTagHandler(Containerization.TAG_BORDERS, new TagSingleBox()); SdtWriter.registerTagHandler(Containerization.TAG_SHADING, new TagSingleBox()); String htmfilepath = newerfilepath.substring(0,newerfilepath.indexOf(".")-1); OutputStream os_html; if (true) { os_html = new java.io.FileOutputStream(htmfilepath+".html"); } else { os_html = System.out; } javax.xml.transform.stream.StreamResult result1 = new javax.xml.transform.stream.StreamResult( os_html); exporter.html(newerPackage, result1, htmlSettings); if (true) { System.out.println("Saved: html using " + exporter.getClass().getName()); } strResult.put(ApplicationConstantsIx.REVISION_HTML_STR, MTQatsHelper.readFileAsString(htmfilepath+".html")); strResult.put(ApplicationConstantsIx.VERSION_1_CHAR_COUNT, count); System.out.println(strResult); return strResult; } public static int getWordDocCharCount(WordprocessingMLPackage wrdPackage) { int count = 0; try { MainDocumentPart documentPart = wrdPackage.getMainDocumentPart(); org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document) documentPart .getJaxbElement(); StringWriter str = new StringWriter(); org.docx4j.TextUtils.extractText(wmlDocumentEl, str); String strString = str.toString(); count = strString.length(); System.out.println("Count....." + strString.length()); // out.flush(); // out.close(); str.close(); } catch (Docx4JException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return count; } public static void createDoc(String filePath, String content) throws Exception { StringTokenizer objTocken = new StringTokenizer(content,"\n"); System.out.println("no o paragraph is: "+ objTocken.countTokens()); WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage(); MainDocumentPart mainPart = wordMLPackage.getMainDocumentPart(); //mainPart.addParagraphOfText(content); do{ //String str = ""+objTocken.nextToken()+""; //mainPart.addObject(org.docx4j.XmlUtils.unmarshalString(str)); //mainPart.createParagraphOfText(objTocken.nextToken()); mainPart.addParagraphOfText(objTocken.nextToken()); }while(objTocken.hasMoreTokens()); // write out our word doc to disk // File file = File.createTempFile("wordexport-", ".docx"); org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document)mainPart.getJaxbElement(); String xml = org.docx4j.XmlUtils.marshaltoString(wmlDocumentEl, true); System.out.println(xml); File file = new File(filePath); wordMLPackage.save(file); } }