package ru.simplexsoftware.constructorOfDocuments;

import org.docx4j.convert.in.xhtml.XHTMLImporterImpl;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.exceptions.InvalidFormatException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.NumberingDefinitionsPart;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.w3c.dom.Document;
import org.w3c.tidy.Tidy;
import org.xml.sax.InputSource;
import ru.simplexsoftware.constructorOfDocuments.Utils.DocxUtil;

import javax.xml.bind.JAXBException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamResult;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.Scanner;

/**
 * Manual conversion test: reads an HTML file from disk, converts it to a docx
 * byte array with {@link #createDocx(String)} and writes the result to
 * {@code test.docx} in the working directory.
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(
        locations = {"classpath:applicationContext.xml"}
)
public class MainTest {

    private static final Logger LOG = LoggerFactory.getLogger(MainTest.class);

    /** Encoding handed to Tidy and used for every String/byte conversion in this class. */
    private static final String ENCODING = "UTF-8";

    /**
     * Location of the HTML fixture.
     * NOTE(review): this is an absolute, machine-specific path; the test can only
     * run on a machine where this exact file exists.
     */
    private static final String HTML_SOURCE_PATH =
            "C:\\Users\\User\\IdeaProjects\\constructor\\src\\test\\java\\ru\\simplexsoftware\\constructorOfDocuments\\doc.html";

    /**
     * Converts the HTML fixture to docx and stores it as {@code test.docx}.
     *
     * @throws IOException if the fixture cannot be read or the output file cannot be written
     */
    @Test
    public void runTest() throws IOException {
        File file = new File("test.docx");
        file.createNewFile();

        String html = readHtml(HTML_SOURCE_PATH);

        InputStream in = new ByteArrayInputStream(createDocx(html));
        org.apache.wicket.util.file.Files.writeTo(file, in);
    }

    /**
     * Converts an HTML document to the bytes of a docx file.
     *
     * <p>The HTML is first run through Tidy with XHTML output enabled, the result is
     * imported into a freshly created package via {@link XHTMLImporterImpl}, and the
     * package is saved into an in-memory buffer.
     *
     * <p>Failures while attaching the numbering part, importing the XHTML or saving
     * the package are logged and do not abort the method, so the returned bytes may
     * describe an empty or incomplete document in those cases.
     *
     * @param htmlDoc the HTML markup to convert; must not be {@code null}
     * @return the content of the saved docx package
     * @throws IllegalArgumentException if {@code htmlDoc} is {@code null}
     * @throws IllegalStateException    if the empty package or its numbering part
     *                                  cannot be created, or UTF-8 is unavailable
     */
    public static byte[] createDocx(String htmlDoc) {
        if (htmlDoc == null) {
            throw new IllegalArgumentException("htmlDoc must not be null");
        }

        WordprocessingMLPackage wordMLPackage = createPackageWithNumbering();

        XHTMLImporterImpl xHTMLImporter = new XHTMLImporterImpl(wordMLPackage);
        xHTMLImporter.setHyperlinkStyle("Hyperlink");

        byte[] xhtml = tidyToXhtml(htmlDoc);

        // Convert the XHTML, and add it into the empty docx we made
        try {
            wordMLPackage.getMainDocumentPart().getContent().addAll(
                    xHTMLImporter.convert(new ByteArrayInputStream(xhtml), null));
        } catch (Docx4JException e) {
            // Best effort: the package is still saved, just without the imported content.
            LOG.error(e.getMessage(), e);
        }

        ByteArrayOutputStream output = new ByteArrayOutputStream();
        try {
            wordMLPackage.save(output);
        } catch (Docx4JException e) {
            // Best effort: whatever reached the buffer before the failure is returned.
            LOG.error(e.getMessage(), e);
        }
        return output.toByteArray();
    }

    /**
     * Reads a text file and joins its lines, each one prefixed by a single space.
     *
     * @param path location of the file to read
     * @return the joined content; empty if the file has no lines
     * @throws IOException if the file cannot be opened or read
     */
    private static String readHtml(String path) throws IOException {
        StringBuilder html = new StringBuilder();
        FileInputStream fstream = new FileInputStream(path);
        try {
            // NOTE(review): assumes the fixture is UTF-8 encoded, matching the encoding
            // handed to Tidy; an explicit charset keeps the result independent of the
            // platform default.
            BufferedReader br = new BufferedReader(new InputStreamReader(fstream, ENCODING));
            String strLine;
            while ((strLine = br.readLine()) != null) {
                html.append(' ').append(strLine);
            }
        } finally {
            // Closing the underlying stream releases the file handle on every path.
            fstream.close();
        }
        return html.toString();
    }

    /**
     * Creates an empty package and attaches a numbering definitions part
     * initialised with the default numbering.
     *
     * @return the new package
     * @throws IllegalStateException if the package or the numbering part cannot be created
     */
    private static WordprocessingMLPackage createPackageWithNumbering() {
        WordprocessingMLPackage wordMLPackage;
        NumberingDefinitionsPart ndp;
        try {
            wordMLPackage = WordprocessingMLPackage.createPackage();
            ndp = new NumberingDefinitionsPart();
        } catch (InvalidFormatException e) {
            // Nothing below can work without these objects, so fail with the cause
            // attached instead of dereferencing null later.
            throw new IllegalStateException("Unable to create an empty docx package", e);
        }

        try {
            wordMLPackage.getMainDocumentPart().addTargetPart(ndp);
        } catch (InvalidFormatException e) {
            LOG.error(e.getMessage(), e);
        }

        try {
            ndp.unmarshalDefaultNumbering();
        } catch (JAXBException e) {
            LOG.error(e.getMessage(), e);
        }
        return wordMLPackage;
    }

    /**
     * Runs the markup through Tidy with XHTML output enabled.
     *
     * @param htmlDoc the HTML markup; must not be {@code null}
     * @return the bytes Tidy wrote to its output stream
     * @throws IllegalStateException if UTF-8 is not supported by the JVM
     */
    private static byte[] tidyToXhtml(String htmlDoc) {
        Tidy tidy = new Tidy();
        tidy.setInputEncoding(ENCODING);
        tidy.setOutputEncoding(ENCODING);
        tidy.setRawOut(true);
        tidy.setXHTML(true);

        byte[] htmlBytes;
        try {
            htmlBytes = htmlDoc.getBytes(ENCODING);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
        }

        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        tidy.parseDOM(new ByteArrayInputStream(htmlBytes), outputStream);
        return outputStream.toByteArray();
    }
}