/*
 *  Copyright 2007-2008, Plutext Pty Ltd.
 *
 *  This file is part of docx4j.

    docx4j is licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.

 */
|
|---|
| 20 |
|
|---|
| 21 |
|
|---|
| 22 | package org.docx4j;
|
|---|
| 23 |
|
|---|
| 24 | import java.io.IOException;
|
|---|
| 25 | import java.io.OutputStreamWriter;
|
|---|
| 26 | import java.io.Writer;
|
|---|
| 27 |
|
|---|
| 28 | import javax.xml.bind.JAXBContext;
|
|---|
| 29 | import javax.xml.bind.JAXBElement;
|
|---|
| 30 | import javax.xml.bind.Marshaller;
|
|---|
| 31 | import javax.xml.namespace.QName;
|
|---|
| 32 |
|
|---|
| 33 | import org.apache.log4j.Logger;
|
|---|
| 34 | import org.docx4j.jaxb.Context;
|
|---|
| 35 | import org.docx4j.jaxb.NamespacePrefixMapperUtils;
|
|---|
| 36 | import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
|
|---|
| 37 | import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
|
|---|
| 38 | import org.xml.sax.SAXException;
|
|---|
| 39 | import org.xml.sax.helpers.DefaultHandler;
|
|---|
| 40 |
|
|---|
| 41 | public class TextUtils {
|
|---|
| 42 |
|
|---|
| 43 | private static Logger log = Logger.getLogger(TextUtils.class);
|
|---|
| 44 |
|
|---|
| 45 | /**
|
|---|
| 46 | * Extract contents of descendant <w:t> elements.
|
|---|
| 47 | *
|
|---|
| 48 | * @param o
|
|---|
| 49 | * @param jcSVG JAXBContext
|
|---|
| 50 | * @return
|
|---|
| 51 | */
|
|---|
| 52 | public static void extractText(Object o, Writer w) throws Exception {
|
|---|
| 53 |
|
|---|
| 54 | extractText(o, w, Context.jc);
|
|---|
| 55 | }
|
|---|
| 56 |
|
|---|
| 57 | /**
|
|---|
| 58 | * Extract contents of descendant <w:t> elements.
|
|---|
| 59 | *
|
|---|
| 60 | * @param o
|
|---|
| 61 | * @param jc JAXBContext
|
|---|
| 62 | * @return
|
|---|
| 63 | */
|
|---|
| 64 | public static void extractText(Object o, Writer w, JAXBContext jc) throws Exception {
|
|---|
| 65 |
|
|---|
| 66 | Marshaller marshaller=jc.createMarshaller();
|
|---|
| 67 | NamespacePrefixMapperUtils.setProperty(marshaller,
|
|---|
| 68 | NamespacePrefixMapperUtils.getPrefixMapper());
|
|---|
| 69 | marshaller.marshal(o, new TextExtractor(w));
|
|---|
| 70 |
|
|---|
| 71 | }
|
|---|
| 72 |
|
|---|
| 73 | /**
|
|---|
| 74 | * Extract contents of descendant <w:t> elements.
|
|---|
| 75 | * Use this for objects which don't have @XmlRootElement
|
|---|
| 76 | *
|
|---|
| 77 | * @param o
|
|---|
| 78 | * @param w
|
|---|
| 79 | * @param jc
|
|---|
| 80 | * @param uri
|
|---|
| 81 | * @param local
|
|---|
| 82 | * @param declaredType
|
|---|
| 83 | * @throws Exception
|
|---|
| 84 | */
|
|---|
| 85 | public static void extractText(Object o, Writer w, JAXBContext jc,
|
|---|
| 86 | String uri, String local, Class declaredType) throws Exception {
|
|---|
| 87 |
|
|---|
| 88 | Marshaller marshaller=jc.createMarshaller();
|
|---|
| 89 | NamespacePrefixMapperUtils.setProperty(marshaller,
|
|---|
| 90 | NamespacePrefixMapperUtils.getPrefixMapper());
|
|---|
| 91 | marshaller.marshal(
|
|---|
| 92 | new JAXBElement(new QName(uri,local), declaredType, o ),
|
|---|
| 93 | new TextExtractor(w));
|
|---|
| 94 | }
|
|---|
| 95 |
|
|---|
| 96 |
|
|---|
| 97 | /**
|
|---|
| 98 | * A SAX ContentHandler that writes all #PCDATA onto a java.io.Writer
|
|---|
| 99 | *
|
|---|
| 100 | * From http://www.cafeconleche.org/books/xmljava/chapters/ch06s03.html
|
|---|
| 101 | *
|
|---|
| 102 | */
|
|---|
| 103 | static class TextExtractor extends DefaultHandler {
|
|---|
| 104 |
|
|---|
| 105 | private Writer out;
|
|---|
| 106 |
|
|---|
| 107 | public TextExtractor(Writer out) {
|
|---|
| 108 | this.out = out;
|
|---|
| 109 | }
|
|---|
| 110 |
|
|---|
| 111 | public void characters(char[] text, int start, int length)
|
|---|
| 112 | throws SAXException {
|
|---|
| 113 |
|
|---|
| 114 | try {
|
|---|
| 115 | out.write(text, start, length);
|
|---|
| 116 | }
|
|---|
| 117 | catch (IOException e) {
|
|---|
| 118 | throw new SAXException(e);
|
|---|
| 119 | }
|
|---|
| 120 |
|
|---|
| 121 | }
|
|---|
| 122 |
|
|---|
| 123 | } // end TextExtractor
|
|---|
| 124 |
|
|---|
| 125 | public static void main(String[] args) throws Exception {
|
|---|
| 126 |
|
|---|
| 127 | String inputfilepath = System.getProperty("user.dir") + "/sample-docs/Table.docx";
|
|---|
| 128 | //String inputfilepath = System.getProperty("user.dir") + "/sample-docs/Word2007-fonts.docx";
|
|---|
| 129 |
|
|---|
| 130 | WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new java.io.File(inputfilepath));
|
|---|
| 131 | MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
|
|---|
| 132 |
|
|---|
| 133 | org.docx4j.wml.Document wmlDocumentEl = (org.docx4j.wml.Document)documentPart.getJaxbElement();
|
|---|
| 134 |
|
|---|
| 135 | Writer out = new OutputStreamWriter(System.out);
|
|---|
| 136 |
|
|---|
| 137 | extractText(wmlDocumentEl, out);
|
|---|
| 138 |
|
|---|
| 139 | //out.flush();
|
|---|
| 140 | out.close();
|
|---|
| 141 |
|
|---|
| 142 |
|
|---|
| 143 | }
|
|---|
| 144 |
|
|---|
| 145 | }
|
|---|
| 146 |
|
|---|