| /************************************************************************ |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| ************************************************************************/ |
| package org.odftoolkit.odfdom.incubator.doc.text; |
| |
| import java.util.List; |
| import java.util.logging.Level; |
| import java.util.logging.Logger; |
| import org.odftoolkit.odfdom.doc.OdfDocument; |
| |
| import org.odftoolkit.odfdom.doc.table.OdfTable; |
| import org.odftoolkit.odfdom.doc.table.OdfTableRow; |
| import org.odftoolkit.odfdom.dom.OdfContentDom; |
| import org.odftoolkit.odfdom.dom.OdfMetaDom; |
| import org.odftoolkit.odfdom.dom.OdfStylesDom; |
| import org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement; |
| import org.odftoolkit.odfdom.dom.element.office.OfficeMetaElement; |
| import org.odftoolkit.odfdom.dom.element.style.StyleMasterPageElement; |
| import org.odftoolkit.odfdom.dom.element.table.TableTableElement; |
| import org.odftoolkit.odfdom.dom.element.text.TextAElement; |
| import org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement; |
| import org.odftoolkit.odfdom.pkg.OdfElement; |
| import org.w3c.dom.NodeList; |
| |
| /** |
| * It's a sub class of OdfTextExtractor. It provides a method to return all the text |
| * that the user can typically edit in a document, including text in cotent.xml, |
| * header and footer in styles.xml, meta data in meta.xml. |
| * |
| * <p>This function can be used by search engine, and text analytic operations. </p> |
| * |
| * @deprecated As of release 0.8.8, replaced by {@link org.odftoolkit.simple.common.EditableTextExtractor} in Simple API. |
| */ |
| public class OdfEditableTextExtractor extends OdfTextExtractor { |
| |
| OdfDocument mDocument = null; |
| OdfElement mElement = null; |
| boolean mIsDocumentExtractor = false; |
| |
| /** |
| * Constructor with an ODF document as a parameter |
| * @param doc the ODF document whose editable text would be extracted. |
| */ |
| private OdfEditableTextExtractor(OdfDocument doc) { |
| mTextBuilder = new StringBuilder(); |
| mDocument = doc; |
| mIsDocumentExtractor = true; |
| } |
| |
| /** |
| * Constructor with an ODF element as parameter |
| * @param element the ODF element whose editable text would be extracted. |
| */ |
| private OdfEditableTextExtractor(OdfElement element) { |
| mTextBuilder = new StringBuilder(); |
| mElement = element; |
| mIsDocumentExtractor = false; |
| } |
| |
| /** |
| * An instance of OdfEditableTextExtractor will be created to |
| * extract the editable text content of an ODF element. |
| * @param doc the ODF document whose text will be extracted. |
| * @return An instance of OdfEditableTextExtractor |
| */ |
| public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfDocument doc) { |
| return new OdfEditableTextExtractor(doc); |
| } |
| |
| /** |
| * An instance of OdfEditableTextExtractor will be created to |
| * extract the editable text content of an ODF element. |
| * @param element the ODF element whose text will be extracted. |
| * @return An instance of OdfEditableTextExtractor |
| */ |
| public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfElement element) { |
| return new OdfEditableTextExtractor(element); |
| } |
| |
| /* (non-Javadoc) |
| * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement) |
| */ |
| @Override |
| public void visit(DrawObjectElement element) { |
| String embedDocPath = element.getXlinkHrefAttribute(); |
| OdfDocument embedDoc = ((OdfDocument) (((OdfContentDom) element.getOwnerDocument()).getDocument())).loadSubDocument(embedDocPath); |
| if (embedDoc != null) { |
| try { |
| mTextBuilder.append(OdfEditableTextExtractor.newOdfEditableTextExtractor(embedDoc).getText()); |
| } catch (Exception e) { |
| Logger.getLogger(OdfEditableTextExtractor.class.getName()).log(Level.SEVERE, null, e); |
| } |
| } |
| } |
| |
| /* (non-Javadoc) |
| * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement) |
| */ |
| @Override |
| public void visit(TextTrackedChangesElement ele) { |
| return; |
| } |
| |
| /* (non-Javadoc) |
| * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextAElement) |
| */ |
| @Override |
| public void visit(TextAElement ele) { |
| String link = ele.getXlinkHrefAttribute(); |
| mTextBuilder.append(link); |
| appendElementText(ele); |
| } |
| |
| /* (non-Javadoc) |
| * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTabElement) |
| */ |
| @Override |
| public void visit(TableTableElement ele) { |
| OdfTable table = OdfTable.getInstance(ele); |
| List<OdfTableRow> rowlist = table.getRowList(); |
| for (int i = 0; i < rowlist.size(); i++) { |
| OdfTableRow row = rowlist.get(i); |
| for (int j = 0; j < row.getCellCount(); j++) { |
| mTextBuilder.append(row.getCellByIndex(j).getDisplayText()).append(TabChar); |
| } |
| mTextBuilder.append(NewLineChar); |
| } |
| } |
| |
| /** |
| * Return the editable text content as a string |
| * @return the editable text content as a string |
| */ |
| @Override |
| public String getText() { |
| if (mIsDocumentExtractor) { |
| return getDocumentText(); |
| } else { |
| visit(mElement); |
| return mTextBuilder.toString(); |
| } |
| } |
| |
| private String getDocumentText() { |
| StringBuilder builder = new StringBuilder(); |
| try { |
| //Extract text from content.xml |
| OdfEditableTextExtractor contentDomExtractor = newOdfEditableTextExtractor(mDocument.getContentRoot()); |
| builder.append(contentDomExtractor.getText()); |
| |
| //Extract text from style.xml |
| OdfStylesDom styleDom = mDocument.getStylesDom(); |
| if (styleDom != null) { |
| StyleMasterPageElement masterpage = null; |
| NodeList list = styleDom.getElementsByTagName("style:master-page"); |
| if (list.getLength() > 0) { |
| masterpage = (StyleMasterPageElement) list.item(0); |
| } |
| if (masterpage != null) { |
| builder.append(newOdfEditableTextExtractor(masterpage).getText()); |
| } |
| } |
| |
| //Extract text from meta.xml |
| OdfMetaDom metaDom = mDocument.getMetaDom(); |
| if (metaDom != null) { |
| OdfElement root = metaDom.getRootElement(); |
| OfficeMetaElement officemeta = OdfElement.findFirstChildNode(OfficeMetaElement.class, root); |
| if (officemeta != null) { |
| builder.append(newOdfEditableTextExtractor(officemeta).getText()); |
| } |
| } |
| |
| return builder.toString(); |
| } catch (Exception e) { |
| Logger.getLogger(OdfEditableTextExtractor.class.getName()).severe(e.getMessage()); |
| return builder.toString(); |
| } |
| } |
| } |