blob: 43800d530e57f6841a81d84c4f13bc3d438d4104 [file] [log] [blame]
/************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
************************************************************************/
package org.odftoolkit.odfdom.incubator.doc.text;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.odftoolkit.odfdom.doc.OdfDocument;
import org.odftoolkit.odfdom.doc.table.OdfTable;
import org.odftoolkit.odfdom.doc.table.OdfTableRow;
import org.odftoolkit.odfdom.dom.OdfContentDom;
import org.odftoolkit.odfdom.dom.OdfMetaDom;
import org.odftoolkit.odfdom.dom.OdfStylesDom;
import org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement;
import org.odftoolkit.odfdom.dom.element.office.OfficeMetaElement;
import org.odftoolkit.odfdom.dom.element.style.StyleMasterPageElement;
import org.odftoolkit.odfdom.dom.element.table.TableTableElement;
import org.odftoolkit.odfdom.dom.element.text.TextAElement;
import org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement;
import org.odftoolkit.odfdom.pkg.OdfElement;
import org.w3c.dom.NodeList;
/**
 * A subclass of OdfTextExtractor. It provides a method to return all the text
 * that the user can typically edit in a document, including the text in content.xml,
 * the header and footer in styles.xml, and the meta data in meta.xml.
 *
 * <p>This function can be used by search engines and text analytic operations.</p>
 *
 * <p>An instance works in one of two modes, chosen by the factory method used to
 * create it: document mode (text of the content, styles and meta parts is
 * concatenated) or element mode (only the given element is visited).</p>
 *
 * @deprecated As of release 0.8.8, replaced by {@link org.odftoolkit.simple.common.EditableTextExtractor} in Simple API.
 */
@Deprecated
public class OdfEditableTextExtractor extends OdfTextExtractor {

    /** The document to extract from; only set in document mode. */
    OdfDocument mDocument = null;
    /** The element to extract from; only set in element mode. */
    OdfElement mElement = null;
    /** {@code true} if this instance was created for a whole document. */
    boolean mIsDocumentExtractor = false;

    /**
     * Constructor with an ODF document as a parameter
     * @param doc the ODF document whose editable text would be extracted.
     */
    private OdfEditableTextExtractor(OdfDocument doc) {
        mTextBuilder = new StringBuilder();
        mDocument = doc;
        mIsDocumentExtractor = true;
    }

    /**
     * Constructor with an ODF element as parameter
     * @param element the ODF element whose editable text would be extracted.
     */
    private OdfEditableTextExtractor(OdfElement element) {
        mTextBuilder = new StringBuilder();
        mElement = element;
        mIsDocumentExtractor = false;
    }

    /**
     * An instance of OdfEditableTextExtractor will be created to
     * extract the editable text content of an ODF document.
     * @param doc the ODF document whose text will be extracted.
     * @return An instance of OdfEditableTextExtractor
     */
    public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfDocument doc) {
        return new OdfEditableTextExtractor(doc);
    }

    /**
     * An instance of OdfEditableTextExtractor will be created to
     * extract the editable text content of an ODF element.
     * @param element the ODF element whose text will be extracted.
     * @return An instance of OdfEditableTextExtractor
     */
    public static OdfEditableTextExtractor newOdfEditableTextExtractor(OdfElement element) {
        return new OdfEditableTextExtractor(element);
    }

    /**
     * Appends the editable text of the sub document referenced by the
     * <code>xlink:href</code> attribute of the given object element.
     * Nothing is appended if the element has no href or if no sub document
     * could be loaded for it. A failure while extracting the sub document text
     * is logged and otherwise ignored.
     *
     * @param element the draw object element being visited.
     * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.draw.DrawObjectElement)
     */
    @Override
    public void visit(DrawObjectElement element) {
        String embedDocPath = element.getXlinkHrefAttribute();
        if (embedDocPath == null) {
            // Without an href there is no sub document path to load.
            return;
        }
        // NOTE(review): the cast assumes the element lives in content.xml; an object
        // owned by another DOM (e.g. styles.xml) would fail here - confirm with callers.
        OdfDocument embedDoc = ((OdfDocument) (((OdfContentDom) element.getOwnerDocument()).getDocument())).loadSubDocument(embedDocPath);
        if (embedDoc != null) {
            try {
                mTextBuilder.append(OdfEditableTextExtractor.newOdfEditableTextExtractor(embedDoc).getText());
            } catch (Exception e) {
                Logger.getLogger(OdfEditableTextExtractor.class.getName()).log(Level.SEVERE, null, e);
            }
        }
    }

    /**
     * Skips tracked changes entirely: nothing below this element is appended.
     *
     * @param ele the tracked changes element being visited.
     * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextTrackedChangesElement)
     */
    @Override
    public void visit(TextTrackedChangesElement ele) {
        // Intentionally empty.
    }

    /**
     * Appends the link target (<code>xlink:href</code>), if present, followed by
     * the text content of the hyperlink element.
     *
     * @param ele the hyperlink element being visited.
     * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.text.TextAElement)
     */
    @Override
    public void visit(TextAElement ele) {
        String link = ele.getXlinkHrefAttribute();
        // StringBuilder.append((String) null) would append the literal "null",
        // so a missing href must be skipped explicitly.
        if (link != null) {
            mTextBuilder.append(link);
        }
        appendElementText(ele);
    }

    /**
     * Appends the display text of every cell of the table. Each cell is followed
     * by the inherited <code>TabChar</code> and each row by the inherited
     * <code>NewLineChar</code>.
     *
     * @param ele the table element being visited.
     * @see org.odftoolkit.odfdom.dom.DefaultElementVisitor#visit(org.odftoolkit.odfdom.dom.element.table.TableTableElement)
     */
    @Override
    public void visit(TableTableElement ele) {
        OdfTable table = OdfTable.getInstance(ele);
        List<OdfTableRow> rowlist = table.getRowList();
        for (OdfTableRow row : rowlist) {
            for (int j = 0; j < row.getCellCount(); j++) {
                mTextBuilder.append(row.getCellByIndex(j).getDisplayText()).append(TabChar);
            }
            mTextBuilder.append(NewLineChar);
        }
    }

    /**
     * Return the editable text content as a string
     * @return the editable text content as a string
     */
    @Override
    public String getText() {
        if (mIsDocumentExtractor) {
            return getDocumentText();
        } else {
            visit(mElement);
            return mTextBuilder.toString();
        }
    }

    /**
     * Collects the editable text of the whole document, in this order: the
     * content root, the first <code>style:master-page</code> element of the
     * styles DOM (if any) and the <code>office:meta</code> element of the meta
     * DOM (if any).
     *
     * <p>If any step throws, the exception is logged and the text collected up
     * to that point is returned.</p>
     *
     * @return the concatenated editable text, possibly partial after a failure.
     */
    private String getDocumentText() {
        StringBuilder builder = new StringBuilder();
        try {
            // Extract text from content.xml
            OdfEditableTextExtractor contentDomExtractor = newOdfEditableTextExtractor(mDocument.getContentRoot());
            builder.append(contentDomExtractor.getText());

            // Extract text from styles.xml (only the first master page is visited)
            OdfStylesDom styleDom = mDocument.getStylesDom();
            if (styleDom != null) {
                NodeList list = styleDom.getElementsByTagName("style:master-page");
                if (list.getLength() > 0) {
                    StyleMasterPageElement masterpage = (StyleMasterPageElement) list.item(0);
                    if (masterpage != null) {
                        builder.append(newOdfEditableTextExtractor(masterpage).getText());
                    }
                }
            }

            // Extract text from meta.xml
            OdfMetaDom metaDom = mDocument.getMetaDom();
            if (metaDom != null) {
                OdfElement root = metaDom.getRootElement();
                OfficeMetaElement officemeta = OdfElement.findFirstChildNode(OfficeMetaElement.class, root);
                if (officemeta != null) {
                    builder.append(newOdfEditableTextExtractor(officemeta).getText());
                }
            }
            return builder.toString();
        } catch (Exception e) {
            // Keep the original message but attach the throwable so the stack trace is not lost.
            Logger.getLogger(OdfEditableTextExtractor.class.getName()).log(Level.SEVERE, e.getMessage(), e);
            return builder.toString();
        }
    }
}