AOO410/main/xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java - openoffice

blob: 39029e097122b5fa62fd47d4d86cced693faa95f [file] [log] [blame]

	/**************************************************************
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*
	*************************************************************/



	package org.openoffice.xmerge.merger.diff;

	import org.w3c.dom.Node;

	import org.openoffice.xmerge.converter.xml.OfficeConstants;

	import java.util.Vector;
	import java.util.List;


	/**
	 * <p>Flattens the text found beneath a given <code>Node</code> (most likely
	 * a paragraph <code>Node</code>) into a single character array, so that the
	 * text can be fed to a difference algorithm.  The nodes are visited in the
	 * order in which a <code>TextNodeIterator</code> supplies them.</p>
	 *
	 * <p>Alongside the characters, a list of <code>TextNodeEntry</code> objects
	 * is built which records, for every contributing <code>Node</code>, the
	 * range of character positions that came from it.</p>
	 *
	 * <p>Note: all parsing happens once, in the constructor.  The character
	 * array and the entry list are therefore a snapshot of the tree; if the
	 * tree is modified later, then neither of them is updated accordingly.
	 * For this reason, and for performance reasons, this class offers no
	 * operations such as insert, remove or replace.  Its main purpose is to
	 * be used with difference, not with merge.</p>
	 *
	 * @author smak
	 */
	public class CharacterParser {

	    /** Supplies the nodes whose text is collected. */
	    private final TextNodeIterator textNodes;

	    /** One <code>TextNodeEntry</code> per contributing node, in visiting order. */
	    private final List nodeList_ = new Vector();

	    /** Position in the character array where the next node's text begins. */
	    private int currentPosition = 0;

	    /** The collected text; assigned once when parsing finishes. */
	    private char[] charArray;


	    /**
	     * Standard constructor.  Parses the tree immediately, so that the
	     * character array and the entry list are available as soon as the
	     * object has been created.
	     *
	     * @param  node  The initial root <code>Node</code>.
	     */
	    public CharacterParser(Node node) {
	        textNodes = new TextNodeIterator(node);
	        parseNodes();
	    }


	    /**
	     * Returns the list of <code>TextNodeEntry</code> objects created while
	     * parsing.  Each entry ties a <code>Node</code> to the first and last
	     * character position it occupies in the character array.  The
	     * internal list itself is returned, not a copy.
	     *
	     * @return  The list of <code>TextNodeEntry</code> objects.
	     */
	    public List getNodeList() {
	        return nodeList_;
	    }


	    /**
	     * Returns the character array representation of the text.  The
	     * internal array itself is returned, not a copy.
	     *
	     * @return  The character array representation of the text.
	     */
	    public char[] getCharArray() {
	        return charArray;
	    }


	    /**
	     * Walks through every <code>Node</code> delivered by the iterator,
	     * concatenates the text each one stands for and registers an entry
	     * for it.  Nodes that stand for no text are skipped.
	     */
	    private void parseNodes() {

	        StringBuffer collected = new StringBuffer();

	        Node node = (Node) textNodes.start();
	        while (node != null) {

	            String piece = textFor(node);
	            if (piece != null) {
	                collected.append(piece);
	                registerEntry(node, piece.length());
	            }

	            node = (Node) textNodes.next();
	        }

	        charArray = collected.toString().toCharArray();
	    }


	    /**
	     * Determines the text a single <code>Node</code> contributes.
	     *
	     * @param  node  The <code>Node</code> to inspect.
	     *
	     * @return  The node value for a text node, a single blank for a space
	     *          node, a tab character for a tab stop node, or
	     *          <code>null</code> for any other node.
	     */
	    private static String textFor(Node node) {

	        if (node.getNodeType() == Node.TEXT_NODE) {
	            return node.getNodeValue();
	        }

	        String name = node.getNodeName();

	        // TODO: Space node have a count attribute which is not handled!
	        // A space node therefore always counts as exactly one blank.
	        if (name.equals(OfficeConstants.TAG_SPACE)) {
	            return " ";
	        }

	        if (name.equals(OfficeConstants.TAG_TAB_STOP)) {
	            return "\t";
	        }

	        return null;
	    }


	    /**
	     * Appends a <code>TextNodeEntry</code> for the given <code>Node</code>
	     * and advances the running character position past its text.
	     *
	     * @param  node     The <code>Node</code>.
	     * @param  textLen  The text length.
	     */
	    private void registerEntry(Node node, int textLen) {

	        int first = currentPosition;

	        // The end position is inclusive; for an empty text it is
	        // one less than the start position.
	        int last = first + textLen - 1;

	        TextNodeEntry entry = new TextNodeEntry(first, last, node);
	        currentPosition = first + textLen;

	        nodeList_.add(entry);
	    }
	}