|  | /* | 
|  | * Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | * contributor license agreements.  See the NOTICE file distributed with | 
|  | * this work for additional information regarding copyright ownership. | 
|  | * The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | * (the "License"); you may not use this file except in compliance with | 
|  | * the License.  You may obtain a copy of the License at | 
|  | * | 
|  | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | * | 
|  | * Unless required by applicable law or agreed to in writing, software | 
|  | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | * See the License for the specific language governing permissions and | 
|  | * limitations under the License. | 
|  | */ | 
|  |  | 
|  | package org.apache.html.dom; | 
|  |  | 
|  | import java.io.Serializable; | 
|  |  | 
|  | import org.w3c.dom.Element; | 
|  | import org.w3c.dom.Node; | 
|  | import org.w3c.dom.html.HTMLAnchorElement; | 
|  | import org.w3c.dom.html.HTMLAppletElement; | 
|  | import org.w3c.dom.html.HTMLAreaElement; | 
|  | import org.w3c.dom.html.HTMLCollection; | 
|  | import org.w3c.dom.html.HTMLElement; | 
|  | import org.w3c.dom.html.HTMLFormElement; | 
|  | import org.w3c.dom.html.HTMLImageElement; | 
|  | import org.w3c.dom.html.HTMLObjectElement; | 
|  | import org.w3c.dom.html.HTMLOptionElement; | 
|  | import org.w3c.dom.html.HTMLTableCellElement; | 
|  | import org.w3c.dom.html.HTMLTableRowElement; | 
|  | import org.w3c.dom.html.HTMLTableSectionElement; | 
|  |  | 
|  | /** | 
|  | * Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named | 
|  | * elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to | 
|  | * look for is identified in the constructor by code. This collection is not | 
|  | * optimized for traversing large trees. | 
|  | * <p> | 
|  | * The collection has to meet two requirements: it has to be live, and it has | 
|  | * to traverse depth first and always return results in that order. As such, | 
|  | * using an object container (such as {@link java.util.Vector}) is expensive on | 
|  | * insert/remove operations. Instead, the collection has been implemented using | 
|  | * three traversing functions. As a result, operations on large documents will | 
|  | * result in traversal of the entire document tree and consume a considerable | 
|  | * amount of time. | 
|  | * <p> | 
|  | * Note that synchronization on the traversed document cannot be achieved. | 
|  | * The document itself cannot be locked, and locking each traversed node is | 
|  | * likely to lead to a dead lock condition. Therefore, there is a chance of the | 
|  | * document being changed as results are fetched; in all likelihood, the results | 
|  | * might be out dated, but not erroneous. | 
|  | * | 
|  | * @xerces.internal | 
|  | * | 
|  | * @version $Revision$ $Date$ | 
|  | * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a> | 
|  | * @see org.w3c.dom.html.HTMLCollection | 
|  | */ | 
|  | class HTMLCollectionImpl | 
|  | implements HTMLCollection, Serializable | 
|  | { | 
|  |  | 
|  | private static final long serialVersionUID = 9112122196669185082L; | 
|  |  | 
|  | /** | 
|  | * Request collection of all anchors in document: <A> elements that | 
|  | * have a <code>name</code> attribute. | 
|  | */ | 
|  | static final short        ANCHOR = 1; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all forms in document: <FORM> elements. | 
|  | */ | 
|  | static final short        FORM = 2; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all images in document: <IMG> elements. | 
|  | */ | 
|  | static final short        IMAGE = 3; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all Applets in document: <APPLET> and | 
|  | * <OBJECT> elements (<OBJECT> must contain an Applet). | 
|  | */ | 
|  | static final short        APPLET = 4; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all links in document: <A> and <AREA> | 
|  | * elements (must have a <code>href</code> attribute). | 
|  | */ | 
|  | static final short        LINK = 5; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all options in selection: <OPTION> elements in | 
|  | * <SELECT> or <OPTGROUP>. | 
|  | */ | 
|  | static final short        OPTION = 6; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all rows in table: <TR> elements in table or | 
|  | * table section. | 
|  | */ | 
|  | static final short        ROW = 7; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all form elements: <INPUT>, <BUTTON>, | 
|  | * <SELECT>, and <TEXTAREA> elements inside form <FORM>. | 
|  | */ | 
|  | static final short        ELEMENT = 8; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all areas in map: <AREA> element in <MAP> | 
|  | * (non recursive). | 
|  | */ | 
|  | static final short        AREA = -1; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all table bodies in table: <TBODY> element in | 
|  | * table <TABLE> (non recursive). | 
|  | */ | 
|  | static final short        TBODY = -2; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Request collection of all cells in row: <TD> and <TH> | 
|  | * elements in <TR> (non recursive). | 
|  | */ | 
|  | static final short        CELL = -3; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Indicates what this collection is looking for. Holds one of the enumerated | 
|  | * values and used by {@link #collectionMatch}. Set by the constructor and | 
|  | * determine the collection's use for its life time. | 
|  | */ | 
|  | private short            _lookingFor; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * This is the top level element underneath which the collection exists. | 
|  | */ | 
|  | private Element            _topLevel; | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Construct a new collection that retrieves element of the specific type | 
|  | * (<code>lookingFor</code>) from the specific document portion | 
|  | * (<code>topLevel</code>). | 
|  | * | 
|  | * @param topLevel The element underneath which the collection exists | 
|  | * @param lookingFor Code indicating what elements to look for | 
|  | */ | 
|  | HTMLCollectionImpl( HTMLElement topLevel, short lookingFor ) | 
|  | { | 
|  | if ( topLevel == null ) | 
|  | throw new NullPointerException( "HTM011 Argument 'topLevel' is null." ); | 
|  | _topLevel = topLevel; | 
|  | _lookingFor = lookingFor; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Returns the length of the collection. This method might traverse the | 
|  | * entire document tree. | 
|  | * | 
|  | * @return Length of the collection | 
|  | */ | 
|  | public final int getLength() | 
|  | { | 
|  | // Call recursive function on top-level element. | 
|  | return getLength( _topLevel ); | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Retrieves the indexed node from the collection. Nodes are numbered in | 
|  | * tree order - depth-first traversal order. This method might traverse | 
|  | * the entire document tree. | 
|  | * | 
|  | * @param index The index of the node to return | 
|  | * @return The specified node or null if no such node found | 
|  | */ | 
|  | public final Node item( int index ) | 
|  | { | 
|  | if ( index < 0 ) | 
|  | throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." ); | 
|  | // Call recursive function on top-level element. | 
|  | return item( _topLevel, new CollectionIndex( index ) ); | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Retrieves the named node from the collection. The name is matched case | 
|  | * sensitive against the <TT>id</TT> attribute of each element in the | 
|  | * collection, returning the first match. The tree is traversed in | 
|  | * depth-first order. This method might traverse the entire document tree. | 
|  | * | 
|  | * @param name The name of the node to return | 
|  | * @return The specified node or null if no such node found | 
|  | */ | 
|  | public final Node namedItem( String name ) | 
|  | { | 
|  | if ( name == null ) | 
|  | throw new NullPointerException( "HTM013 Argument 'name' is null." ); | 
|  | // Call recursive function on top-level element. | 
|  | return namedItem( _topLevel, name ); | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Recursive function returns the number of elements of a particular type | 
|  | * that exist under the top level element. This is a recursive function | 
|  | * and the top level element is passed along. | 
|  | * | 
|  | * @param topLevel Top level element from which to scan | 
|  | * @return Number of elements | 
|  | */ | 
|  | private int getLength( Element topLevel ) | 
|  | { | 
|  | int        length; | 
|  | Node    node; | 
|  |  | 
|  | synchronized ( topLevel ) | 
|  | { | 
|  | // Always count from zero and traverse all the childs of the | 
|  | // current element in the order they appear. | 
|  | length = 0; | 
|  | node = topLevel.getFirstChild(); | 
|  | while ( node != null ) | 
|  | { | 
|  | // If a particular node is an element (could be HTML or XML), | 
|  | // do two things: if it's the one we're looking for, count | 
|  | // another matched element; at any rate, traverse it's | 
|  | // children as well. | 
|  | if ( node instanceof Element ) | 
|  | { | 
|  | if ( collectionMatch( (Element) node, null ) ) | 
|  | ++ length; | 
|  | else if ( recurse() ) | 
|  | length += getLength( (Element) node ); | 
|  | } | 
|  | node = node.getNextSibling(); | 
|  | } | 
|  | } | 
|  | return length; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Recursive function returns the numbered element of a particular type | 
|  | * that exist under the top level element. This is a recursive function | 
|  | * and the top level element is passed along. | 
|  | * <p> | 
|  | * Note that this function must call itself with an index and get back both | 
|  | * the element (if one was found) and the new index which is decremeneted | 
|  | * for any like element found. Since integers are only passed by value, | 
|  | * this function makes use of a separate class ({@link CollectionIndex}) | 
|  | * to hold that index. | 
|  | * | 
|  | * @param topLevel Top level element from which to scan | 
|  | * @param index The index of the item to retreive | 
|  | * @return Number of elements | 
|  | * @see CollectionIndex | 
|  | */ | 
|  | private Node item( Element topLevel, CollectionIndex index ) | 
|  | { | 
|  | Node    node; | 
|  | Node    result; | 
|  |  | 
|  | synchronized ( topLevel ) | 
|  | { | 
|  | // Traverse all the childs of the current element in the order | 
|  | // they appear. Count from the index backwards until you reach | 
|  | // matching element with an index of zero. Return that element. | 
|  | node = topLevel.getFirstChild(); | 
|  | while ( node != null ) | 
|  | { | 
|  | // If a particular node is an element (could be HTML or XML), | 
|  | // do two things: if it's the one we're looking for, decrease | 
|  | // the index and if zero, return this node; at any rate, | 
|  | // traverse it's children as well. | 
|  | if ( node instanceof Element ) | 
|  | { | 
|  | if ( collectionMatch( (Element) node, null ) ) | 
|  | { | 
|  | if ( index.isZero() ) | 
|  | return node; | 
|  | index.decrement(); | 
|  | } else if ( recurse() ) | 
|  | { | 
|  | result = item( (Element) node, index ); | 
|  | if ( result != null ) | 
|  | return result; | 
|  | } | 
|  | } | 
|  | node = node.getNextSibling(); | 
|  | } | 
|  | } | 
|  | return null; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Recursive function returns an element of a particular type with the | 
|  | * specified name (<TT>id</TT> attribute). | 
|  | * | 
|  | * @param topLevel Top level element from which to scan | 
|  | * @param name The named element to look for | 
|  | * @return The first named element found | 
|  | */ | 
|  | private  Node namedItem( Element topLevel, String name ) | 
|  | { | 
|  | Node    node; | 
|  | Node    result; | 
|  |  | 
|  | synchronized ( topLevel ) | 
|  | { | 
|  | // Traverse all the childs of the current element in the order | 
|  | // they appear. | 
|  | node = topLevel.getFirstChild(); | 
|  | while ( node != null ) | 
|  | { | 
|  | // If a particular node is an element (could be HTML or XML), | 
|  | // do two things: if it's the one we're looking for, and the | 
|  | // name (id attribute) attribute is the one we're looking for, | 
|  | // return this element; otherwise, traverse it's children. | 
|  | if ( node instanceof Element ) | 
|  | { | 
|  | if ( collectionMatch( (Element) node, name ) ) | 
|  | return node; | 
|  | else if ( recurse() ) | 
|  | { | 
|  | result = namedItem( (Element) node, name ); | 
|  | if ( result != null ) | 
|  | return result; | 
|  | } | 
|  | } | 
|  | node = node.getNextSibling(); | 
|  | } | 
|  | return node; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Returns true if scanning methods should iterate through the collection. | 
|  | * When looking for elements in the document, recursing is needed to traverse | 
|  | * the full document tree. When looking inside a specific element (e.g. for a | 
|  | * cell inside a row), recursing can lead to erroneous results. | 
|  | * | 
|  | * @return True if methods should recurse to traverse entire tree | 
|  | */ | 
|  | protected boolean recurse() | 
|  | { | 
|  | return _lookingFor > 0; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Determines if current element matches based on what we're looking for. | 
|  | * The element is passed along with an optional identifier name. If the | 
|  | * element is the one we're looking for, return true. If the name is also | 
|  | * specified, the name must match the <code>id</code> attribute | 
|  | * (match <code>name</code> first for anchors). | 
|  | * | 
|  | * @param elem The current element | 
|  | * @param name The identifier name or null | 
|  | * @return The element matches what we're looking for | 
|  | */ | 
|  | protected boolean collectionMatch( Element elem, String name ) | 
|  | { | 
|  | boolean    match; | 
|  |  | 
|  | synchronized ( elem ) | 
|  | { | 
|  | // Begin with no matching. Depending on what we're looking for, | 
|  | // attempt to match based on the element type. This is the quickest | 
|  | // way to match involving only a cast. Do the expensive string | 
|  | // comparison later on. | 
|  | match = false; | 
|  | switch ( _lookingFor ) | 
|  | { | 
|  | case ANCHOR: | 
|  | // Anchor is an <A> element with a 'name' attribute. Otherwise, it's | 
|  | // just a link. | 
|  | match = ( elem instanceof HTMLAnchorElement ) && | 
|  | elem.getAttribute( "name" ).length() > 0; | 
|  | break; | 
|  | case FORM: | 
|  | // Any <FORM> element. | 
|  | match = ( elem instanceof HTMLFormElement ); | 
|  | break; | 
|  | case IMAGE: | 
|  | // Any <IMG> element. <OBJECT> elements with images are not returned. | 
|  | match = ( elem instanceof HTMLImageElement ); | 
|  | break; | 
|  | case APPLET: | 
|  | // Any <APPLET> element, and any <OBJECT> element which represents an | 
|  | // Applet. This is determined by 'codetype' attribute being | 
|  | // 'application/java' or 'classid' attribute starting with 'java:'. | 
|  | match = ( elem instanceof HTMLAppletElement ) || | 
|  | ( elem instanceof HTMLObjectElement && | 
|  | ( "application/java".equals( elem.getAttribute( "codetype" ) ) || | 
|  | elem.getAttribute( "classid" ).startsWith( "java:" ) ) ); | 
|  | break; | 
|  | case ELEMENT: | 
|  | // All form elements implement HTMLFormControl for easy identification. | 
|  | match = ( elem instanceof HTMLFormControl ); | 
|  | break; | 
|  | case LINK: | 
|  | // Any <A> element, and any <AREA> elements with an 'href' attribute. | 
|  | match = ( ( elem instanceof HTMLAnchorElement || | 
|  | elem instanceof HTMLAreaElement ) && | 
|  | elem.getAttribute( "href" ).length() > 0 ); | 
|  | break; | 
|  | case AREA: | 
|  | // Any <AREA> element. | 
|  | match = ( elem instanceof HTMLAreaElement ); | 
|  | break; | 
|  | case OPTION: | 
|  | // Any <OPTION> element. | 
|  | match = ( elem instanceof HTMLOptionElement ); | 
|  | break; | 
|  | case ROW: | 
|  | // Any <TR> element. | 
|  | match = ( elem instanceof HTMLTableRowElement ); | 
|  | break; | 
|  | case TBODY: | 
|  | // Any <TBODY> element (one of three table section types). | 
|  | match = ( elem instanceof HTMLTableSectionElement && | 
|  | elem.getTagName().equals( "TBODY" ) ); | 
|  | break; | 
|  | case CELL: | 
|  | // Any <TD> or <TH> element. | 
|  | match = ( elem instanceof HTMLTableCellElement ); | 
|  | break; | 
|  | } | 
|  |  | 
|  | // If element type was matched and a name was specified, must also match | 
|  | // the name against either the 'id' or the 'name' attribute. The 'name' | 
|  | // attribute is relevant only for <A> elements for backward compatibility. | 
|  | if ( match && name != null ) | 
|  | { | 
|  | // If an anchor and 'name' attribute matches, return true. Otherwise, | 
|  | // try 'id' attribute. | 
|  | if ( elem instanceof HTMLAnchorElement && | 
|  | name.equals( elem.getAttribute( "name" ) ) ) | 
|  | return true; | 
|  | match = name.equals( elem.getAttribute( "id" ) ); | 
|  | } | 
|  | } | 
|  | return match; | 
|  | } | 
|  |  | 
|  |  | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * {@link CollectionImpl#item} must traverse down the tree and decrement the | 
|  | * index until it matches an element who's index is zero. Since integers are | 
|  | * passed by value, this class servers to pass the index into each recursion | 
|  | * by reference. It encompasses all the operations that need be performed on | 
|  | * the index, although direct access is possible. | 
|  | * | 
|  | * @xerces.internal | 
|  | * | 
|  | * @see CollectionImpl#item | 
|  | */ | 
|  | class CollectionIndex | 
|  | { | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Returns the current index. | 
|  | * | 
|  | * @return Current index | 
|  | */ | 
|  | int getIndex() | 
|  | { | 
|  | return _index; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Decrements the index by one. | 
|  | */ | 
|  | void decrement() | 
|  | { | 
|  | -- _index; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Returns true if index is zero (or negative). | 
|  | * | 
|  | * @return True if index is zero | 
|  | */ | 
|  | boolean isZero() | 
|  | { | 
|  | return _index <= 0; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Constructs a new index with the specified initial value. The index will | 
|  | * then be decremeneted until it reaches zero. | 
|  | * | 
|  | * @param index The initial value | 
|  | */ | 
|  | CollectionIndex( int index ) | 
|  | { | 
|  | _index = index; | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Holds the actual value that is passed by reference using this class. | 
|  | */ | 
|  | private int        _index; | 
|  |  | 
|  |  | 
|  | } |