| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.html.dom; |
| |
| import java.io.Serializable; |
| |
| import org.w3c.dom.Element; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.html.HTMLAnchorElement; |
| import org.w3c.dom.html.HTMLAppletElement; |
| import org.w3c.dom.html.HTMLAreaElement; |
| import org.w3c.dom.html.HTMLCollection; |
| import org.w3c.dom.html.HTMLElement; |
| import org.w3c.dom.html.HTMLFormElement; |
| import org.w3c.dom.html.HTMLImageElement; |
| import org.w3c.dom.html.HTMLObjectElement; |
| import org.w3c.dom.html.HTMLOptionElement; |
| import org.w3c.dom.html.HTMLTableCellElement; |
| import org.w3c.dom.html.HTMLTableRowElement; |
| import org.w3c.dom.html.HTMLTableSectionElement; |
| |
| /** |
| * Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named |
| * elements on a {@link org.w3c.dom.html.HTMLDocument}. The elements type to |
| * look for is identified in the constructor by code. This collection is not |
| * optimized for traversing large trees. |
| * <p> |
| * The collection has to meet two requirements: it has to be live, and it has |
| * to traverse depth first and always return results in that order. As such, |
| * using an object container (such as {@link java.util.Vector}) is expensive on |
| * insert/remove operations. Instead, the collection has been implemented using |
| * three traversing functions. As a result, operations on large documents will |
| * result in traversal of the entire document tree and consume a considerable |
| * amount of time. |
| * <p> |
| * Note that synchronization on the traversed document cannot be achieved. |
| * The document itself cannot be locked, and locking each traversed node is |
| * likely to lead to a dead lock condition. Therefore, there is a chance of the |
| * document being changed as results are fetched; in all likelihood, the results |
| * might be out dated, but not erroneous. |
| * |
| * @xerces.internal |
| * |
| * @version $Revision$ $Date$ |
| * @author <a href="mailto:arkin@exoffice.com">Assaf Arkin</a> |
| * @see org.w3c.dom.html.HTMLCollection |
| */ |
| class HTMLCollectionImpl |
| implements HTMLCollection, Serializable |
| { |
| |
| private static final long serialVersionUID = 9112122196669185082L; |
| |
| /** |
| * Request collection of all anchors in document: <A> elements that |
| * have a <code>name</code> attribute. |
| */ |
| static final short ANCHOR = 1; |
| |
| |
| /** |
| * Request collection of all forms in document: <FORM> elements. |
| */ |
| static final short FORM = 2; |
| |
| |
| /** |
| * Request collection of all images in document: <IMG> elements. |
| */ |
| static final short IMAGE = 3; |
| |
| |
| /** |
| * Request collection of all Applets in document: <APPLET> and |
| * <OBJECT> elements (<OBJECT> must contain an Applet). |
| */ |
| static final short APPLET = 4; |
| |
| |
| /** |
| * Request collection of all links in document: <A> and <AREA> |
| * elements (must have a <code>href</code> attribute). |
| */ |
| static final short LINK = 5; |
| |
| |
| /** |
| * Request collection of all options in selection: <OPTION> elements in |
| * <SELECT> or <OPTGROUP>. |
| */ |
| static final short OPTION = 6; |
| |
| |
| /** |
| * Request collection of all rows in table: <TR> elements in table or |
| * table section. |
| */ |
| static final short ROW = 7; |
| |
| |
| /** |
| * Request collection of all form elements: <INPUT>, <BUTTON>, |
| * <SELECT>, and <TEXTAREA> elements inside form <FORM>. |
| */ |
| static final short ELEMENT = 8; |
| |
| |
| /** |
| * Request collection of all areas in map: <AREA> element in <MAP> |
| * (non recursive). |
| */ |
| static final short AREA = -1; |
| |
| |
| /** |
| * Request collection of all table bodies in table: <TBODY> element in |
| * table <TABLE> (non recursive). |
| */ |
| static final short TBODY = -2; |
| |
| |
| /** |
| * Request collection of all cells in row: <TD> and <TH> |
| * elements in <TR> (non recursive). |
| */ |
| static final short CELL = -3; |
| |
| |
| /** |
| * Indicates what this collection is looking for. Holds one of the enumerated |
| * values and used by {@link #collectionMatch}. Set by the constructor and |
| * determine the collection's use for its life time. |
| */ |
| private short _lookingFor; |
| |
| |
| /** |
| * This is the top level element underneath which the collection exists. |
| */ |
| private Element _topLevel; |
| |
| |
| /** |
| * Construct a new collection that retrieves element of the specific type |
| * (<code>lookingFor</code>) from the specific document portion |
| * (<code>topLevel</code>). |
| * |
| * @param topLevel The element underneath which the collection exists |
| * @param lookingFor Code indicating what elements to look for |
| */ |
| HTMLCollectionImpl( HTMLElement topLevel, short lookingFor ) |
| { |
| if ( topLevel == null ) |
| throw new NullPointerException( "HTM011 Argument 'topLevel' is null." ); |
| _topLevel = topLevel; |
| _lookingFor = lookingFor; |
| } |
| |
| |
| /** |
| * Returns the length of the collection. This method might traverse the |
| * entire document tree. |
| * |
| * @return Length of the collection |
| */ |
| public final int getLength() |
| { |
| // Call recursive function on top-level element. |
| return getLength( _topLevel ); |
| } |
| |
| |
| /** |
| * Retrieves the indexed node from the collection. Nodes are numbered in |
| * tree order - depth-first traversal order. This method might traverse |
| * the entire document tree. |
| * |
| * @param index The index of the node to return |
| * @return The specified node or null if no such node found |
| */ |
| public final Node item( int index ) |
| { |
| if ( index < 0 ) |
| throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." ); |
| // Call recursive function on top-level element. |
| return item( _topLevel, new CollectionIndex( index ) ); |
| } |
| |
| |
| /** |
| * Retrieves the named node from the collection. The name is matched case |
| * sensitive against the <TT>id</TT> attribute of each element in the |
| * collection, returning the first match. The tree is traversed in |
| * depth-first order. This method might traverse the entire document tree. |
| * |
| * @param name The name of the node to return |
| * @return The specified node or null if no such node found |
| */ |
| public final Node namedItem( String name ) |
| { |
| if ( name == null ) |
| throw new NullPointerException( "HTM013 Argument 'name' is null." ); |
| // Call recursive function on top-level element. |
| return namedItem( _topLevel, name ); |
| } |
| |
| |
| /** |
| * Recursive function returns the number of elements of a particular type |
| * that exist under the top level element. This is a recursive function |
| * and the top level element is passed along. |
| * |
| * @param topLevel Top level element from which to scan |
| * @return Number of elements |
| */ |
| private int getLength( Element topLevel ) |
| { |
| int length; |
| Node node; |
| |
| synchronized ( topLevel ) |
| { |
| // Always count from zero and traverse all the childs of the |
| // current element in the order they appear. |
| length = 0; |
| node = topLevel.getFirstChild(); |
| while ( node != null ) |
| { |
| // If a particular node is an element (could be HTML or XML), |
| // do two things: if it's the one we're looking for, count |
| // another matched element; at any rate, traverse it's |
| // children as well. |
| if ( node instanceof Element ) |
| { |
| if ( collectionMatch( (Element) node, null ) ) |
| ++ length; |
| else if ( recurse() ) |
| length += getLength( (Element) node ); |
| } |
| node = node.getNextSibling(); |
| } |
| } |
| return length; |
| } |
| |
| |
| /** |
| * Recursive function returns the numbered element of a particular type |
| * that exist under the top level element. This is a recursive function |
| * and the top level element is passed along. |
| * <p> |
| * Note that this function must call itself with an index and get back both |
| * the element (if one was found) and the new index which is decremeneted |
| * for any like element found. Since integers are only passed by value, |
| * this function makes use of a separate class ({@link CollectionIndex}) |
| * to hold that index. |
| * |
| * @param topLevel Top level element from which to scan |
| * @param index The index of the item to retreive |
| * @return Number of elements |
| * @see CollectionIndex |
| */ |
| private Node item( Element topLevel, CollectionIndex index ) |
| { |
| Node node; |
| Node result; |
| |
| synchronized ( topLevel ) |
| { |
| // Traverse all the childs of the current element in the order |
| // they appear. Count from the index backwards until you reach |
| // matching element with an index of zero. Return that element. |
| node = topLevel.getFirstChild(); |
| while ( node != null ) |
| { |
| // If a particular node is an element (could be HTML or XML), |
| // do two things: if it's the one we're looking for, decrease |
| // the index and if zero, return this node; at any rate, |
| // traverse it's children as well. |
| if ( node instanceof Element ) |
| { |
| if ( collectionMatch( (Element) node, null ) ) |
| { |
| if ( index.isZero() ) |
| return node; |
| index.decrement(); |
| } else if ( recurse() ) |
| { |
| result = item( (Element) node, index ); |
| if ( result != null ) |
| return result; |
| } |
| } |
| node = node.getNextSibling(); |
| } |
| } |
| return null; |
| } |
| |
| |
| /** |
| * Recursive function returns an element of a particular type with the |
| * specified name (<TT>id</TT> attribute). |
| * |
| * @param topLevel Top level element from which to scan |
| * @param name The named element to look for |
| * @return The first named element found |
| */ |
| private Node namedItem( Element topLevel, String name ) |
| { |
| Node node; |
| Node result; |
| |
| synchronized ( topLevel ) |
| { |
| // Traverse all the childs of the current element in the order |
| // they appear. |
| node = topLevel.getFirstChild(); |
| while ( node != null ) |
| { |
| // If a particular node is an element (could be HTML or XML), |
| // do two things: if it's the one we're looking for, and the |
| // name (id attribute) attribute is the one we're looking for, |
| // return this element; otherwise, traverse it's children. |
| if ( node instanceof Element ) |
| { |
| if ( collectionMatch( (Element) node, name ) ) |
| return node; |
| else if ( recurse() ) |
| { |
| result = namedItem( (Element) node, name ); |
| if ( result != null ) |
| return result; |
| } |
| } |
| node = node.getNextSibling(); |
| } |
| return node; |
| } |
| } |
| |
| |
| /** |
| * Returns true if scanning methods should iterate through the collection. |
| * When looking for elements in the document, recursing is needed to traverse |
| * the full document tree. When looking inside a specific element (e.g. for a |
| * cell inside a row), recursing can lead to erroneous results. |
| * |
| * @return True if methods should recurse to traverse entire tree |
| */ |
| protected boolean recurse() |
| { |
| return _lookingFor > 0; |
| } |
| |
| |
| /** |
| * Determines if current element matches based on what we're looking for. |
| * The element is passed along with an optional identifier name. If the |
| * element is the one we're looking for, return true. If the name is also |
| * specified, the name must match the <code>id</code> attribute |
| * (match <code>name</code> first for anchors). |
| * |
| * @param elem The current element |
| * @param name The identifier name or null |
| * @return The element matches what we're looking for |
| */ |
| protected boolean collectionMatch( Element elem, String name ) |
| { |
| boolean match; |
| |
| synchronized ( elem ) |
| { |
| // Begin with no matching. Depending on what we're looking for, |
| // attempt to match based on the element type. This is the quickest |
| // way to match involving only a cast. Do the expensive string |
| // comparison later on. |
| match = false; |
| switch ( _lookingFor ) |
| { |
| case ANCHOR: |
| // Anchor is an <A> element with a 'name' attribute. Otherwise, it's |
| // just a link. |
| match = ( elem instanceof HTMLAnchorElement ) && |
| elem.getAttribute( "name" ).length() > 0; |
| break; |
| case FORM: |
| // Any <FORM> element. |
| match = ( elem instanceof HTMLFormElement ); |
| break; |
| case IMAGE: |
| // Any <IMG> element. <OBJECT> elements with images are not returned. |
| match = ( elem instanceof HTMLImageElement ); |
| break; |
| case APPLET: |
| // Any <APPLET> element, and any <OBJECT> element which represents an |
| // Applet. This is determined by 'codetype' attribute being |
| // 'application/java' or 'classid' attribute starting with 'java:'. |
| match = ( elem instanceof HTMLAppletElement ) || |
| ( elem instanceof HTMLObjectElement && |
| ( "application/java".equals( elem.getAttribute( "codetype" ) ) || |
| elem.getAttribute( "classid" ).startsWith( "java:" ) ) ); |
| break; |
| case ELEMENT: |
| // All form elements implement HTMLFormControl for easy identification. |
| match = ( elem instanceof HTMLFormControl ); |
| break; |
| case LINK: |
| // Any <A> element, and any <AREA> elements with an 'href' attribute. |
| match = ( ( elem instanceof HTMLAnchorElement || |
| elem instanceof HTMLAreaElement ) && |
| elem.getAttribute( "href" ).length() > 0 ); |
| break; |
| case AREA: |
| // Any <AREA> element. |
| match = ( elem instanceof HTMLAreaElement ); |
| break; |
| case OPTION: |
| // Any <OPTION> element. |
| match = ( elem instanceof HTMLOptionElement ); |
| break; |
| case ROW: |
| // Any <TR> element. |
| match = ( elem instanceof HTMLTableRowElement ); |
| break; |
| case TBODY: |
| // Any <TBODY> element (one of three table section types). |
| match = ( elem instanceof HTMLTableSectionElement && |
| elem.getTagName().equals( "TBODY" ) ); |
| break; |
| case CELL: |
| // Any <TD> or <TH> element. |
| match = ( elem instanceof HTMLTableCellElement ); |
| break; |
| } |
| |
| // If element type was matched and a name was specified, must also match |
| // the name against either the 'id' or the 'name' attribute. The 'name' |
| // attribute is relevant only for <A> elements for backward compatibility. |
| if ( match && name != null ) |
| { |
| // If an anchor and 'name' attribute matches, return true. Otherwise, |
| // try 'id' attribute. |
| if ( elem instanceof HTMLAnchorElement && |
| name.equals( elem.getAttribute( "name" ) ) ) |
| return true; |
| match = name.equals( elem.getAttribute( "id" ) ); |
| } |
| } |
| return match; |
| } |
| |
| |
| } |
| |
| |
| /** |
| * {@link CollectionImpl#item} must traverse down the tree and decrement the |
| * index until it matches an element who's index is zero. Since integers are |
| * passed by value, this class servers to pass the index into each recursion |
| * by reference. It encompasses all the operations that need be performed on |
| * the index, although direct access is possible. |
| * |
| * @xerces.internal |
| * |
| * @see CollectionImpl#item |
| */ |
| class CollectionIndex |
| { |
| |
| |
| /** |
| * Returns the current index. |
| * |
| * @return Current index |
| */ |
| int getIndex() |
| { |
| return _index; |
| } |
| |
| |
| /** |
| * Decrements the index by one. |
| */ |
| void decrement() |
| { |
| -- _index; |
| } |
| |
| |
| /** |
| * Returns true if index is zero (or negative). |
| * |
| * @return True if index is zero |
| */ |
| boolean isZero() |
| { |
| return _index <= 0; |
| } |
| |
| |
| /** |
| * Constructs a new index with the specified initial value. The index will |
| * then be decremeneted until it reaches zero. |
| * |
| * @param index The initial value |
| */ |
| CollectionIndex( int index ) |
| { |
| _index = index; |
| } |
| |
| |
| /** |
| * Holds the actual value that is passed by reference using this class. |
| */ |
| private int _index; |
| |
| |
| } |