ide/html.editor.lib/src/org/netbeans/modules/html/editor/lib/ElementsParser.java - netbeans - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.netbeans.modules.html.editor.lib;

 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import org.netbeans.api.html.lexer.HTMLTokenId;
 import org.netbeans.api.lexer.Token;
 import org.netbeans.api.lexer.TokenSequence;
 import org.netbeans.modules.html.editor.lib.api.ProblemDescription;
 import org.netbeans.modules.html.editor.lib.api.elements.Attribute;
 import org.netbeans.modules.html.editor.lib.api.elements.Element;
 import org.netbeans.modules.html.editor.lib.plain.*;
 import org.netbeans.modules.web.common.api.LexerUtils;

 /**
  * Html syntax analyzer/plain parser
  *
  * @author mfukala@netbeans.org
  */
 public class ElementsParser implements Iterator<Element> {

     //parser state: always one of the S_* constants below; processNextToken() switches on it
     private int state;
     //parser state constants
     //outside of any element; the next token decides which element gets started
     private static final int S_INIT = 0;
     //entered on a TAG_OPEN_SYMBOL token; a TAG_OPEN or TAG_CLOSE token is expected next
     private static final int S_TAG_OPEN_SYMBOL = 1;
     //inside a tag: attribute names or the TAG_CLOSE_SYMBOL token are expected
     private static final int S_TAG = 2;
     //entered on an ARGUMENT token (the attribute name) inside a tag
     private static final int S_TAG_ATTR = 3;
     //collecting the value token(s) of the attribute held in 'attrib'
     private static final int S_TAG_VALUE = 4;
     //entered on a BLOCK_COMMENT token
     private static final int S_COMMENT = 5;
     //entered on a DECLARATION token whose text is not "<!doctype"
     private static final int S_DECLARATION = 6;
     //entered on a DECLARATION token whose text is "<!doctype"; the root element name is expected next
     private static final int S_DOCTYPE_DECLARATION = 7;
     //the doctype root element name has been read
     private static final int S_DOCTYPE_AFTER_ROOT_ELEMENT = 8;
     //entered on the "public" keyword; the public id text is being collected
     private static final int S_DOCTYPE_PUBLIC_ID = 9;
     //entered on the "system" keyword or once the public id is complete; the doctype file is expected next
     private static final int S_DOCTYPE_FILE = 10;
     //entered from S_INIT on any token not handled there explicitly
     private static final int S_TEXT = 11;
     //the tag name (TAG_OPEN or TAG_CLOSE token) has just been read
     private static final int S_TAG_AFTER_NAME = 12;
     //eof parser state constants
     //problem key passed to ProblemDescription.create(...) for unexpected tokens inside a tag
     public static final String UNEXPECTED_SYMBOL_IN_OPEN_TAG = "unexpected_symbol_in_open_tag"; //NOI18N
     //the text passed to the created elements; also used to build exception snippets
     private CharSequence sourceCode;
     //the token sequence the parser reads from; moved forward and backward during parsing
     private TokenSequence<HTMLTokenId> ts;
     //inner parsing states
     //the token fetched most recently from 'ts'
     private Token<HTMLTokenId> token;
     //start offset of the element being built; -1 if no element is in progress
     private int start;
     //true if the current tag name came from a TAG_OPEN token, false for TAG_CLOSE
     private boolean openTag = true;
     //text of the TAG_OPEN/TAG_CLOSE token of the current tag; cleared once the tag element is created
     private String tagName;
     //the attribute name token seen last inside the current tag
     private TokenInfo attrib;
     //attribute name tokens of the current tag; attr_values uses the same indexes
     private List<TokenInfo> attr_keys;
     //value parts of each attribute in attr_keys; a null entry means the attribute has no value
     private List<List<TokenInfo>> attr_values;
     //the element created while processing the latest token, null if none was created
     private Element current;
     //set once ts.moveNext() returned false
     private boolean eof;
     //lookahead of the iterator: null = not computed yet, a reference holding null = no more elements
     private AtomicReference<Element> lastFoundElement;
     //pieces of the declaration being parsed, passed to the DeclarationElement constructor
     private String root_element, doctype_public_id, doctype_file, doctype_name;

     /**
      * Creates a parser reading tokens from the given token sequence.
      *
      * The {@link TokenSequence} needs to be properly positioned by the caller,
      * the constructor does not move it.
      *
      * @param sourceCode the text handed over to the created elements
      * @param tokenSequence the token sequence to read the tokens from
      */
     private ElementsParser(CharSequence sourceCode, TokenSequence<HTMLTokenId> tokenSequence) {
         //parsing begins outside of any element
         this.state = S_INIT;
         this.start = -1;
         this.eof = false;

         //no attributes collected yet
         this.attr_keys = new ArrayList<>();
         this.attr_values = new ArrayList<>();

         this.ts = tokenSequence;
         this.sourceCode = sourceCode;
     }

     /**
      * Creates a parser which starts parsing at the given offset.
      *
      * The token sequence is moved to {@code position} by this method. The
      * position must be the beginning of a token, i.e.
      * {@code tokenSequence.move(position)} must return zero.
      *
      * @param sourceCode the text handed over to the created elements
      * @param tokenSequence the token sequence to read the tokens from
      * @param position offset of a token beginning, zero or bigger
      * @return a new parser instance
      * @throws IllegalArgumentException if the position is negative or if it
      * does not point to a token beginning
      */
     public static ElementsParser forOffset(CharSequence sourceCode, TokenSequence<HTMLTokenId> tokenSequence, int position) {
         if (position < 0) {
             //zero is a valid position, hence "not negative" and not "positive"
             throw new IllegalArgumentException(String.format("Position (%s) must not be negative", position)); //NOI18N
         }

         int diff = tokenSequence.move(position);
         if (diff != 0) {
             //moveNext() is called here only to be able to put the offending token to the message
             throw new IllegalArgumentException(String.format("Parser must be started "
                     + "at a token beginning, not in the middle (position=%s, token diff=%s, token=%s)",
                     position, diff, (tokenSequence.moveNext() ? tokenSequence.token() : null))); //NOI18N
         }
         return new ElementsParser(sourceCode, tokenSequence);
     }

     /**
      * Creates a parser which starts parsing at the token with the given index.
      *
      * The token sequence is first moved to its end to find out the upper
      * bound of the index and then moved to {@code tokenIndex}.
      *
      * @param sourceCode the text handed over to the created elements
      * @param tokenSequence the token sequence to read the tokens from
      * @param tokenIndex index in the token sequence, zero or bigger
      * @return a new parser instance
      * @throws IllegalArgumentException if the index is negative or bigger
      * than the index reported by the token sequence after {@code moveEnd()}
      */
     public static ElementsParser forTokenIndex(CharSequence sourceCode, TokenSequence<HTMLTokenId> tokenSequence, int tokenIndex) {
         if (tokenIndex < 0) {
             //zero is a valid index, hence "not negative" and not "positive"
             throw new IllegalArgumentException(String.format("TokenSequence index (%s) must not be negative", tokenIndex)); //NOI18N
         }
         tokenSequence.moveEnd();
         //NOTE(review): after moveEnd() the index presumably points behind the last token,
         //so an index equal to this value positions the parser at the end of the sequence - confirm
         int lastTokenIndex = tokenSequence.index();
         if (tokenIndex > lastTokenIndex) {
             throw new IllegalArgumentException(String.format("token index (%s) is bigger than last index in the sequence (%s)", tokenIndex, lastTokenIndex));
         }
         tokenSequence.moveIndex(tokenIndex);
         return new ElementsParser(sourceCode, tokenSequence);
     }

     /**
      * Tells whether the parser can provide another element.
      *
      * The next element is searched for on the first call and remembered
      * until {@link #next()} picks it up, so repeated calls do not consume
      * more tokens.
      *
      * @return true if there is an element to be returned by {@link #next()}
      */
     @Override
     public boolean hasNext() {
         AtomicReference<Element> lookahead = lastFoundElement;
         if (lookahead == null) {
             //nothing remembered, parse up to the next element (or the end)
             lookahead = new AtomicReference<>(findNextElement());
             lastFoundElement = lookahead;
         }
         return lookahead.get() != null;
     }

     /**
      * Returns the next element and forgets the remembered lookahead.
      *
      * @return the next parsed element, never null
      * @throws IllegalStateException if there are no more elements; note
      * that this is not the NoSuchElementException which the
      * {@link Iterator} contract describes
      */
     @Override
     public Element next() {
         if (hasNext()) {
             Element found = lastFoundElement.get();
             //force hasNext() to search for the following element
             lastFoundElement = null;
             return found;
         }
         throw new IllegalStateException("No such element");
     }

     /**
      * Does nothing. The call is silently ignored, no exception is thrown
      * and the state of the parser does not change.
      */
     @Override
     public void remove() {
         //no-op
     }

     //---------------------------- private methods -----------------------------
     /**
      * Makes an error element the current element. The element starts at
      * 'start' and ends at the end of the token the token sequence points to;
      * the length is narrowed to short.
      */
     private void error() {
         int length = ts.offset() + ts.token().length() - start;
         current = new ErrorElement(sourceCode, start, (short) length);
     }

     /**
      * Makes a text element the current element. The element is created
      * from 'start' and from the end offset of the token the token sequence
      * points to.
      */
     private void text() {
         int end = ts.offset() + ts.token().length();
         current = new TextElement(start, end);
     }

     /**
      * Makes an entity reference element the current element. The element
      * starts at 'start' and ends at the end of the token the token sequence
      * points to; the length is narrowed to short.
      */
     private void entityReference() {
         int length = ts.offset() + ts.token().length() - start;
         current = new EntityReferenceElement(sourceCode, start, (short) length);
     }

     /**
      * Makes a comment element the current element. The element starts at
      * 'start' and ends at the end of the token the token sequence points to;
      * unlike in the other elements the length is passed as int.
      */
     private void comment() {
         int length = ts.offset() + ts.token().length() - start;
         current = new CommentElement(sourceCode, start, length);
     }

     /**
      * Makes a declaration element the current element. The element starts
      * at 'start' and ends at the end of the token the token sequence points
      * to (the length is narrowed to short); the doctype pieces collected so
      * far are passed along as they are, possibly null.
      */
     private void declaration() {
         int length = ts.offset() + ts.token().length() - start;
         current = new DeclarationElement(
                 sourceCode, start, (short) length,
                 root_element, doctype_public_id, doctype_file, doctype_name);
     }

     /**
      * Creates the tag element without any problem description attached.
      *
      * @param emptyTag passed on to {@link #tag(boolean, ProblemDescription)}
      */
     private void tag(boolean emptyTag) {
         final ProblemDescription noProblem = null;
         tag(emptyTag, noProblem);
     }

     /**
      * Creates the element for the tag being parsed and makes it the current
      * element.
      *
      * The attributes are built from the collected attr_keys/attr_values.
      * The element starts at 'start' and ends at the end of the token the
      * token sequence points to. Depending on 'openTag' either one of the
      * open tag elements or an end tag element is created. Finally the
      * collected tag name and attributes are cleared.
      *
      * @param emptyTag passed to the open tag elements; not used for end tags
      * @param problem a problem to be attached to the open tag element, or
      * null if there is none; not used for end tags
      * @throws IllegalStateException with a snippet of the source code as the
      * message if no element start is set or if the computed length is not
      * positive
      */
     private void tag(boolean emptyTag, ProblemDescription problem) {
         List<Attribute> attributes = new ArrayList<>(1); //use small initial capacity since typically there are one or two attribs (if any)
         for (int i = 0; i < attr_keys.size(); i++) {
             attributes.add(createAttribute(attr_keys.get(i), attr_values.get(i)));
         }

         //Bug 220775 - AssertionError: element length must be positive! debug>>>
         if (start == -1) {
             throw new IllegalStateException(getCodeSnippet());
         }
         int len = ts.offset() + ts.token().length() - start;
         if (len <= 0) {
             throw new IllegalStateException(getCodeSnippet());
         }
         //<<<

         if (openTag) {

             if (attributes.isEmpty()) {
                 //no attributes
                 if (problem == null) {
                     current = new AttributelessOpenTagElement(
                             sourceCode,
                             start,
                             (short) len,
                             (byte) tagName.length(),
                             emptyTag);
                 } else {
                     current = new ProblematicAttributelessOpenTagElement(
                             sourceCode,
                             start,
                             (short) len,
                             (byte) tagName.length(),
                             emptyTag,
                             problem);

                 }
             } else {
                 //attributes
                 if (problem == null) {
                     //open tag w/o error
                     if (len > Short.MAX_VALUE) {
                         //unusually long element
                         current = new LongOpenTagElement(
                                 sourceCode,
                                 start,
                                 len,
                                 (byte) tagName.length(),
                                 attributes,
                                 emptyTag);
                     } else {
                         current = new OpenTagElement(
                                 sourceCode,
                                 start,
                                 (short) len,
                                 (byte) tagName.length(),
                                 attributes,
                                 emptyTag);
                     }
                 } else {
                     //open tag w/ error
                     //note: the ProblematicOpenTagElement also extends LongOpenTagElement
                     //NOTE(review): len is still narrowed to short here; if the constructor
                     //accepts an int length the cast corrupts the length of tags longer
                     //than Short.MAX_VALUE - confirm against ProblematicOpenTagElement
                     current = new ProblematicOpenTagElement(
                             sourceCode,
                             start,
                             (short) len,
                             (byte) tagName.length(),
                             attributes,
                             emptyTag,
                             problem);

                 }
             }
         } else {
             current = new EndTagElement(
                     sourceCode,
                     start,
                     (short) len,
                     (byte) tagName.length());
         }

         //get ready for the next tag
         tagName = null;
         attrib = null;
         attr_keys = new ArrayList<>();
         attr_values = new ArrayList<>();
     }

     /**
      * Creates the attribute for one collected attribute name and its value
      * parts.
      *
      * @param key the attribute name token
      * @param values the value parts of the attribute, or null if the
      * attribute has no value
      * @return the created attribute
      */
     private Attribute createAttribute(TokenInfo key, List<TokenInfo> values) {
         if (values == null) {
             //attribute has no value
             assert key.token.length() < Short.MAX_VALUE;
             return new AttributeElement(
                     sourceCode,
                     key.offset,
                     (short) key.token.length());
         }

         if (values.size() == 1) {
             //one part value
             TokenInfo ti = values.get(0);

             assert key.token.length() < Short.MAX_VALUE;
             return new AttributeElement(
                     sourceCode,
                     key.offset,
                     ti.offset,
                     (short) key.token.length(),
                     ti.token.length());
         }

         //multipart value: the texts of all the parts are joined,
         //the value offset is the offset of the first part
         StringBuilder joinedValue = new StringBuilder();
         for (TokenInfo t : values) {
             joinedValue.append(t.token.text());
         }
         TokenInfo firstValuePart = values.get(0);

         return new AttributeElement.AttributeElementWithJoinedValue(
                 sourceCode,
                 key.offset,
                 (short) key.token.length(),
                 firstValuePart.offset,
                 joinedValue.toString().intern());
     }
     //maximal length of the code snippet put to the exception messages
     private static final int SNIPPET_LEN = 100;

     /**
      * Cuts a piece of the source code around the offset of the token the
      * token sequence points to, at most SNIPPET_LEN / 2 characters to each
      * side, clipped to the bounds of the source code.
      *
      * @return the snippet of the source code
      */
     private String getCodeSnippet() {
         final int half = SNIPPET_LEN / 2;
         final int center = ts.offset();

         int from = center - half;
         if (from < 0) {
             from = 0;
         }
         int to = center + half;
         if (to > sourceCode.length()) {
             to = sourceCode.length();
         }
         return sourceCode.subSequence(from, to).toString();
     }

     /**
      * Handles an error inside a tag when at least the tag name is known:
      * the tag element is created with the problem attached and the parser
      * returns to the initial state.
      *
      * @param problem description of the problem, passed to the tag element
      */
     private void tag_with_error(ProblemDescription problem) {
         //put the erroneous token back first so the tag ends before it
         backup(1);
         //make the tag; whether it is empty or not is unknown, so it is created as not empty
         tag(false, problem);

         //no element in progress anymore
         start = -1;
         state = S_INIT;
     }

     /**
      * Recovers from an error: an error element is created from the tokens
      * read so far and the parser returns to the initial state.
      */
     private void reset() {
         //the token which caused the error is put back, the error element excludes it
         backup(1);
         error();

         //no element in progress anymore
         start = -1;
         state = S_INIT;
     }

     /**
      * Moves the token sequence back by the given number of tokens and
      * updates 'token' after each step. The result of movePrevious() is not
      * checked.
      *
      * @param tokens how many tokens to go back; nothing happens for zero
      * or negative values
      */
     private void backup(int tokens) {
         int remaining = tokens;
         while (remaining > 0) {
             ts.movePrevious();
             token = ts.token();
             remaining--;
         }
     }

     /**
      * Processes tokens until an element is completed or the end of the
      * token sequence is reached.
      *
      * @return the next element, or null if the end was reached without
      * completing any element
      */
     private Element findNextElement() {
         while (!eof) {
             Element found = processNextToken();
             if (found != null) {
                 return found;
             }
         }
         //end reached, nothing more to return
         return null;
     }

     /**
      * Reads one token from the token sequence and advances the parser state
      * machine by it.
      *
      * Whenever a state meets a token which does not belong to the element
      * in progress, the token is put back via backup(1), the element is
      * created, and the token is read again by the next call in the S_INIT
      * state.
      *
      * At the end of the token sequence handleEOF() is called and 'eof' is
      * set.
      *
      * @return the element completed by this call, or null if the token did
      * not complete any element
      */
     private Element processNextToken() {
         current = null;

         if (!ts.moveNext()) {
             //eof
             handleEOF(); //may possibly set current element
             eof = true; //finish the parsing cycle
             return current;
         }

         int offset = ts.offset();
         token = ts.token();
         HTMLTokenId id = token.id();

         switch (state) {
             //no element in progress, the token decides what gets started
             case S_INIT:
                 switch (id) {
                     case CHARACTER:
                         //a single token makes the whole entity reference element
                         start = ts.offset();
                         entityReference();
                         state = S_INIT;
                         start = -1;
                         break;
                     case TAG_OPEN_SYMBOL:
                         start = ts.offset();
                         state = S_TAG_OPEN_SYMBOL;
                         break;
                     case BLOCK_COMMENT:
                         start = ts.offset();
                         state = S_COMMENT;
                         break;
                     case DECLARATION:
                         start = ts.offset();
                         if (LexerUtils.equals("<!doctype", token.text(), true, true)) { //NOI18N
                             //forget the pieces of any previously parsed doctype
                             root_element = null;
                             doctype_public_id = null;
                             doctype_file = null;
                             state = S_DOCTYPE_DECLARATION;
                         } else {
                             state = S_DECLARATION;
                         }
                         doctype_name = token.text().subSequence(2, token.text().length()).toString(); //strip off the <! chars
                         break;
                     default:
                         //everything else is just a text
                         start = ts.offset();
                         state = S_TEXT;
                         break;
                 }
                 break;

             //inside a text: TEXT tokens extend it, any other token ends it
             case S_TEXT:
                 switch (id) {
                     case TEXT:
                         break;
                     default:
                         backup(1);
                         text();
                         state = S_INIT;
                         start = -1;
                         break;
                 }
                 break;

             //after the tag open symbol: the tag name is expected
             case S_TAG_OPEN_SYMBOL:
                 switch (id) {
                     case TAG_OPEN:
                         state = S_TAG_AFTER_NAME;
                         openTag = true;
                         tagName = token.text().toString();
                         break;
                     case TAG_CLOSE:
                         state = S_TAG_AFTER_NAME;
                         openTag = false;
                         tagName = token.text().toString();
                         break;
                     default:
                         reset(); //error
                         break;
                 }
                 break;

             case S_TAG_AFTER_NAME:
                 //just switch to 'in tag state'
                 //the token read by this call is put back and processed again in S_TAG
                 backup(1);
                 state = S_TAG;
                 break;

             //inside a tag, between the attributes
             case S_TAG:
                 switch (id) {
                     case WS:
                     case EOL:
                     case ERROR:
                         //ignored, the tag goes on
                         break;
                     case ARGUMENT:
                         //attribute name; remembered until it is known whether a value follows
                         state = S_TAG_ATTR;
                         attrib = tokenInfo();
                         break;
                     case TAG_CLOSE_SYMBOL:
                         //end of the tag; the "/>" symbol makes it an empty tag
                         boolean emptyTag = "/>".equals(token.text().toString());
                         tag(emptyTag);
                         state = S_INIT;
                         start = -1;
                         break;
                     default:
                         tag_with_error(
                                 ProblemDescription.create(UNEXPECTED_SYMBOL_IN_OPEN_TAG,
                                 String.format("Unexpected symbol '%s' found in the open tag", token.text()),
                                 ProblemDescription.ERROR,
                                 offset,
                                 offset + token.length()));
                         break;
                 }
                 break;


             //after an attribute name
             case S_TAG_ATTR:
                 switch (id) {
                     case OPERATOR:
                     case WS:
                         //ignored, a value may still follow
                         break;
                     case VALUE:
                     case VALUE_JAVASCRIPT:
                     case VALUE_CSS:
                         backup(1); //backup the value
                         state = S_TAG_VALUE;
                         break;
                     case ARGUMENT:
                     case TAG_CLOSE_SYMBOL:
                         //attribute without value
                         attr_keys.add(attrib);
                         attr_values.add(null);
                         state = S_TAG;
                         //the token is processed again in S_TAG
                         backup(1);
                         break;
                     default:
                         tag_with_error(
                                 ProblemDescription.create(UNEXPECTED_SYMBOL_IN_OPEN_TAG,
                                 String.format("Unexpected symbol '%s' found in the open tag", token.text()),
                                 ProblemDescription.ERROR,
                                 offset,
                                 offset + token.length()));
                         break;
                 }
                 break;

             //collecting the value parts of the attribute held in 'attrib'
             case S_TAG_VALUE:
                 switch (id) {
                     case VALUE:
                     case VALUE_JAVASCRIPT:
                     case VALUE_CSS:
                     case EL_OPEN_DELIMITER:
                     case EL_CONTENT:
                     case EL_CLOSE_DELIMITER:
                         int index = attr_keys.indexOf(attrib);
                         if (index == -1) {
                             //the first value part: register the attribute along with it
                             List<TokenInfo> values = new ArrayList<>();
                             values.add(tokenInfo());
                             attr_keys.add(attrib);
                             attr_values.add(values);
                         } else {
                             //another value part of an already registered attribute
                             List<TokenInfo> valueParts = attr_values.get(index);
                             //http://statistics.netbeans.org/exceptions/messageslog?id=679650
                             //NPE might happen as attr_values.get(index) might return null
                             //I cannot see the code path which leads to this so adding a silly NPE check
                             if(valueParts != null) {
                                 valueParts.add(tokenInfo());
                             }
                         }

                         break;
                     case ERROR:
                         tag_with_error(
                                 ProblemDescription.create(UNEXPECTED_SYMBOL_IN_OPEN_TAG,
                                 String.format("Unexpected symbol '%s' found in the open tag", token.text()),
                                 ProblemDescription.ERROR,
                                 offset,
                                 offset + token.length()));
                         break;
                     default:
                         //the value is over, the token is processed again in S_TAG
                         backup(1);
                         state = S_TAG;
                         break;
                 }
                 break;

             //inside a comment: comment and whitespace tokens extend it
             case S_COMMENT:
                 switch (id) {
                     case BLOCK_COMMENT:
                     case EOL:
                     case WS:
                         break;
                     default:
                         backup(1);
                         comment();
                         state = S_INIT;
                         start = -1;
                         break;
                 }
                 break;

             //inside a declaration: declaration, sgml comment and whitespace tokens extend it
             case S_DECLARATION:
                 switch (id) {
                     case DECLARATION:
                     case SGML_COMMENT:
                     case EOL:
                     case WS:
                         break;
                     default:
                         backup(1);
                         declaration();
                         state = S_INIT;
                         start = -1;
                         break;
                 }
                 break;

             //after "<!doctype": the first DECLARATION token is taken as the root element name
             case S_DOCTYPE_DECLARATION:
                 switch (id) {
                     case DECLARATION:
                         root_element = token.text().toString();
                         state = S_DOCTYPE_AFTER_ROOT_ELEMENT;
                         break;
                     case SGML_COMMENT:
                     case EOL:
                     case WS:
                         break;
                     default:
                         backup(1);
                         declaration();
                         state = S_INIT;
                         start = -1;
                         break;
                 }
                 break;

             //after the root element name: "public", "system" or the closing '>' is expected
             case S_DOCTYPE_AFTER_ROOT_ELEMENT:
                 switch (id) {
                     case DECLARATION:
                         if (LexerUtils.equals("public", token.text(), true, true)) { //NOI18N
                             doctype_public_id = new String();
                             state = S_DOCTYPE_PUBLIC_ID;
                             break;
                         } else if (LexerUtils.equals("system", token.text(), true, true)) { //NOI18N
                             state = S_DOCTYPE_FILE;
                             doctype_file = new String();
                             break;
                         } else if (token.text().charAt(0) == '>') {
                             //end of the doctype, the closing token is a part of the element
                             declaration();
                             state = S_INIT;
                             start = -1;
                         }
                         //any other declaration token is ignored and the state is kept
                         break;
                     case SGML_COMMENT:
                     case EOL:
                     case WS:
                         break;
                     default:
                         backup(1);
                         declaration();
                         state = S_INIT;
                         start = -1;
                         break;
                 }
                 break;

             //collecting the public id; it may consist of more tokens, whitespaces included
             case S_DOCTYPE_PUBLIC_ID:
                 switch (id) {
                     case WS:
                     case DECLARATION:
                         String tokenText = token.text().toString();
                         if (tokenText.startsWith("\"")) {
                             //first token
                             tokenText = tokenText.substring(1); //cut off the quotation mark
                         }
                         if (tokenText.endsWith("\"")) {
                             //last token
                             tokenText = tokenText.substring(0, tokenText.length() - 1); //cut off the quotation mark
                             doctype_public_id += tokenText; //short and rare strings, no perf problem
                             doctype_public_id = doctype_public_id.trim();
                             state = S_DOCTYPE_FILE;
                             break;
                         }
                         doctype_public_id += tokenText; //short and rare strings, no perf problem

                         break;
                     case SGML_COMMENT:
                     case EOL:

                         break;
                     default:
                         backup(1);
                         declaration();
                         state = S_INIT;
                         start = -1;
                         break;
                 }
                 break;

             //the doctype file is expected; the whole token text is taken, nothing is stripped
             case S_DOCTYPE_FILE:
                 switch (id) {
                     case DECLARATION:
                         doctype_file = token.text().toString();
                         //jump to simple sgml declaration so potentially
                         //other declaration tokens are included
                         state = S_DECLARATION;
                         break;
                     case SGML_COMMENT:
                     case EOL:
                     case WS:
                         break;
                     default:
                         backup(1);
                         declaration();
                         state = S_INIT;
                         start = -1;
                         break;
                 }
                 break;

         } //switch end

         return current;

     }

     /**
      * Called when the token sequence has no more tokens. If an element is
      * in progress (the state is not S_INIT) the incomplete element is
      * created from what has been read so far; a state with no specific
      * handling results in an error element.
      */
     private void handleEOF() {
         if (state == S_INIT) {
             //nothing in progress
             return;
         }

         //an incomplete syntax element at the end of the file
         switch (state) {
             case S_TEXT:
                 text();
                 break;
             case S_COMMENT:
                 comment();
                 break;
             case S_DECLARATION:
             case S_DOCTYPE_DECLARATION:
             case S_DOCTYPE_AFTER_ROOT_ELEMENT:
             case S_DOCTYPE_PUBLIC_ID:
             case S_DOCTYPE_FILE:
                 declaration();
                 break;
             case S_TAG_AFTER_NAME:
             case S_TAG:
             case S_TAG_ATTR:
             case S_TAG_VALUE:
                 //the tag name is known, create the unfinished tag as a non-empty one
                 tag(false);
                 break;
             default:
                 error();
                 break;
         }
     }

     /**
      * Captures the current position of the parser.
      *
      * @return a new TokenInfo holding the offset reported by the token
      * sequence and the token fetched most recently
      */
     private TokenInfo tokenInfo() {
         final int offset = ts.offset();
         return new TokenInfo(offset, token);
     }

     /**
      * A token along with its offset. Two instances are equal if they are of
      * the same class, have the same offset and their tokens are either both
      * null or equal.
      */
     static final class TokenInfo {

         //offset of the token
         public int offset;
         //the token itself, possibly null
         public Token token;

         public TokenInfo(int offset, Token token) {
             this.token = token;
             this.offset = offset;
         }

         @Override
         public boolean equals(Object obj) {
             if (obj == null || getClass() != obj.getClass()) {
                 return false;
             }
             final TokenInfo that = (TokenInfo) obj;
             if (offset != that.offset) {
                 return false;
             }
             if (token == that.token) {
                 //the same instance or both null
                 return true;
             }
             return token != null && token.equals(that.token);
         }

         @Override
         public int hashCode() {
             final int tokenHash = (token == null) ? 0 : token.hashCode();
             return 37 * (37 * 3 + offset) + tokenHash;
         }
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.netbeans.modules.html.editor.lib;

	import java.util.ArrayList;
	import java.util.Iterator;
	import java.util.List;
	import java.util.concurrent.atomic.AtomicReference;
	import java.util.logging.Level;
	import java.util.logging.Logger;
	import org.netbeans.api.html.lexer.HTMLTokenId;
	import org.netbeans.api.lexer.Token;
	import org.netbeans.api.lexer.TokenSequence;
	import org.netbeans.modules.html.editor.lib.api.ProblemDescription;
	import org.netbeans.modules.html.editor.lib.api.elements.Attribute;
	import org.netbeans.modules.html.editor.lib.api.elements.Element;
	import org.netbeans.modules.html.editor.lib.plain.*;
	import org.netbeans.modules.web.common.api.LexerUtils;

	/**
	* Html syntax analyzer/plain parser
	*
	* @author mfukala@netbeans.org
	*/
	public class ElementsParser implements Iterator<Element> {

	//parser state: always one of the S_* constants below
	private int state;
	//parser state constants
	//the state set by the constructor
	private static final int S_INIT = 0;
	private static final int S_TAG_OPEN_SYMBOL = 1;
	private static final int S_TAG = 2;
	private static final int S_TAG_ATTR = 3;
	private static final int S_TAG_VALUE = 4;
	private static final int S_COMMENT = 5;
	private static final int S_DECLARATION = 6;
	private static final int S_DOCTYPE_DECLARATION = 7;
	private static final int S_DOCTYPE_AFTER_ROOT_ELEMENT = 8;
	private static final int S_DOCTYPE_PUBLIC_ID = 9;
	private static final int S_DOCTYPE_FILE = 10;
	private static final int S_TEXT = 11;
	private static final int S_TAG_AFTER_NAME = 12;
	//eof parser state constants
	//NOTE(review): presumably the key of the problem reported for unexpected tokens in a tag - confirm against the usages
	public static final String UNEXPECTED_SYMBOL_IN_OPEN_TAG = "unexpected_symbol_in_open_tag"; //NOI18N
	//the text passed to the created elements
	private CharSequence sourceCode;
	//the token sequence the parser reads from
	private TokenSequence<HTMLTokenId> ts;
	//inner parsing states
	private Token<HTMLTokenId> token;
	//start offset of the element being built; the constructor sets it to -1
	private int start;
	private boolean openTag = true;
	private String tagName;
	private TokenInfo attrib;
	//NOTE(review): attr_keys and attr_values look like parallel lists indexed by attribute - confirm against tag(...)
	private List<TokenInfo> attr_keys;
	private List<List<TokenInfo>> attr_values;
	//the element passed to the element-creating methods' callers; assigned by error(), text(), comment() etc.
	private Element current;
	private boolean eof;
	//lookahead of the iterator: null = not computed yet, a reference holding null = no more elements
	private AtomicReference<Element> lastFoundElement;
	//pieces of the declaration being parsed, passed to the DeclarationElement constructor
	private String root_element, doctype_public_id, doctype_file, doctype_name;

	/**
	 * Creates a parser reading tokens from the given token sequence.
	 *
	 * The {@link TokenSequence} needs to be properly positioned by the caller,
	 * the constructor does not move it.
	 *
	 * @param sourceCode the text handed over to the created elements
	 * @param tokenSequence the token sequence to read the tokens from
	 */
	private ElementsParser(CharSequence sourceCode, TokenSequence<HTMLTokenId> tokenSequence) {
	    //parsing begins outside of any element
	    this.state = S_INIT;
	    this.start = -1;
	    this.eof = false;

	    //no attributes collected yet
	    this.attr_keys = new ArrayList<>();
	    this.attr_values = new ArrayList<>();

	    this.ts = tokenSequence;
	    this.sourceCode = sourceCode;
	}

	/**
	 * Creates a parser which starts parsing at the given offset.
	 *
	 * The token sequence is moved to {@code position} by this method. The
	 * position must be the beginning of a token, i.e.
	 * {@code tokenSequence.move(position)} must return zero.
	 *
	 * @param sourceCode the text handed over to the created elements
	 * @param tokenSequence the token sequence to read the tokens from
	 * @param position offset of a token beginning, zero or bigger
	 * @return a new parser instance
	 * @throws IllegalArgumentException if the position is negative or if it
	 * does not point to a token beginning
	 */
	public static ElementsParser forOffset(CharSequence sourceCode, TokenSequence<HTMLTokenId> tokenSequence, int position) {
	    if (position < 0) {
	        //zero is a valid position, hence "not negative" and not "positive"
	        throw new IllegalArgumentException(String.format("Position (%s) must not be negative", position)); //NOI18N
	    }

	    int diff = tokenSequence.move(position);
	    if (diff != 0) {
	        //moveNext() is called here only to be able to put the offending token to the message
	        throw new IllegalArgumentException(String.format("Parser must be started "
	                + "at a token beginning, not in the middle (position=%s, token diff=%s, token=%s)",
	                position, diff, (tokenSequence.moveNext() ? tokenSequence.token() : null))); //NOI18N
	    }
	    return new ElementsParser(sourceCode, tokenSequence);
	}

	/**
	 * Creates a parser which starts parsing at the token with the given index.
	 *
	 * The token sequence is first moved to its end to find out the upper
	 * bound of the index and then moved to {@code tokenIndex}.
	 *
	 * @param sourceCode the text handed over to the created elements
	 * @param tokenSequence the token sequence to read the tokens from
	 * @param tokenIndex index in the token sequence, zero or bigger
	 * @return a new parser instance
	 * @throws IllegalArgumentException if the index is negative or bigger
	 * than the index reported by the token sequence after {@code moveEnd()}
	 */
	public static ElementsParser forTokenIndex(CharSequence sourceCode, TokenSequence<HTMLTokenId> tokenSequence, int tokenIndex) {
	    if (tokenIndex < 0) {
	        //zero is a valid index, hence "not negative" and not "positive"
	        throw new IllegalArgumentException(String.format("TokenSequence index (%s) must not be negative", tokenIndex)); //NOI18N
	    }
	    tokenSequence.moveEnd();
	    //NOTE(review): after moveEnd() the index presumably points behind the last token,
	    //so an index equal to this value positions the parser at the end of the sequence - confirm
	    int lastTokenIndex = tokenSequence.index();
	    if (tokenIndex > lastTokenIndex) {
	        throw new IllegalArgumentException(String.format("token index (%s) is bigger than last index in the sequence (%s)", tokenIndex, lastTokenIndex));
	    }
	    tokenSequence.moveIndex(tokenIndex);
	    return new ElementsParser(sourceCode, tokenSequence);
	}

	/**
	 * Checks whether another element can be parsed. The element found is
	 * kept in the look-ahead holder until {@link #next()} picks it up.
	 *
	 * @return true if {@link #next()} would return an element
	 */
	@Override
	public boolean hasNext() {
		AtomicReference<Element> lookahead = lastFoundElement;
		if (lookahead == null) {
			//nothing prefetched yet, parse forward
			lookahead = new AtomicReference<>(findNextElement());
			lastFoundElement = lookahead;
		}
		return lookahead.get() != null;
	}

	/**
	 * Returns the next parsed element and clears the look-ahead holder.
	 *
	 * @return the next element, never null
	 * @throws IllegalStateException if there are no more elements
	 */
	@Override
	public Element next() {
		if (hasNext()) {
			Element found = lastFoundElement.get();
			//force hasNext() to parse forward next time
			lastFoundElement = null;
			return found;
		}
		throw new IllegalStateException("No such element");
	}

	/**
	 * Does nothing; the call is silently ignored and no exception is thrown.
	 */
	@Override
	public void remove() {
	//no-op
	}

	//---------------------------- private methods -----------------------------
	/**
	 * Makes an {@link ErrorElement} the current element. It spans from
	 * {@code start} to the end of the token the sequence is positioned on.
	 */
	private void error() {
		final int end = ts.offset() + ts.token().length();
		final short length = (short) (end - start);
		current = new ErrorElement(sourceCode, start, length);
	}

	/**
	 * Makes a {@link TextElement} the current element. It spans from
	 * {@code start} to the end of the token the sequence is positioned on.
	 */
	private void text() {
		final int end = ts.offset() + ts.token().length();
		current = new TextElement(start, end);
	}

	/**
	 * Makes an {@link EntityReferenceElement} the current element. It spans
	 * from {@code start} to the end of the token the sequence is positioned on.
	 */
	private void entityReference() {
		final int end = ts.offset() + ts.token().length();
		final short length = (short) (end - start);
		current = new EntityReferenceElement(sourceCode, start, length);
	}

	/**
	 * Makes a {@link CommentElement} the current element. It spans from
	 * {@code start} to the end of the token the sequence is positioned on.
	 */
	private void comment() {
		final int end = ts.offset() + ts.token().length();
		final int length = end - start;
		current = new CommentElement(sourceCode, start, length);
	}

	/**
	 * Makes a {@link DeclarationElement} the current element. It spans from
	 * {@code start} to the end of the token the sequence is positioned on and
	 * carries the doctype parts collected so far.
	 */
	private void declaration() {
		final int end = ts.offset() + ts.token().length();
		final short length = (short) (end - start);
		current = new DeclarationElement(sourceCode, start, length,
				root_element, doctype_public_id, doctype_file, doctype_name);
	}

	/**
	 * Creates the tag element with no problem attached.
	 *
	 * @param emptyTag whether the tag is an empty tag
	 */
	private void tag(boolean emptyTag) {
		final ProblemDescription noProblem = null;
		tag(emptyTag, noProblem);
	}

	/**
	 * Makes a tag element the current element and clears the tag related
	 * parsing state (tag name, current attribute, attribute buffers).
	 * <p>
	 * The element spans from {@code start} to the end of the token the
	 * sequence is positioned on. For an end tag an {@link EndTagElement} is
	 * created and the collected attributes are ignored; for an open tag the
	 * element class is chosen by the presence of attributes, of a problem and
	 * by the element length.
	 * <p>
	 * NOTE(review): except for {@link LongOpenTagElement} the length is
	 * narrowed to {@code short} and the tag name length to {@code byte} -
	 * confirm that longer tags cannot reach the other element classes.
	 *
	 * @param emptyTag whether the tag is an empty tag; not passed to end tags
	 * @param problem the problem to attach to an open tag, or null if there is none
	 * @throws IllegalStateException if no element start is recorded or the
	 * computed length is not positive; the message is a snippet of the source
	 * around the current token
	 */
	private void tag(boolean emptyTag, ProblemDescription problem) {
		List<Attribute> attributes = collectAttributes();

		//Bug 220775 - AssertionError: element length must be positive! debug>>>
		if (start == -1) {
			throw new IllegalStateException(getCodeSnippet());
		}
		int len = ts.offset() + ts.token().length() - start;
		if (len <= 0) {
			throw new IllegalStateException(getCodeSnippet());
		}
		//<<<

		if (!openTag) {
			current = new EndTagElement(
					sourceCode,
					start,
					(short) len,
					(byte) tagName.length());
		} else if (attributes.isEmpty()) {
			//no attributes
			if (problem == null) {
				current = new AttributelessOpenTagElement(
						sourceCode,
						start,
						(short) len,
						(byte) tagName.length(),
						emptyTag);
			} else {
				current = new ProblematicAttributelessOpenTagElement(
						sourceCode,
						start,
						(short) len,
						(byte) tagName.length(),
						emptyTag,
						problem);
			}
		} else if (problem != null) {
			//open tag w/ attributes and w/ error
			//note: the ProblematicOpenTagElement also extends LongOpenTagElement
			current = new ProblematicOpenTagElement(
					sourceCode,
					start,
					(short) len,
					(byte) tagName.length(),
					attributes,
					emptyTag,
					problem);
		} else if (len > Short.MAX_VALUE) {
			//unusually long open tag w/ attributes and w/o error
			current = new LongOpenTagElement(
					sourceCode,
					start,
					len,
					(byte) tagName.length(),
					attributes,
					emptyTag);
		} else {
			//open tag w/ attributes and w/o error
			current = new OpenTagElement(
					sourceCode,
					start,
					(short) len,
					(byte) tagName.length(),
					attributes,
					emptyTag);
		}

		//the tag is done, start with clean buffers next time
		tagName = null;
		attrib = null;
		attr_keys = new ArrayList<>();
		attr_values = new ArrayList<>();
	}

	//converts the collected attribute name/value tokens to Attribute instances
	private List<Attribute> collectAttributes() {
		List<Attribute> attributes = new ArrayList<>(1); //use small initial capacity since typically there are one or two attribs (if any)
		for (int i = 0; i < attr_keys.size(); i++) {
			TokenInfo key = attr_keys.get(i);
			List<TokenInfo> values = attr_values.get(i);

			if (values == null) {
				//attribute has no value
				assert key.token.length() < Short.MAX_VALUE;
				attributes.add(new AttributeElement(
						sourceCode,
						key.offset,
						(short) key.token.length()));
			} else if (values.size() == 1) {
				//one part value
				TokenInfo ti = values.get(0);

				assert key.token.length() < Short.MAX_VALUE;
				attributes.add(new AttributeElement(
						sourceCode,
						key.offset,
						ti.offset,
						(short) key.token.length(),
						ti.token.length()));
			} else {
				//multipart value, the parts are joined to one string;
				//the builder is needed (and hence created) only here
				StringBuilder joinedValue = new StringBuilder();
				for (TokenInfo t : values) {
					joinedValue.append(t.token.text());
				}

				TokenInfo firstValuePart = values.get(0);
				attributes.add(new AttributeElement.AttributeElementWithJoinedValue(
						sourceCode,
						key.offset,
						(short) key.token.length(),
						firstValuePart.offset,
						joinedValue.toString().intern()));
			}
		}
		return attributes;
	}
	//max length of the source snippet put to the exception messages
	private static final int SNIPPET_LEN = 100;

	/**
	 * Cuts a piece of the source code around the offset of the current token.
	 *
	 * @return at most {@code SNIPPET_LEN} characters of the source, clipped
	 * to the source boundaries
	 */
	private String getCodeSnippet() {
		final int half = SNIPPET_LEN / 2;
		final int center = ts.offset();
		final int from = Math.max(0, center - half);
		final int to = Math.min(sourceCode.length(), center + half);
		return sourceCode.subSequence(from, to).toString();
	}

	/**
	 * Handles an error inside a tag when at least the tag name is known:
	 * the tag element is created with the problem attached and the parser
	 * returns to the initial state.
	 *
	 * @param problem description of the problem to attach to the tag
	 */
	private void tag_with_error(ProblemDescription problem) {
		//put the erroneous symbol back so it is not a part of the tag
		backup(1);

		//it is not known whether the tag is empty or not, so treat it as not empty
		final boolean emptyTag = false;
		tag(emptyTag, problem);

		start = -1;
		state = S_INIT;
	}

	/**
	 * Recovers from an error: creates an error element which excludes the
	 * token that caused the error and returns to the initial state.
	 */
	private void reset() {
		//step back so the offending token is parsed again from the initial state
		backup(1);
		error();

		start = -1;
		state = S_INIT;
	}

	/**
	 * Moves the token sequence back and updates the current token accordingly.
	 *
	 * @param tokens number of tokens to step back; nothing happens if not positive
	 */
	private void backup(int tokens) {
		int remaining = tokens;
		while (remaining-- > 0) {
			ts.movePrevious();
			token = ts.token();
		}
	}

	/**
	 * Processes tokens until an element is completed or the input ends.
	 *
	 * @return the next element, or null if the input ended without one
	 */
	private Element findNextElement() {
		while (!eof) {
			Element found = processNextToken();
			if (found != null) {
				return found;
			}
		}
		return null;
	}

	/**
	 * Reads one token from the token sequence and advances the state machine.
	 * <p>
	 * When the sequence has no more tokens, {@link #handleEOF()} may create
	 * an element from the unfinished state and the {@code eof} flag is set.
	 * Several branches call {@code backup(1)} so the token which terminated
	 * an element is read again by the next call.
	 *
	 * @return the element completed by this step, or null if the token did
	 * not complete any element
	 */
	private Element processNextToken() {
	current = null;

	if (!ts.moveNext()) {
	//eof
	handleEOF(); //may possibly set current element
	eof = true; //finish the parsing cycle
	return current;
	}

	//offset of the token just read; used by the problem descriptions below
	int offset = ts.offset();
	token = ts.token();
	HTMLTokenId id = token.id();

	switch (state) {
	//no element in progress; the token decides which element starts here
	case S_INIT:
	switch (id) {
	case CHARACTER:
	//a CHARACTER token becomes an entity reference element on its own
	start = ts.offset();
	entityReference();
	state = S_INIT;
	start = -1;
	break;
	case TAG_OPEN_SYMBOL:
	start = ts.offset();
	state = S_TAG_OPEN_SYMBOL;
	break;
	case BLOCK_COMMENT:
	start = ts.offset();
	state = S_COMMENT;
	break;
	case DECLARATION:
	start = ts.offset();
	if (LexerUtils.equals("<!doctype", token.text(), true, true)) { //NOI18N
	//NOTE(review): these three are reset for a doctype only, other
	//declarations keep the values set before - confirm this is intended
	root_element = null;
	doctype_public_id = null;
	doctype_file = null;
	state = S_DOCTYPE_DECLARATION;
	} else {
	state = S_DECLARATION;
	}
	doctype_name = token.text().subSequence(2, token.text().length()).toString(); //strip off the <! chars
	break;
	default:
	//everything else is just a text
	start = ts.offset();
	state = S_TEXT;
	break;
	}
	break;

	//inside a text; following TEXT tokens extend it, anything else ends it
	case S_TEXT:
	switch (id) {
	case TEXT:
	break;
	default:
	backup(1);
	text();
	state = S_INIT;
	start = -1;
	break;
	}
	break;

	//the tag open symbol was read; a tag name token has to follow
	case S_TAG_OPEN_SYMBOL:
	switch (id) {
	case TAG_OPEN:
	state = S_TAG_AFTER_NAME;
	openTag = true;
	tagName = token.text().toString();
	break;
	case TAG_CLOSE:
	state = S_TAG_AFTER_NAME;
	openTag = false;
	tagName = token.text().toString();
	break;
	default:
	reset(); //error
	break;
	}
	break;

	case S_TAG_AFTER_NAME:
	//just switch to 'in tag state'
	//the token is put back so it is processed again in the S_TAG state
	backup(1);
	state = S_TAG;
	break;

	//inside a tag, after the name or after a finished attribute
	case S_TAG:
	switch (id) {
	case WS:
	case EOL:
	case ERROR:
	//these tokens are skipped
	break;
	case ARGUMENT:
	//an attribute name; remember it until it is known whether a value follows
	state = S_TAG_ATTR;
	attrib = tokenInfo();
	break;
	case TAG_CLOSE_SYMBOL:
	//end of the tag; the "/>" symbol marks an empty tag
	boolean emptyTag = "/>".equals(token.text().toString());
	tag(emptyTag);
	state = S_INIT;
	start = -1;
	break;
	default:
	tag_with_error(
	ProblemDescription.create(UNEXPECTED_SYMBOL_IN_OPEN_TAG,
	String.format("Unexpected symbol '%s' found in the open tag", token.text()),
	ProblemDescription.ERROR,
	offset,
	offset + token.length()));
	break;
	}
	break;


	//an attribute name was read; waiting for its value or the next item
	case S_TAG_ATTR:
	switch (id) {
	case OPERATOR:
	case WS:
	break;
	case VALUE:
	case VALUE_JAVASCRIPT:
	case VALUE_CSS:
	backup(1); //backup the value
	state = S_TAG_VALUE;
	break;
	case ARGUMENT:
	case TAG_CLOSE_SYMBOL:
	//attribute without value
	attr_keys.add(attrib);
	attr_values.add(null);
	state = S_TAG;
	//let the S_TAG state process the token
	backup(1);
	break;
	default:
	tag_with_error(
	ProblemDescription.create(UNEXPECTED_SYMBOL_IN_OPEN_TAG,
	String.format("Unexpected symbol '%s' found in the open tag", token.text()),
	ProblemDescription.ERROR,
	offset,
	offset + token.length()));
	break;
	}
	break;

	//collecting the (possibly multipart) value of the current attribute
	case S_TAG_VALUE:
	switch (id) {
	case VALUE:
	case VALUE_JAVASCRIPT:
	case VALUE_CSS:
	case EL_OPEN_DELIMITER:
	case EL_CONTENT:
	case EL_CLOSE_DELIMITER:
	//the first part registers the attribute, the following parts are
	//appended to the already registered list of value parts
	int index = attr_keys.indexOf(attrib);
	if (index == -1) {
	List<TokenInfo> values = new ArrayList<>();
	values.add(tokenInfo());
	attr_keys.add(attrib);
	attr_values.add(values);
	} else {
	List<TokenInfo> valueParts = attr_values.get(index);
	//http://statistics.netbeans.org/exceptions/messageslog?id=679650
	//NPE might happen as attr_values.get(index) might return null
	//I cannot see the code path which leads to this so adding a silly NPE check
	if(valueParts != null) {
	valueParts.add(tokenInfo());
	}
	}

	break;
	case ERROR:
	tag_with_error(
	ProblemDescription.create(UNEXPECTED_SYMBOL_IN_OPEN_TAG,
	String.format("Unexpected symbol '%s' found in the open tag", token.text()),
	ProblemDescription.ERROR,
	offset,
	offset + token.length()));
	break;
	default:
	//the value is complete; let the S_TAG state process the token
	backup(1);
	state = S_TAG;
	break;
	}
	break;

	//inside a comment; comment tokens and whitespace extend it
	case S_COMMENT:
	switch (id) {
	case BLOCK_COMMENT:
	case EOL:
	case WS:
	break;
	default:
	backup(1);
	comment();
	state = S_INIT;
	start = -1;
	break;
	}
	break;

	//inside a declaration; declaration tokens, sgml comments and whitespace extend it
	case S_DECLARATION:
	switch (id) {
	case DECLARATION:
	case SGML_COMMENT:
	case EOL:
	case WS:
	break;
	default:
	backup(1);
	declaration();
	state = S_INIT;
	start = -1;
	break;
	}
	break;

	//after "<!doctype"; the next DECLARATION token is taken as the root element
	case S_DOCTYPE_DECLARATION:
	switch (id) {
	case DECLARATION:
	root_element = token.text().toString();
	state = S_DOCTYPE_AFTER_ROOT_ELEMENT;
	break;
	case SGML_COMMENT:
	case EOL:
	case WS:
	break;
	default:
	backup(1);
	declaration();
	state = S_INIT;
	start = -1;
	break;
	}
	break;

	//after the root element; "public", "system" or the closing '>' is expected
	case S_DOCTYPE_AFTER_ROOT_ELEMENT:
	switch (id) {
	case DECLARATION:
	if (LexerUtils.equals("public", token.text(), true, true)) { //NOI18N
	doctype_public_id = new String();
	state = S_DOCTYPE_PUBLIC_ID;
	break;
	} else if (LexerUtils.equals("system", token.text(), true, true)) { //NOI18N
	state = S_DOCTYPE_FILE;
	doctype_file = new String();
	break;
	} else if (token.text().charAt(0) == '>') {
	declaration();
	state = S_INIT;
	start = -1;
	}
	//any other declaration token is ignored and the state is kept
	break;
	case SGML_COMMENT:
	case EOL:
	case WS:
	break;
	default:
	backup(1);
	declaration();
	state = S_INIT;
	start = -1;
	break;
	}
	break;

	//collecting the public id; whitespace tokens are appended as well
	//(presumably since the quoted id may contain spaces)
	case S_DOCTYPE_PUBLIC_ID:
	switch (id) {
	case WS:
	case DECLARATION:
	String tokenText = token.text().toString();
	if (tokenText.startsWith("\"")) {
	//first token
	tokenText = tokenText.substring(1); //cut off the quotation mark
	}
	if (tokenText.endsWith("\"")) {
	//last token
	tokenText = tokenText.substring(0, tokenText.length() - 1); //cut off the quotation mark
	doctype_public_id += tokenText; //short and rare strings, no perf problem
	doctype_public_id = doctype_public_id.trim();
	state = S_DOCTYPE_FILE;
	break;
	}
	doctype_public_id += tokenText; //short and rare strings, no perf problem

	break;
	case SGML_COMMENT:
	case EOL:

	break;
	default:
	backup(1);
	declaration();
	state = S_INIT;
	start = -1;
	break;
	}
	break;

	//the next DECLARATION token is stored as the doctype file
	case S_DOCTYPE_FILE:
	switch (id) {
	case DECLARATION:
	doctype_file = token.text().toString();
	//jump to simple sgml declaration so potentially
	//other declaration tokens are included
	state = S_DECLARATION;
	break;
	case SGML_COMMENT:
	case EOL:
	case WS:
	break;
	default:
	backup(1);
	declaration();
	state = S_INIT;
	start = -1;
	break;
	}
	break;

	} //switch end

	return current;

	}

	/**
	 * Called when the token sequence has no more tokens. If an element is
	 * in progress, it is created from what has been read so far; states not
	 * listed below produce an error element.
	 */
	private void handleEOF() {
		if (state == S_INIT) {
			//no incomplete syntax element at the end of the file
			return;
		}

		switch (state) {
			case S_TEXT:
				text();
				break;
			case S_COMMENT:
				comment();
				break;
			case S_DECLARATION:
			case S_DOCTYPE_DECLARATION:
			case S_DOCTYPE_AFTER_ROOT_ELEMENT:
			case S_DOCTYPE_PUBLIC_ID:
			case S_DOCTYPE_FILE:
				declaration();
				break;
			case S_TAG_AFTER_NAME:
			case S_TAG:
			case S_TAG_ATTR:
			case S_TAG_VALUE:
				//unfinished tag, it is not known whether it is empty or not
				tag(false);
				break;
			default:
				error();
				break;
		}
	}

	/**
	 * Captures the current token together with the current offset of the
	 * token sequence.
	 *
	 * @return a new {@link TokenInfo} for the current token
	 */
	private TokenInfo tokenInfo() {
		final int tokenOffset = ts.offset();
		return new TokenInfo(tokenOffset, token);
	}

	static final class TokenInfo {

	public int offset;
	public Token token;

	public TokenInfo(int offset, Token token) {
	this.offset = offset;
	this.token = token;
	}

	@Override
	public boolean equals(Object obj) {
	if (obj == null) {
	return false;
	}
	if (getClass() != obj.getClass()) {
	return false;
	}
	final TokenInfo other = (TokenInfo) obj;
	if (this.offset != other.offset) {
	return false;
	}
	if (this.token != other.token && (this.token == null \|\| !this.token.equals(other.token))) {
	return false;
	}
	return true;
	}

	@Override
	public int hashCode() {
	int hash = 3;
	hash = 37 * hash + this.offset;
	hash = 37 * hash + (this.token != null ? this.token.hashCode() : 0);
	return hash;
	}
	}
	}