java/java.sourceui/src/org/netbeans/api/java/source/ui/HTMLJavadocParser.java - netbeans - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.netbeans.api.java.source.ui;

 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.InterruptedIOException;
 import java.io.Reader;
 import java.net.URL;
 import java.net.URLDecoder;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.StringTokenizer;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import javax.swing.text.ChangedCharSetException;
 import javax.swing.text.MutableAttributeSet;
 import javax.swing.text.html.HTML;
 import javax.swing.text.html.HTML.Tag;
 import javax.swing.text.html.HTMLEditorKit;
 import javax.swing.text.html.parser.ParserDelegator;
 import org.netbeans.modules.java.source.JavadocHelper;

 /**
  *  HTML Parser. It retrieves sections of the javadoc HTML file.
  *
  * @author  Martin Roskanin
  */
 class HTMLJavadocParser {

     public static final Logger LOG = Logger.getLogger(HTMLJavadocParser.class.getName());

     /** Gets the javadoc text from the given URL
      *  @param url location of Javadoc
      *  @param pkg true if URL should be retrieved for a package
      */
     public static String getJavadocText(URL url, boolean pkg) {
         return getJavadocText(new JavadocHelper.TextStream(url), pkg);
     }

     /** Gets the javadoc text from the given URL
      *  @param page location of Javadoc
      *  @param pkg true if URL should be retrieved for a package
      */
     public static String getJavadocText(JavadocHelper.TextStream page, boolean pkg) {
         if (page == null) return null;

         HTMLEditorKit.Parser parser;
         InputStream is = null;

         String charset = null;
         for (;;) {
             try{
                 is = page.openStream();
                 parser = new ParserDelegator();
                 String urlStr = URLDecoder.decode(page.getLocation().toString(), "UTF-8"); //NOI18N
                 int offsets[] = null;
                 Reader reader = charset == null?new InputStreamReader(is): new InputStreamReader(is, charset);

                 if (pkg){
                     // package description
                     offsets = parsePackage(reader, parser, charset != null);
                 }else if (urlStr.indexOf('#')>0){
                     // member javadoc info
                     final Collection<? extends URL> urls = page.getLocations();
                     final Collection<String> possibleNames = new HashSet<>(urls.size());
                     for (URL nameUrl : urls) {
                         urlStr = URLDecoder.decode(nameUrl.toString(), "UTF-8"); //NOI18N
                         final String memberName = urlStr.substring(urlStr.indexOf('#')+1);
                         if (!memberName.isEmpty()) {
                             possibleNames.add(memberName);
                         }
                     }
                     if (!possibleNames.isEmpty()) {
                         offsets = parseMember(reader, possibleNames, parser, charset != null);
                     }
                 }else{
                     // class javadoc info
                     offsets = parseClass(reader, parser, charset != null);
                 }

                 if (offsets != null){
                     return getTextFromURLStream(page, offsets, charset);
                 }
                 break;
             } catch (ChangedCharSetException e) {
                 if (charset == null) {
                     charset = getCharSet(e);
                     //restart with valid charset
                 } else {
                     e.printStackTrace();
                     break;
                 }
             } catch (FileNotFoundException x) {
                 break; // e.g. missing com.sun.** class in network Javadoc; ignore
             } catch (InterruptedIOException x) {
                 //Http javadoc timeout
                 break;
             }catch(IOException ioe){
                 ioe.printStackTrace();
                 break;
             }finally{
                 parser = null;
                 if (is!=null) {
                     try{
                         is.close();
                     }catch(IOException ioe){
                         ioe.printStackTrace();
                     }
                 }
             }
         }
         return null;
     }

     private static String getCharSet(ChangedCharSetException e) {
         String spec = e.getCharSetSpec();
         if (e.keyEqualsCharSet()) {
             //charsetspec contains only charset
             return spec;
         }

         //charsetspec is in form "text/html; charset=UTF-8"

         int index = spec.indexOf(";"); // NOI18N
         if (index != -1) {
             spec = spec.substring(index + 1);
         }

         spec = spec.toLowerCase();

         StringTokenizer st = new StringTokenizer(spec, " \t=", true); //NOI18N
         boolean foundCharSet = false;
         boolean foundEquals = false;
         while (st.hasMoreTokens()) {
             String token = st.nextToken();
             if (token.equals(" ") || token.equals("\t")) { //NOI18N
                 continue;
             }
             if (foundCharSet == false && foundEquals == false
                     && token.equals("charset")) { //NOI18N
                 foundCharSet = true;
                 continue;
             } else if (foundEquals == false && token.equals("=")) {//NOI18N
                 foundEquals = true;
                 continue;
             } else if (foundEquals == true && foundCharSet == true) {
                 return token;
             }

             foundCharSet = false;
             foundEquals = false;
         }

         return null;
     }

     private static String getTextFromURLStream(JavadocHelper.TextStream page, int[] offsets, String charset) throws IOException {
         if (page == null)
             return null;

         InputStream fis = null;
         InputStreamReader fisreader = null;
         try {
             fis = page.openStream();
             fisreader = charset == null ? new InputStreamReader(fis) : new InputStreamReader(fis, charset);

             StringBuilder sb = new StringBuilder();
             int offset = 0;

             for (int i = 0; i < offsets.length - 1; i+=2) {
                 int startOffset = offsets[i];
                 int endOffset = offsets[i + 1];
                 if (startOffset < 0 || endOffset < 0)
                     continue;
                 if (startOffset > endOffset) {
                     LOG.log(Level.WARNING,
                             "Was not able to correctly parse javadoc: {0}, startOffset={1}, endOffset={2}.",
                             new Object[] {page.getLocation(), startOffset, endOffset});
                     return null;
                 }

                 int len = endOffset - startOffset;
                 char buffer[] = new char[len];
                 int bytesToSkip = startOffset - offset;
                 long bytesSkipped = 0;
                 do {
                     bytesSkipped = fisreader.skip(bytesToSkip);
                     bytesToSkip -= bytesSkipped;
                 } while ((bytesToSkip > 0) && (bytesSkipped > 0));

                 int bytesAlreadyRead = 0;
                 do {
                     int count = fisreader.read(buffer, bytesAlreadyRead, len - bytesAlreadyRead);
                     if (count < 0){
                         break;
                     }
                     bytesAlreadyRead += count;
                 } while (bytesAlreadyRead < len);
                 sb.append(buffer);
                 offset = endOffset;
             }
             return sb.toString();
         } finally {
             if (fisreader != null)
                 fisreader.close();
         }
     }


     /** Retrieves the position (start offset and end offset) of class javadoc info
       * in the raw html file */
     private static int[] parseClass(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
         final int INIT = 0;
         // javadoc HTML comment '======== START OF CLASS DATA ========'
         final int CLASS_DATA_START = 1;
         // start of the text we need. Located just after first P.
         final int TEXT_START = 2;
         // div tag after the CLASS_DATA_START
         final int INSIDE_DIV = 3;
         // div tag after the INSIDE_DIV
         final int AFTER_DIV = 4;

         final int state[] = new int[] {INIT};
         final int offset[] = new int[] {-1, -1, -1, -1};

         HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

             int div_counter = 0;
             int li_counter = 0;
             int nextHRPos = -1;
             int lastHRPos = -1;

             @Override
             public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                 if (t == HTML.Tag.HR){
                     if (state[0] == TEXT_START){
                         nextHRPos = pos;
                     }
                     lastHRPos = pos;
                 }
             }

             @Override
             public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                 if (t == HTML.Tag.P && state[0] == CLASS_DATA_START){
                     if (offset[0] != -1 && offset[1] == -1)
                         offset[1] = pos + 3;
                     else
                         state[0] = TEXT_START;
                 } else if (t == HTML.Tag.DIV) {
                     if (state[0] == CLASS_DATA_START && a.containsAttribute(HTML.Attribute.CLASS, "block")) {
                         state[0] = INSIDE_DIV;
                         if (offset[2] == -1)
                             offset[2] = pos;
                     }
                     if (state[0] == INSIDE_DIV)
                         div_counter++;
                 } else if (t == HTML.Tag.LI && state[0] == AFTER_DIV) {
                     li_counter++;
                 } else if (t == HTML.Tag.A && state[0] == TEXT_START) {
                     String attrName = (String)a.getAttribute(HTML.Attribute.NAME);
                     if (attrName!=null && attrName.length()>0){
                         if (nextHRPos!=-1 && nextHRPos > offset[2]){
                             offset[3] = nextHRPos;
                         }else{
                             offset[3] = pos;
                         }
                         state[0] = INIT;
                     }
                 }
             }

             @Override
             public void handleEndTag(Tag t, int pos) {
                 if (t == HTML.Tag.DIV && state[0] == INSIDE_DIV) {
                     if (--div_counter == 0) {
                         if (offset[0] > -1 && offset[1] == -1) {
                             state[0] = CLASS_DATA_START;
                             offset[1] = pos;
                         } else {
                             state[0] = AFTER_DIV;
                         }
                     }
                 } else if (t == HTML.Tag.LI && state[0] == AFTER_DIV) {
                     if (--li_counter < 0) {
                         offset[3] = pos;
                         state[0] = INIT;
                     }
                 }
             }

             public void handleComment(char[] data, int pos){
                 String comment = String.valueOf(data);
                 if (comment!=null){
                     if (comment.indexOf("START OF CLASS DATA")>0){ //NOI18N
                         state[0] = CLASS_DATA_START;
                     } else if (comment.indexOf("NESTED CLASS SUMMARY")>0 //NOI18N
                             && offset[3] == -1){
                         if (lastHRPos!=-1 && lastHRPos > offset[2]){
                             offset[3] = lastHRPos;
                         }else{
                             offset[3] = pos;
                         }
                     }
                 }
             }

             public void handleText(char[] data, int pos) {
                 if (state[0] == CLASS_DATA_START && "Deprecated.".equals(new String(data))) { //NOI18N
                     offset[0] = lastHRPos + 4;
                 } else if (state[0] == INSIDE_DIV && "Deprecated.".equals(new String(data))) { //NOI18N
                     offset[0] = offset[2];
                     offset[2] = -1;
                 } else if (state[0] == TEXT_START && offset[2] < 0) {
                     offset[2] = pos;
                 }
             }
         };

         parser.parse(reader, callback, ignoreCharset);
         callback = null;
         return offset;
     }

     /** Retrieves the position (start offset and end offset) of member javadoc info
       * in the raw html file */
     private static int[] parseMember(Reader reader, final Collection<? extends String> names, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
         final int INIT = 0;
         // 'A' tag with the name we are looking for.
         final int A_OPEN = 1;
         // close tag of 'A'
         final int A_CLOSE = 2;
         // PRE close tag after the A_CLOSE
         final int PRE_CLOSE = 3;
         // div tag after the PRE_CLOSE
         final int INSIDE_DIV = 4;

         final int state[] = new int[1];
         final int offset[] = new int[2];

         offset[0] = -1; //start offset
         offset[1] = -1; //end offset
         state[0] = INIT;

         HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

             int div_counter = 0;
             int dl_counter = 0;
             int li_counter = 0;
             int hrPos = -1;
             boolean startWithNextText;

             @Override
             public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                 if (t == HTML.Tag.HR && state[0] == PRE_CLOSE){
                     hrPos = pos;
                 }
             }

             @Override
             public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                 if (t == HTML.Tag.A) {
                     String attrName = (String)a.getAttribute(HTML.Attribute.NAME);
                     String attrId = (String)a.getAttribute(HTML.Attribute.ID);
                     if (names.contains(attrName) || names.contains(attrId)){
                         // we have found desired javadoc member info anchor
                         state[0] = A_OPEN;
                     } else {
                         if ((state[0] == PRE_CLOSE) && (attrName != null || attrId != null) && hrPos != -1){
                             // reach the end of retrieved javadoc info
                             state[0] = INIT;
                             offset[1] = hrPos;
                         }
                     }
                 } else if (t == HTML.Tag.DL && state[0] == PRE_CLOSE) {
                     dl_counter++;
                 } else if (t == HTML.Tag.LI && state[0] == PRE_CLOSE) {
                     li_counter++;
                 } else if (t == HTML.Tag.DD && state[0] == PRE_CLOSE && offset[0] < 0){
                         offset[0] = pos;
                 } else if (t == HTML.Tag.DIV && (state[0] == PRE_CLOSE || state[0] == A_CLOSE || state[0] == INSIDE_DIV)){
                     state[0] = INSIDE_DIV;
                     div_counter++;
                     if (offset[0] < 0) {
                         if (div_counter == 2) {
                           offset[0] = pos;
                         } else if (div_counter == 1 && a.containsAttribute(HTML.Attribute.CLASS, "block")) {
                             startWithNextText = true;
                         }
                     }
                 }

             }

             @Override
             public void handleEndTag(HTML.Tag t, int pos){
                 if (t == HTML.Tag.A && state[0] == A_OPEN){
                     state[0] = A_CLOSE;
                 } else if (t == HTML.Tag.PRE && state[0] == A_CLOSE){
                     state[0] = PRE_CLOSE;
                 } else if (t == HTML.Tag.DL && state[0] == PRE_CLOSE) {
                     if (--dl_counter == 0)
                         hrPos = pos;
                 } else if (t == HTML.Tag.LI && state[0] == PRE_CLOSE) {
                     if (--li_counter < 0)
                         hrPos = pos;
                 } else if (t == HTML.Tag.DIV && state[0] == INSIDE_DIV) {
                     if (--div_counter == 0) {
                         state[0] = PRE_CLOSE;
                         hrPos = pos;
                     }
                 }
             }

             @Override
             public void handleText(char[] data, int pos) {
                 if (startWithNextText) {
                     startWithNextText = false;
                     if (offset[0] < 0) {
                         offset[0] = pos;
                     }
                 }
             }

             @Override
             public void handleComment(char[] data, int pos){
                 String comment = String.valueOf(data);
                 if (comment!=null){
                     if (comment.indexOf("END OF CLASS DATA")>0){ //NOI18N
                         if ((state[0] == PRE_CLOSE) && hrPos != -1){
                             // reach the end of retrieved javadoc info
                             state[0] = INIT;
                             offset[1] = hrPos;
                         }
                     }
                 }
             }
         };

         parser.parse(reader, callback, ignoreCharset);
         callback = null;
         return offset;
     }

     /** Retrieves the position (start offset and end offset) of member javadoc info
       * in the raw html file */
     private static int[] parsePackage(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
         final String name = "package_description"; //NOI18N
         final int INIT = 0;
         // 'A' tag with the name we are looking for.
         final int A_OPEN = 1;

         final int state[] = new int[1];
         final int offset[] = new int[2];

         offset[0] = -1; //start offset
         offset[1] = -1; //end offset
         state[0] = INIT;

         HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

             int hrPos = -1;

             public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                 if (t == HTML.Tag.HR && state[0]!=INIT){
                     if (state[0] == A_OPEN){
                         hrPos = pos;
                         offset[1] = pos;
                     }
                 }
             }

             public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {

                 if (t == HTML.Tag.A) {
                     String attrName = (String)a.getAttribute(HTML.Attribute.NAME);
                     if (name.equals(attrName)){
                         // we have found desired javadoc member info anchor
                         state[0] = A_OPEN;
                         offset[0] = pos;
                     } else {
                         if (state[0] == A_OPEN && attrName!=null){
                             // reach the end of retrieved javadoc info
                             state[0] = INIT;
                             offset[1] = (hrPos!=-1) ? hrPos : pos;
                         }
                     }
                 }
             }
         };

         parser.parse(reader, callback, ignoreCharset);
         callback = null;
         return offset;
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.netbeans.api.java.source.ui;

	import java.io.FileNotFoundException;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.InputStreamReader;
	import java.io.InterruptedIOException;
	import java.io.Reader;
	import java.net.URL;
	import java.net.URLDecoder;
	import java.util.Collection;
	import java.util.HashSet;
	import java.util.StringTokenizer;
	import java.util.logging.Level;
	import java.util.logging.Logger;
	import javax.swing.text.ChangedCharSetException;
	import javax.swing.text.MutableAttributeSet;
	import javax.swing.text.html.HTML;
	import javax.swing.text.html.HTML.Tag;
	import javax.swing.text.html.HTMLEditorKit;
	import javax.swing.text.html.parser.ParserDelegator;
	import org.netbeans.modules.java.source.JavadocHelper;

	/**
	* HTML Parser. It retrieves sections of the javadoc HTML file.
	*
	* @author Martin Roskanin
	*/
	class HTMLJavadocParser {

	public static final Logger LOG = Logger.getLogger(HTMLJavadocParser.class.getName());

	/** Gets the javadoc text from the given URL
	* @param url location of Javadoc
	* @param pkg true if URL should be retrieved for a package
	*/
	public static String getJavadocText(URL url, boolean pkg) {
	return getJavadocText(new JavadocHelper.TextStream(url), pkg);
	}

	/** Gets the javadoc text from the given URL
	* @param page location of Javadoc
	* @param pkg true if URL should be retrieved for a package
	*/
	public static String getJavadocText(JavadocHelper.TextStream page, boolean pkg) {
	if (page == null) return null;

	HTMLEditorKit.Parser parser;
	InputStream is = null;

	String charset = null;
	for (;;) {
	try{
	is = page.openStream();
	parser = new ParserDelegator();
	String urlStr = URLDecoder.decode(page.getLocation().toString(), "UTF-8"); //NOI18N
	int offsets[] = null;
	Reader reader = charset == null?new InputStreamReader(is): new InputStreamReader(is, charset);

	if (pkg){
	// package description
	offsets = parsePackage(reader, parser, charset != null);
	}else if (urlStr.indexOf('#')>0){
	// member javadoc info
	final Collection<? extends URL> urls = page.getLocations();
	final Collection<String> possibleNames = new HashSet<>(urls.size());
	for (URL nameUrl : urls) {
	urlStr = URLDecoder.decode(nameUrl.toString(), "UTF-8"); //NOI18N
	final String memberName = urlStr.substring(urlStr.indexOf('#')+1);
	if (!memberName.isEmpty()) {
	possibleNames.add(memberName);
	}
	}
	if (!possibleNames.isEmpty()) {
	offsets = parseMember(reader, possibleNames, parser, charset != null);
	}
	}else{
	// class javadoc info
	offsets = parseClass(reader, parser, charset != null);
	}

	if (offsets != null){
	return getTextFromURLStream(page, offsets, charset);
	}
	break;
	} catch (ChangedCharSetException e) {
	if (charset == null) {
	charset = getCharSet(e);
	//restart with valid charset
	} else {
	e.printStackTrace();
	break;
	}
	} catch (FileNotFoundException x) {
	break; // e.g. missing com.sun.** class in network Javadoc; ignore
	} catch (InterruptedIOException x) {
	//Http javadoc timeout
	break;
	}catch(IOException ioe){
	ioe.printStackTrace();
	break;
	}finally{
	parser = null;
	if (is!=null) {
	try{
	is.close();
	}catch(IOException ioe){
	ioe.printStackTrace();
	}
	}
	}
	}
	return null;
	}

	private static String getCharSet(ChangedCharSetException e) {
	String spec = e.getCharSetSpec();
	if (e.keyEqualsCharSet()) {
	//charsetspec contains only charset
	return spec;
	}

	//charsetspec is in form "text/html; charset=UTF-8"

	int index = spec.indexOf(";"); // NOI18N
	if (index != -1) {
	spec = spec.substring(index + 1);
	}

	spec = spec.toLowerCase();

	StringTokenizer st = new StringTokenizer(spec, " \t=", true); //NOI18N
	boolean foundCharSet = false;
	boolean foundEquals = false;
	while (st.hasMoreTokens()) {
	String token = st.nextToken();
	if (token.equals(" ") \|\| token.equals("\t")) { //NOI18N
	continue;
	}
	if (foundCharSet == false && foundEquals == false
	&& token.equals("charset")) { //NOI18N
	foundCharSet = true;
	continue;
	} else if (foundEquals == false && token.equals("=")) {//NOI18N
	foundEquals = true;
	continue;
	} else if (foundEquals == true && foundCharSet == true) {
	return token;
	}

	foundCharSet = false;
	foundEquals = false;
	}

	return null;
	}

	private static String getTextFromURLStream(JavadocHelper.TextStream page, int[] offsets, String charset) throws IOException {
	if (page == null)
	return null;

	InputStream fis = null;
	InputStreamReader fisreader = null;
	try {
	fis = page.openStream();
	fisreader = charset == null ? new InputStreamReader(fis) : new InputStreamReader(fis, charset);

	StringBuilder sb = new StringBuilder();
	int offset = 0;

	for (int i = 0; i < offsets.length - 1; i+=2) {
	int startOffset = offsets[i];
	int endOffset = offsets[i + 1];
	if (startOffset < 0 \|\| endOffset < 0)
	continue;
	if (startOffset > endOffset) {
	LOG.log(Level.WARNING,
	"Was not able to correctly parse javadoc: {0}, startOffset={1}, endOffset={2}.",
	new Object[] {page.getLocation(), startOffset, endOffset});
	return null;
	}

	int len = endOffset - startOffset;
	char buffer[] = new char[len];
	int bytesToSkip = startOffset - offset;
	long bytesSkipped = 0;
	do {
	bytesSkipped = fisreader.skip(bytesToSkip);
	bytesToSkip -= bytesSkipped;
	} while ((bytesToSkip > 0) && (bytesSkipped > 0));

	int bytesAlreadyRead = 0;
	do {
	int count = fisreader.read(buffer, bytesAlreadyRead, len - bytesAlreadyRead);
	if (count < 0){
	break;
	}
	bytesAlreadyRead += count;
	} while (bytesAlreadyRead < len);
	sb.append(buffer);
	offset = endOffset;
	}
	return sb.toString();
	} finally {
	if (fisreader != null)
	fisreader.close();
	}
	}


	/** Retrieves the position (start offset and end offset) of class javadoc info
	* in the raw html file */
	private static int[] parseClass(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
	final int INIT = 0;
	// javadoc HTML comment '======== START OF CLASS DATA ========'
	final int CLASS_DATA_START = 1;
	// start of the text we need. Located just after first P.
	final int TEXT_START = 2;
	// div tag after the CLASS_DATA_START
	final int INSIDE_DIV = 3;
	// div tag after the INSIDE_DIV
	final int AFTER_DIV = 4;

	final int state[] = new int[] {INIT};
	final int offset[] = new int[] {-1, -1, -1, -1};

	HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

	int div_counter = 0;
	int li_counter = 0;
	int nextHRPos = -1;
	int lastHRPos = -1;

	@Override
	public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
	if (t == HTML.Tag.HR){
	if (state[0] == TEXT_START){
	nextHRPos = pos;
	}
	lastHRPos = pos;
	}
	}

	@Override
	public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
	if (t == HTML.Tag.P && state[0] == CLASS_DATA_START){
	if (offset[0] != -1 && offset[1] == -1)
	offset[1] = pos + 3;
	else
	state[0] = TEXT_START;
	} else if (t == HTML.Tag.DIV) {
	if (state[0] == CLASS_DATA_START && a.containsAttribute(HTML.Attribute.CLASS, "block")) {
	state[0] = INSIDE_DIV;
	if (offset[2] == -1)
	offset[2] = pos;
	}
	if (state[0] == INSIDE_DIV)
	div_counter++;
	} else if (t == HTML.Tag.LI && state[0] == AFTER_DIV) {
	li_counter++;
	} else if (t == HTML.Tag.A && state[0] == TEXT_START) {
	String attrName = (String)a.getAttribute(HTML.Attribute.NAME);
	if (attrName!=null && attrName.length()>0){
	if (nextHRPos!=-1 && nextHRPos > offset[2]){
	offset[3] = nextHRPos;
	}else{
	offset[3] = pos;
	}
	state[0] = INIT;
	}
	}
	}

	@Override
	public void handleEndTag(Tag t, int pos) {
	if (t == HTML.Tag.DIV && state[0] == INSIDE_DIV) {
	if (--div_counter == 0) {
	if (offset[0] > -1 && offset[1] == -1) {
	state[0] = CLASS_DATA_START;
	offset[1] = pos;
	} else {
	state[0] = AFTER_DIV;
	}
	}
	} else if (t == HTML.Tag.LI && state[0] == AFTER_DIV) {
	if (--li_counter < 0) {
	offset[3] = pos;
	state[0] = INIT;
	}
	}
	}

	public void handleComment(char[] data, int pos){
	String comment = String.valueOf(data);
	if (comment!=null){
	if (comment.indexOf("START OF CLASS DATA")>0){ //NOI18N
	state[0] = CLASS_DATA_START;
	} else if (comment.indexOf("NESTED CLASS SUMMARY")>0 //NOI18N
	&& offset[3] == -1){
	if (lastHRPos!=-1 && lastHRPos > offset[2]){
	offset[3] = lastHRPos;
	}else{
	offset[3] = pos;
	}
	}
	}
	}

	public void handleText(char[] data, int pos) {
	if (state[0] == CLASS_DATA_START && "Deprecated.".equals(new String(data))) { //NOI18N
	offset[0] = lastHRPos + 4;
	} else if (state[0] == INSIDE_DIV && "Deprecated.".equals(new String(data))) { //NOI18N
	offset[0] = offset[2];
	offset[2] = -1;
	} else if (state[0] == TEXT_START && offset[2] < 0) {
	offset[2] = pos;
	}
	}
	};

	parser.parse(reader, callback, ignoreCharset);
	callback = null;
	return offset;
	}

	/** Retrieves the position (start offset and end offset) of member javadoc info
	* in the raw html file */
	private static int[] parseMember(Reader reader, final Collection<? extends String> names, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
	final int INIT = 0;
	// 'A' tag with the name we are looking for.
	final int A_OPEN = 1;
	// close tag of 'A'
	final int A_CLOSE = 2;
	// PRE close tag after the A_CLOSE
	final int PRE_CLOSE = 3;
	// div tag after the PRE_CLOSE
	final int INSIDE_DIV = 4;

	final int state[] = new int[1];
	final int offset[] = new int[2];

	offset[0] = -1; //start offset
	offset[1] = -1; //end offset
	state[0] = INIT;

	HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

	int div_counter = 0;
	int dl_counter = 0;
	int li_counter = 0;
	int hrPos = -1;
	boolean startWithNextText;

	@Override
	public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
	if (t == HTML.Tag.HR && state[0] == PRE_CLOSE){
	hrPos = pos;
	}
	}

	@Override
	public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
	if (t == HTML.Tag.A) {
	String attrName = (String)a.getAttribute(HTML.Attribute.NAME);
	String attrId = (String)a.getAttribute(HTML.Attribute.ID);
	if (names.contains(attrName) \|\| names.contains(attrId)){
	// we have found desired javadoc member info anchor
	state[0] = A_OPEN;
	} else {
	if ((state[0] == PRE_CLOSE) && (attrName != null \|\| attrId != null) && hrPos != -1){
	// reach the end of retrieved javadoc info
	state[0] = INIT;
	offset[1] = hrPos;
	}
	}
	} else if (t == HTML.Tag.DL && state[0] == PRE_CLOSE) {
	dl_counter++;
	} else if (t == HTML.Tag.LI && state[0] == PRE_CLOSE) {
	li_counter++;
	} else if (t == HTML.Tag.DD && state[0] == PRE_CLOSE && offset[0] < 0){
	offset[0] = pos;
	} else if (t == HTML.Tag.DIV && (state[0] == PRE_CLOSE \|\| state[0] == A_CLOSE \|\| state[0] == INSIDE_DIV)){
	state[0] = INSIDE_DIV;
	div_counter++;
	if (offset[0] < 0) {
	if (div_counter == 2) {
	offset[0] = pos;
	} else if (div_counter == 1 && a.containsAttribute(HTML.Attribute.CLASS, "block")) {
	startWithNextText = true;
	}
	}
	}

	}

	@Override
	public void handleEndTag(HTML.Tag t, int pos){
	if (t == HTML.Tag.A && state[0] == A_OPEN){
	state[0] = A_CLOSE;
	} else if (t == HTML.Tag.PRE && state[0] == A_CLOSE){
	state[0] = PRE_CLOSE;
	} else if (t == HTML.Tag.DL && state[0] == PRE_CLOSE) {
	if (--dl_counter == 0)
	hrPos = pos;
	} else if (t == HTML.Tag.LI && state[0] == PRE_CLOSE) {
	if (--li_counter < 0)
	hrPos = pos;
	} else if (t == HTML.Tag.DIV && state[0] == INSIDE_DIV) {
	if (--div_counter == 0) {
	state[0] = PRE_CLOSE;
	hrPos = pos;
	}
	}
	}

	@Override
	public void handleText(char[] data, int pos) {
	if (startWithNextText) {
	startWithNextText = false;
	if (offset[0] < 0) {
	offset[0] = pos;
	}
	}
	}

	@Override
	public void handleComment(char[] data, int pos){
	String comment = String.valueOf(data);
	if (comment!=null){
	if (comment.indexOf("END OF CLASS DATA")>0){ //NOI18N
	if ((state[0] == PRE_CLOSE) && hrPos != -1){
	// reach the end of retrieved javadoc info
	state[0] = INIT;
	offset[1] = hrPos;
	}
	}
	}
	}
	};

	parser.parse(reader, callback, ignoreCharset);
	callback = null;
	return offset;
	}

	/** Retrieves the position (start offset and end offset) of member javadoc info
	* in the raw html file */
	private static int[] parsePackage(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
	final String name = "package_description"; //NOI18N
	final int INIT = 0;
	// 'A' tag with the name we are looking for.
	final int A_OPEN = 1;

	final int state[] = new int[1];
	final int offset[] = new int[2];

	offset[0] = -1; //start offset
	offset[1] = -1; //end offset
	state[0] = INIT;

	HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

	int hrPos = -1;

	public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
	if (t == HTML.Tag.HR && state[0]!=INIT){
	if (state[0] == A_OPEN){
	hrPos = pos;
	offset[1] = pos;
	}
	}
	}

	public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {

	if (t == HTML.Tag.A) {
	String attrName = (String)a.getAttribute(HTML.Attribute.NAME);
	if (name.equals(attrName)){
	// we have found desired javadoc member info anchor
	state[0] = A_OPEN;
	offset[0] = pos;
	} else {
	if (state[0] == A_OPEN && attrName!=null){
	// reach the end of retrieved javadoc info
	state[0] = INIT;
	offset[1] = (hrPos!=-1) ? hrPos : pos;
	}
	}
	}
	}
	};

	parser.parse(reader, callback, ignoreCharset);
	callback = null;
	return offset;
	}

	}