blob: 0bd5ff6d45212c88cdbd7e5eb8d34a087ae4db31 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# HG changeset patch
# Parent d4c267a15aa0ac57d2f00ccd68eb46eb463738a7
Minimal changes to integrate html parser with netbeans infrastructure
- create a unique version
- disable the HotSpot workaround, as we need the detailed transition information
- track the global position of the parser
- make startTag and endTag overridable in TreeBuilder
diff --git a/pom.xml b/pom.xml
--- a/pom.xml
+++ b/pom.xml
@@ -20,17 +20,17 @@
* DEALINGS IN THE SOFTWARE.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>nu.validator.htmlparser</groupId>
<artifactId>htmlparser</artifactId>
<packaging>bundle</packaging>
- <version>1.4</version>
+ <version>1.4.20190624</version>
<name>htmlparser</name>
<url>http://about.validator.nu/htmlparser/</url>
<description>The Validator.nu HTML Parser is an implementation of the HTML5 parsing algorithm in Java for applications. The parser is designed to work as a drop-in replacement for the XML parser in applications that already support XHTML 1.x content with an XML parser and use SAX, DOM or XOM to interface with the parser.</description>
<!--
Usage notes for this POM:
To build without signing, run:
mvn clean source:jar javadoc:jar repository:bundle-create
@@ -68,16 +68,17 @@
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<connection>scm:hg:http://hg.mozilla.org/projects/htmlparser/</connection>
<url>http://hg.mozilla.org/projects/htmlparser/</url>
</scm>
<build>
+ <!--<sourceDirectory>${basedir}/src</sourceDirectory>-->
<sourceDirectory>${project.build.directory}/src</sourceDirectory>
<testSourceDirectory>${basedir}/test-src</testSourceDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.5</source>
@@ -108,17 +109,17 @@
<delete dir="${project.build.sourceDirectory}"/>
<mkdir dir="${project.build.sourceDirectory}"/>
<copy todir="${project.build.sourceDirectory}">
<fileset dir="${basedir}/src"/>
</copy>
</target>
</configuration>
</execution>
- <execution>
+<!-- <execution>
<id>tokenizer-hotspot-workaround</id>
<phase>process-sources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<property name="translator.sources" value="${basedir}/translator-src"/>
@@ -129,17 +130,17 @@
<classpath>
<pathelement location="${translator.classes}"/>
</classpath>
<arg value="${project.build.sourceDirectory}/nu/validator/htmlparser/impl/Tokenizer.java"/>
<arg value="${project.build.sourceDirectory}/nu/validator/htmlparser/impl/HotSpotWorkaround.txt"/>
</java>
</target>
</configuration>
- </execution>
+ </execution>-->
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
diff --git a/src/nu/validator/htmlparser/impl/Tokenizer.java b/src/nu/validator/htmlparser/impl/Tokenizer.java
--- a/src/nu/validator/htmlparser/impl/Tokenizer.java
+++ b/src/nu/validator/htmlparser/impl/Tokenizer.java
@@ -504,16 +504,19 @@ public class Tokenizer implements Locato
private final boolean newAttributesEachTime;
private boolean shouldSuspend;
protected boolean confident;
private int line;
+ //holds the offset of the current buffer relative to the beginning of the input source
+ protected int currentBufferGlobalOffset;
+
/*
* The line number of the current attribute. First set to the line of the
* attribute name and if there is a value, set to the line the value
* started on.
*/
// CPPONLY: private int attributeLine;
private Interner interner;
@@ -1371,16 +1374,18 @@ public class Tokenizer implements Locato
initializeWithoutStarting();
tokenHandler.startTokenization(this);
// [NOCPP[
startErrorReporting();
// ]NOCPP]
}
public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
+ setTransitionBaseOffset(currentBufferGlobalOffset);
+
int state = stateSave;
int returnState = returnStateSave;
char c = '\u0000';
shouldSuspend = false;
lastCR = false;
int start = buffer.getStart();
int end = buffer.getEnd();
@@ -1443,16 +1448,21 @@ public class Tokenizer implements Locato
end);
// ]NOCPP]
if (pos == end) {
// exiting due to end of buffer
buffer.setStart(pos);
} else {
buffer.setStart(pos + 1);
}
+
+ if(! buffer.hasMore()) {
+ currentBufferGlobalOffset += buffer.getEnd();
+ }
+
return lastCR;
}
// [NOCPP[
private void ensureBufferSpace(int inputLength) throws SAXException {
// Add 2 to account for emissions of LT_GT, LT_SOLIDUS and RSQB_RSQB.
// Adding to the general worst case instead of only the
// TreeBuilder-exposed worst case to avoid re-introducing a bug when
@@ -6924,16 +6934,17 @@ public class Tokenizer implements Locato
html4 = false;
metaBoundaryPassed = false;
wantsComments = tokenHandler.wantsComments();
if (!newAttributesEachTime) {
attributes = new HtmlAttributes(mappingLangToXmlLang);
}
// ]NOCPP]
resetToDataState();
+ currentBufferGlobalOffset = 0;
}
protected void errGarbageAfterLtSlash() throws SAXException {
}
protected void errLtSlashGt() throws SAXException {
}
diff --git a/src/nu/validator/htmlparser/impl/TreeBuilder.java b/src/nu/validator/htmlparser/impl/TreeBuilder.java
--- a/src/nu/validator/htmlparser/impl/TreeBuilder.java
+++ b/src/nu/validator/htmlparser/impl/TreeBuilder.java
@@ -1666,17 +1666,17 @@ public abstract class TreeBuilder<T> imp
}
// [NOCPP[
idLocations.clear();
// ]NOCPP]
charBuffer = null;
end();
}
- public final void startTag(ElementName elementName,
+ public void startTag(ElementName elementName,
HtmlAttributes attributes, boolean selfClosing) throws SAXException {
flushCharacters();
// [NOCPP[
if (errorHandler != null) {
// ID uniqueness
@IdType String id = attributes.getId();
if (id != null) {
@@ -3330,17 +3330,17 @@ public abstract class TreeBuilder<T> imp
if (tokenizer.internalEncodingDeclaration(extract)) {
requestSuspension();
}
}
Portability.releaseString(extract);
}
}
- public final void endTag(ElementName elementName) throws SAXException {
+ public void endTag(ElementName elementName) throws SAXException {
flushCharacters();
needToDropLF = false;
int eltPos;
int group = elementName.getGroup();
@Local String name = elementName.getName();
endtagloop: for (;;) {
if (isInForeign()) {
if (stack[currentPtr].name != name) {