| Index: E:/projects/lucene/trunk/common-build.xml
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/common-build.xml (revision 561292)
|
| +++ E:/projects/lucene/trunk/common-build.xml (working copy)
|
| @@ -7,16 +7,16 @@
|
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| - |
| + |
| http://www.apache.org/licenses/LICENSE-2.0 |
| - |
| + |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| - |
| + |
| <project name="common" xmlns:artifact="antlib:org.apache.maven.artifact.ant"> |
| <description> |
| This file is designed for importing into a main build file, and not intended |
| @@ -58,6 +58,7 @@
|
| <property name="maven.dist.dir" location="dist/maven"/> |
| |
| <property name="javacc.home" location="${common.dir}"/> |
| + <property name="jflex.home" location="${common.dir}"/> |
| |
| <property name="junit.output.dir" location="${build.dir}/test"/> |
| <property name="junit.reports" location="${build.dir}/test/reports"/> |
| @@ -71,11 +72,11 @@
|
| <include name="FIND_NOTHING"/> |
| <exclude name="**/*"/> |
| </patternset> |
| - |
| + |
| <condition property="build-1-5-contrib"> |
| <equals arg1="1.5" arg2="${ant.java.version}" /> |
| </condition> |
| - |
| + |
| <property name="clover.db.dir" location="${build.dir}/test/clover/db"/> |
| <property name="clover.report.dir" location="${build.dir}/test/clover/reports"/> |
| |
| @@ -98,6 +99,12 @@
|
| /> |
| |
| <available |
| + property="jflex.present" |
| + classname="JFlex.anttask.JFlexTask" |
| + classpath="${jflex.home}/lib/JFlex.jar" |
| + /> |
| + |
| + <available |
| property="junit.present" |
| classname="junit.framework.TestCase" |
| /> |
| @@ -106,7 +113,7 @@
|
| property="maven.ant.tasks.present" |
| classname="org.apache.maven.artifact.ant.Pom" |
| /> |
| - |
| + |
| <target name="clean" |
| description="Removes contents of build and dist directories"> |
| <delete dir="${build.dir}"/> |
| @@ -129,11 +136,25 @@
|
| One or more of the JavaCC .jj files is newer than its corresponding |
| .java file. Run the "javacc" target to regenerate the artifacts. |
| </echo> |
| + </target>
|
| + |
| + <target name="jflex-uptodate-check"> |
| + <uptodate property="jflex.files.uptodate"> |
| + <srcfiles dir="src" includes="**/*.jflex" /> |
| + <mapper type="glob" from="*.jflex" to="*.java"/> |
| + </uptodate> |
| </target> |
| |
| - <target name="init" depends="javacc-uptodate-check, javacc-notice"> |
| + <target name="jflex-notice" unless="jflex.files.uptodate"> |
| + <echo> |
| + One or more of the JFlex .jflex files is newer than its corresponding |
| + .java file. Run the "jflex" target to regenerate the artifacts. |
| + </echo> |
| </target> |
| |
| + <target name="init" depends="javacc-uptodate-check, javacc-notice, jflex-uptodate-check, jflex-notice"> |
| + </target> |
| + |
| <target name="javacc-check"> |
| <fail unless="javacc.present"> |
| ################################################################## |
| @@ -161,7 +182,30 @@
|
| </fail> |
| |
| </target> |
| - |
| + |
| + <target name="jflex-check"> |
| + <fail unless="jflex.present"> |
| + ################################################################## |
| + JFlex not found. |
| + JFlex Home: ${jflex.home} |
| + |
| + Please download and install JFlex from: |
| + |
| + <http://jflex.de/download.html> |
| + |
| + Then, create a build.properties file either in your home |
| + directory, or within the Lucene directory and set the jflex.home |
| + property to the path where JFlex is installed. For example, |
| + if you installed JFlex in /usr/local/java/jflex-1.4.1, then set the |
| + jflex.home property to: |
| + |
| + jflex.home=/usr/local/java/jflex-1.4.1 |
| + |
| + ################################################################## |
| + </fail> |
| + |
| + </target> |
| + |
| <target name="compile-core" depends="init, clover" |
| description="Compiles core classes"> |
| <compile |
| @@ -179,12 +223,12 @@
|
| description="Packages the JAR file"> |
| <jarify /> |
| </target> |
| - |
| + |
| <target name="maven.ant.tasks-check"> |
| <fail unless="maven.ant.tasks.present"> |
| ################################################################## |
| Maven ant tasks not found. |
| - Please make sure the maven-ant-tasks jar is in ANT_HOME/lib, or made |
| + Please make sure the maven-ant-tasks jar is in ANT_HOME/lib, or made |
| available to Ant using other mechanisms like -lib or CLASSPATH. |
| ################################################################## |
| </fail> |
| @@ -196,7 +240,7 @@
|
| <attribute name="pom.xml" default="${pom.xml}"/> |
| <sequential> |
| <copy file="@{pom.xml}" tofile="${build.dir}/@{pom.xml}"> |
| - <filterset begintoken="@" endtoken="@"> |
| + <filterset begintoken="@" endtoken="@"> |
| <filter token="version" value="${version}"/> |
| </filterset> |
| </copy> |
| @@ -206,9 +250,9 @@
|
| <pom refid="maven.project"/> |
| </artifact:install> |
| </sequential> |
| - </macrodef> |
| - |
| - |
| + </macrodef> |
| + |
| + |
| <macrodef name="jarify" description="Builds a JAR file"> |
| <attribute name="title" default="Lucene Search Engine: ${ant.project.name}" /> |
| <element name="manifest-attributes" optional="yes"/> |
| @@ -218,7 +262,7 @@
|
| outputproperty="svnversion" failifexecutionfails="false"> |
| <arg line="."/> |
| </exec> |
| - |
| + |
| <jar |
| destfile="${build.dir}/${final.name}.jar" |
| basedir="${build.dir}/classes/java"> |
| @@ -232,14 +276,14 @@
|
| --> |
| <!-- Don't set 'Manifest-Version' it identifies the version of the |
| manifest file format, and should allways be 1.0 (the default) |
| - |
| - Don't set 'Created-by' attribute, it's purpose is |
| + |
| + Don't set 'Created-by' attribute, its purpose is |
| to identify the version of java used to build the jar, |
| which ant will do by default. |
| - |
| + |
| Ant will happily override these with bogus strings if you |
| tell it to, so don't. |
| - |
| + |
| NOTE: we don't use section info because all of our manifest data |
| applies to the entire jar/war ... no package specific info. |
| --> |
| @@ -254,9 +298,9 @@
|
| value="${version} ${svnversion} - ${user.name} - ${DSTAMP} ${TSTAMP}"/> |
| <attribute name="Implementation-Vendor" |
| value="The Apache Software Foundation"/> |
| - <attribute name="X-Compile-Source-JDK" |
| + <attribute name="X-Compile-Source-JDK" |
| value="${javac.source}"/> |
| - <attribute name="X-Compile-Target-JDK" |
| + <attribute name="X-Compile-Target-JDK" |
| value="${javac.target}"/> |
| <manifest-attributes/> |
| </manifest> |
| @@ -315,7 +359,7 @@
|
| anywhere. |
| --> |
| <sysproperty key="lucene.common.dir" file="${common.dir}" /> |
| - |
| + |
| <!-- contrib/ant IndexTaskTest needs these two system properties --> |
| <sysproperty key="docs.dir" file="src/test"/> |
| <sysproperty key="index.dir" file="${build.dir}/test/index"/> |
| @@ -339,7 +383,7 @@
|
| <fail if="tests.failed">Tests failed!</fail> |
| <!-- life would be easier if echo had an 'if' attribute like fail --> |
| <delete file="${build.dir}/test/junitfailed.flag" /> |
| - |
| + |
| </target> |
| |
| <!-- |
| Index: E:/projects/lucene/trunk/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java (working copy)
|
| @@ -23,89 +23,161 @@
|
| |
| public class TestStandardAnalyzer extends TestCase { |
| |
| - public void assertAnalyzesTo(Analyzer a, String input, String[] expected) throws Exception { |
| - TokenStream ts = a.tokenStream("dummy", new StringReader(input)); |
| - for (int i = 0; i < expected.length; i++) { |
| - Token t = ts.next(); |
| - assertNotNull(t); |
| - assertEquals(expected[i], t.termText()); |
| + private Analyzer a = new StandardAnalyzer(); |
| + |
| + public void assertAnalyzesTo(Analyzer a, String input, String[] expected) throws Exception { |
| + assertAnalyzesTo(a, input, expected, null); |
| } |
| - assertNull(ts.next()); |
| - ts.close(); |
| - } |
| |
| + public void assertAnalyzesTo(Analyzer a, String input, String[] expectedImages, String[] expectedTypes) throws Exception { |
| + TokenStream ts = a.tokenStream("dummy", new StringReader(input)); |
| + for (int i = 0; i < expectedImages.length; i++) { |
| + Token t = ts.next(); |
| + assertNotNull(t); |
| + assertEquals(expectedImages[i], t.termText()); |
| + if (expectedTypes != null) |
| + { |
| + assertEquals(expectedTypes[i], t.type()); |
| + } |
| + } |
| + assertNull(ts.next()); |
| + ts.close(); |
| + } |
| |
| - public void testStandard() throws Exception { |
| - Analyzer a = new StandardAnalyzer(); |
| |
| - // alphanumeric tokens |
| - assertAnalyzesTo(a, "B2B", new String[]{"b2b"}); |
| - assertAnalyzesTo(a, "2B", new String[]{"2b"}); |
| + public void testAlphanumeric() throws Exception { |
| + // alphanumeric tokens |
| + assertAnalyzesTo(a, "B2B", new String[]{"b2b"}); |
| + assertAnalyzesTo(a, "2B", new String[]{"2b"}); |
| + } |
| |
| - // underscores are delimiters, but not in email addresses (below) |
| - assertAnalyzesTo(a, "word_having_underscore", new String[]{"word", "having", "underscore"}); |
| - assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new String[]{"word", "underscore", "stopwords"}); |
| + public void testUnderscores() throws Exception { |
| + // underscores are delimiters, but not in email addresses (below) |
| + assertAnalyzesTo(a, "word_having_underscore", new String[]{"word", "having", "underscore"}); |
| + assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new String[]{"word", "underscore", "stopwords"}); |
| + } |
| |
| - // other delimiters: "-", "/", "," |
| - assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase" }); |
| - assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"}); |
| - assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"}); |
| + public void testDelimiters() throws Exception { |
| + // other delimiters: "-", "/", "," |
| + assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase" }); |
| + assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"}); |
| + assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"}); |
| + } |
| |
| - // internal apostrophes: O'Reilly, you're, O'Reilly's |
| - // possessives are actually removed by StardardFilter, not the tokenizer |
| - assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"}); |
| - assertAnalyzesTo(a, "you're", new String[]{"you're"}); |
| - assertAnalyzesTo(a, "she's", new String[]{"she"}); |
| - assertAnalyzesTo(a, "Jim's", new String[]{"jim"}); |
| - assertAnalyzesTo(a, "don't", new String[]{"don't"}); |
| - assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly"}); |
| + public void testApostrophes() throws Exception { |
| + // internal apostrophes: O'Reilly, you're, O'Reilly's |
| + // possessives are actually removed by StandardFilter, not the tokenizer |
| + assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"}); |
| + assertAnalyzesTo(a, "you're", new String[]{"you're"}); |
| + assertAnalyzesTo(a, "she's", new String[]{"she"}); |
| + assertAnalyzesTo(a, "Jim's", new String[]{"jim"}); |
| + assertAnalyzesTo(a, "don't", new String[]{"don't"}); |
| + assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly"}); |
| + } |
| |
| - // t and s had been stopwords in Lucene <= 2.0, which made it impossible |
| - // to correctly search for these terms: |
| - assertAnalyzesTo(a, "s-class", new String[]{"s", "class"}); |
| - assertAnalyzesTo(a, "t-com", new String[]{"t", "com"}); |
| - // 'a' is still a stopword: |
| - assertAnalyzesTo(a, "a-class", new String[]{"class"}); |
| + public void testTSADash() throws Exception { |
| + // t and s had been stopwords in Lucene <= 2.0, which made it impossible |
| + // to correctly search for these terms: |
| + assertAnalyzesTo(a, "s-class", new String[]{"s", "class"}); |
| + assertAnalyzesTo(a, "t-com", new String[]{"t", "com"}); |
| + // 'a' is still a stopword: |
| + assertAnalyzesTo(a, "a-class", new String[]{"class"}); |
| + } |
| |
| - // company names |
| - assertAnalyzesTo(a, "AT&T", new String[]{"at&t"}); |
| - assertAnalyzesTo(a, "Excite@Home", new String[]{"excite@home"}); |
| + public void testCompanyNames() throws Exception { |
| + // company names |
| + assertAnalyzesTo(a, "AT&T", new String[]{"at&t"}); |
| + assertAnalyzesTo(a, "Excite@Home", new String[]{"excite@home"}); |
| + } |
| |
| - // domain names |
| - assertAnalyzesTo(a, "www.nutch.org", new String[]{"www.nutch.org" }); |
| + public void testDomainNames() throws Exception { |
| + // domain names |
| + assertAnalyzesTo(a, "www.nutch.org", new String[]{"www.nutch.org" }); |
| + } |
| |
| - // email addresses, possibly with underscores, periods, etc |
| - assertAnalyzesTo(a, "test@example.com", new String[]{"test@example.com"}); |
| - assertAnalyzesTo(a, "first.lastname@example.com", new String[]{"first.lastname@example.com"}); |
| - assertAnalyzesTo(a, "first_lastname@example.com", new String[]{"first_lastname@example.com"}); |
| + public void testEMailAddresses() throws Exception { |
| + // email addresses, possibly with underscores, periods, etc |
| + assertAnalyzesTo(a, "test@example.com", new String[]{"test@example.com"}); |
| + assertAnalyzesTo(a, "first.lastname@example.com", new String[]{"first.lastname@example.com"}); |
| + assertAnalyzesTo(a, "first_lastname@example.com", new String[]{"first_lastname@example.com"}); |
| + } |
| |
| - // floating point, serial, model numbers, ip addresses, etc. |
| - // every other segment must have at least one digit |
| - assertAnalyzesTo(a, "21.35", new String[]{"21.35"}); |
| - assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"}); |
| - assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"}); |
| - assertAnalyzesTo(a, "1-2-3", new String[]{"1-2-3"}); |
| - assertAnalyzesTo(a, "a1-b2-c3", new String[]{"a1-b2-c3"}); |
| - assertAnalyzesTo(a, "a1-b-c3", new String[]{"a1-b-c3"}); |
| + public void testNumeric() throws Exception { |
| + // floating point, serial, model numbers, ip addresses, etc. |
| + // every other segment must have at least one digit |
| + assertAnalyzesTo(a, "21.35", new String[]{"21.35"}); |
| + assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"}); |
| + assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"}); |
| + assertAnalyzesTo(a, "1-2-3", new String[]{"1-2-3"}); |
| + assertAnalyzesTo(a, "a1-b2-c3", new String[]{"a1-b2-c3"}); |
| + assertAnalyzesTo(a, "a1-b-c3", new String[]{"a1-b-c3"}); |
| + } |
| |
| - // numbers |
| - assertAnalyzesTo(a, "David has 5000 bones", new String[]{"david", "has", "5000", "bones"}); |
| + public void testTextWithNumbers() throws Exception { |
| + // numbers |
| + assertAnalyzesTo(a, "David has 5000 bones", new String[]{"david", "has", "5000", "bones"}); |
| + } |
| |
| - // various |
| - assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted" }); |
| - assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"}); |
| - assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"}); |
| - assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"}); |
| + public void testVariousText() throws Exception { |
| + // various |
| + assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted" }); |
| + assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"}); |
| + assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"}); |
| + assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"}); |
| + } |
| |
| - // acronyms have their dots stripped |
| - assertAnalyzesTo(a, "U.S.A.", new String[]{ "usa" }); |
| + public void testAcronyms() throws Exception { |
| + // acronyms have their dots stripped |
| + assertAnalyzesTo(a, "U.S.A.", new String[]{ "usa" }); |
| + } |
| |
| - // It would be nice to change the grammar in StandardTokenizer.jj to make "C#" and "C++" end up as tokens. |
| - assertAnalyzesTo(a, "C++", new String[]{"c"}); |
| - assertAnalyzesTo(a, "C#", new String[]{"c"}); |
| + public void testCPlusPlusHash() throws Exception { |
| + // It would be nice to change the grammar in StandardTokenizerImpl.jflex to make "C#" and "C++" end up as tokens. |
| + assertAnalyzesTo(a, "C++", new String[]{"c"}); |
| + assertAnalyzesTo(a, "C#", new String[]{"c"}); |
| + } |
| |
| - // Korean words |
| - assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"}); |
| + public void testKorean() throws Exception { |
| + // Korean words |
| + assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"}); |
| + } |
| |
| - } |
| + // Compliance with the "old" JavaCC-based analyzer, see: |
| + // https://issues.apache.org/jira/browse/LUCENE-966#action_12516752 |
| + |
| + public void testComplianceFileName() throws Exception { |
| + assertAnalyzesTo(a, "2004.jpg", |
| + new String[] { "2004.jpg" }, |
| + new String[] { "<HOST>" }); |
| + } |
| + |
| + public void testComplianceNumericIncorrect() throws Exception { |
| + // The original analyzer produced a <HOST> token type |
| + // for this (even though it looks more like a <NUM>) |
| + assertAnalyzesTo(a, "62.46,37004,37009,type", |
| + new String[] { "62.46,37004,37009,type" }, |
| + new String[] { "<NUM>" }); |
| + } |
| + |
| + public void testComplianceNumericLong() throws Exception { |
| + assertAnalyzesTo(a, "978-0-94045043-1,86408,86424,type", |
| + new String[] { "978-0-94045043-1,86408,86424,type" }, |
| + new String[] { "<NUM>" }); |
| + } |
| + |
| + public void testComplianceNumericFile() throws Exception { |
| + assertAnalyzesTo( |
| + a, |
| + "78academyawards/rules/rule02.html,7194,7227,type", |
| + new String[] { "78academyawards/rules/rule02.html,7194,7227,type" }, |
| + new String[] { "<NUM>" }); |
| + } |
| + |
| + public void testComplianceNumericWithUnderscores() throws Exception { |
| + assertAnalyzesTo( |
| + a, |
| + "2006-03-11t082958z_01_ban130523_rtridst_0_ozabs,2076,2123,type", |
| + new String[] { "2006-03-11t082958z_01_ban130523_rtridst_0_ozabs,2076,2123,type" }, |
| + new String[] { "<NUM>" }); |
| + } |
| } |
|
|
| Property changes on: E:\projects\lucene\trunk\src\java\org\apache\lucene\analysis\standard
|
| ___________________________________________________________________
|
| Name: svn:ignore
|
| - Token.java |
| StandardTokenizer.java |
| StandardTokenizerTokenManager.java |
| TokenMgrError.java |
| CharStream.java |
| StandardTokenizerConstants.java
|
| + Token.java |
| StandardTokenizer.java |
| StandardTokenizerTokenManager.java |
| TokenMgrError.java |
| CharStream.java |
| StandardTokenizerConstants.java |
|
|
|
|
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/Token.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/Token.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/Token.java (working copy)
|
| @@ -1,81 +0,0 @@
|
| -/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */ |
| -package org.apache.lucene.analysis.standard; |
| - |
| -/** |
| - * Describes the input token stream. |
| - */ |
| - |
| -public class Token { |
| - |
| - /** |
| - * An integer that describes the kind of this token. This numbering |
| - * system is determined by JavaCCParser, and a table of these numbers is |
| - * stored in the file ...Constants.java. |
| - */ |
| - public int kind; |
| - |
| - /** |
| - * beginLine and beginColumn describe the position of the first character |
| - * of this token; endLine and endColumn describe the position of the |
| - * last character of this token. |
| - */ |
| - public int beginLine, beginColumn, endLine, endColumn; |
| - |
| - /** |
| - * The string image of the token. |
| - */ |
| - public String image; |
| - |
| - /** |
| - * A reference to the next regular (non-special) token from the input |
| - * stream. If this is the last token from the input stream, or if the |
| - * token manager has not read tokens beyond this one, this field is |
| - * set to null. This is true only if this token is also a regular |
| - * token. Otherwise, see below for a description of the contents of |
| - * this field. |
| - */ |
| - public Token next; |
| - |
| - /** |
| - * This field is used to access special tokens that occur prior to this |
| - * token, but after the immediately preceding regular (non-special) token. |
| - * If there are no such special tokens, this field is set to null. |
| - * When there are more than one such special token, this field refers |
| - * to the last of these special tokens, which in turn refers to the next |
| - * previous special token through its specialToken field, and so on |
| - * until the first special token (whose specialToken field is null). |
| - * The next fields of special tokens refer to other special tokens that |
| - * immediately follow it (without an intervening regular token). If there |
| - * is no such token, this field is null. |
| - */ |
| - public Token specialToken; |
| - |
| - /** |
| - * Returns the image. |
| - */ |
| - public String toString() |
| - { |
| - return image; |
| - } |
| - |
| - /** |
| - * Returns a new Token object, by default. However, if you want, you |
| - * can create and return subclass objects based on the value of ofKind. |
| - * Simply add the cases to the switch for all those special cases. |
| - * For example, if you have a subclass of Token called IDToken that |
| - * you want to create if ofKind is ID, simlpy add something like : |
| - * |
| - * case MyParserConstants.ID : return new IDToken(); |
| - * |
| - * to the following switch statement. Then you can cast matchedToken |
| - * variable to the appropriate type and use it in your lexical actions. |
| - */ |
| - public static final Token newToken(int ofKind) |
| - { |
| - switch(ofKind) |
| - { |
| - default : return new Token(); |
| - } |
| - } |
| - |
| -} |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/TokenMgrError.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/TokenMgrError.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/TokenMgrError.java (working copy)
|
| @@ -1,133 +0,0 @@
|
| -/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */ |
| -package org.apache.lucene.analysis.standard; |
| - |
| -public class TokenMgrError extends Error |
| -{ |
| - /* |
| - * Ordinals for various reasons why an Error of this type can be thrown. |
| - */ |
| - |
| - /** |
| - * Lexical error occured. |
| - */ |
| - static final int LEXICAL_ERROR = 0; |
| - |
| - /** |
| - * An attempt wass made to create a second instance of a static token manager. |
| - */ |
| - static final int STATIC_LEXER_ERROR = 1; |
| - |
| - /** |
| - * Tried to change to an invalid lexical state. |
| - */ |
| - static final int INVALID_LEXICAL_STATE = 2; |
| - |
| - /** |
| - * Detected (and bailed out of) an infinite loop in the token manager. |
| - */ |
| - static final int LOOP_DETECTED = 3; |
| - |
| - /** |
| - * Indicates the reason why the exception is thrown. It will have |
| - * one of the above 4 values. |
| - */ |
| - int errorCode; |
| - |
| - /** |
| - * Replaces unprintable characters by their espaced (or unicode escaped) |
| - * equivalents in the given string |
| - */ |
| - protected static final String addEscapes(String str) { |
| - StringBuffer retval = new StringBuffer(); |
| - char ch; |
| - for (int i = 0; i < str.length(); i++) { |
| - switch (str.charAt(i)) |
| - { |
| - case 0 : |
| - continue; |
| - case '\b': |
| - retval.append("\\b"); |
| - continue; |
| - case '\t': |
| - retval.append("\\t"); |
| - continue; |
| - case '\n': |
| - retval.append("\\n"); |
| - continue; |
| - case '\f': |
| - retval.append("\\f"); |
| - continue; |
| - case '\r': |
| - retval.append("\\r"); |
| - continue; |
| - case '\"': |
| - retval.append("\\\""); |
| - continue; |
| - case '\'': |
| - retval.append("\\\'"); |
| - continue; |
| - case '\\': |
| - retval.append("\\\\"); |
| - continue; |
| - default: |
| - if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { |
| - String s = "0000" + Integer.toString(ch, 16); |
| - retval.append("\\u" + s.substring(s.length() - 4, s.length())); |
| - } else { |
| - retval.append(ch); |
| - } |
| - continue; |
| - } |
| - } |
| - return retval.toString(); |
| - } |
| - |
| - /** |
| - * Returns a detailed message for the Error when it is thrown by the |
| - * token manager to indicate a lexical error. |
| - * Parameters : |
| - * EOFSeen : indicates if EOF caused the lexicl error |
| - * curLexState : lexical state in which this error occured |
| - * errorLine : line number when the error occured |
| - * errorColumn : column number when the error occured |
| - * errorAfter : prefix that was seen before this error occured |
| - * curchar : the offending character |
| - * Note: You can customize the lexical error message by modifying this method. |
| - */ |
| - protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { |
| - return("Lexical error at line " + |
| - errorLine + ", column " + |
| - errorColumn + ". Encountered: " + |
| - (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + |
| - "after : \"" + addEscapes(errorAfter) + "\""); |
| - } |
| - |
| - /** |
| - * You can also modify the body of this method to customize your error messages. |
| - * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not |
| - * of end-users concern, so you can return something like : |
| - * |
| - * "Internal Error : Please file a bug report .... " |
| - * |
| - * from this method for such cases in the release version of your parser. |
| - */ |
| - public String getMessage() { |
| - return super.getMessage(); |
| - } |
| - |
| - /* |
| - * Constructors of various flavors follow. |
| - */ |
| - |
| - public TokenMgrError() { |
| - } |
| - |
| - public TokenMgrError(String message, int reason) { |
| - super(message); |
| - errorCode = reason; |
| - } |
| - |
| - public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { |
| - this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); |
| - } |
| -} |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (revision 0)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (revision 0)
|
| @@ -0,0 +1,107 @@
|
| +package org.apache.lucene.analysis.standard;
|
| +%%
|
| +
|
| +%class StandardTokenizerImpl
|
| +%unicode
|
| +%integer
|
| +%function getNextToken
|
| +%pack
|
| +%char
|
| +
|
| +%{
|
| +
|
| +public static final int ALPHANUM = 0;
|
| +public static final int APOSTROPHE = 1;
|
| +public static final int ACRONYM = 2;
|
| +public static final int COMPANY = 3;
|
| +public static final int EMAIL = 4;
|
| +public static final int HOST = 5;
|
| +public static final int NUM = 6;
|
| +public static final int CJ = 7;
|
| +
|
| +public static final String [] TOKEN_TYPES = new String [] {
|
| + "<ALPHANUM>",
|
| + "<APOSTROPHE>",
|
| + "<ACRONYM>",
|
| + "<COMPANY>",
|
| + "<EMAIL>",
|
| + "<HOST>",
|
| + "<NUM>",
|
| + "<CJ>"
|
| +};
|
| +
|
| +public final int yychar()
|
| +{
|
| + return yychar;
|
| +}
|
| +%}
|
| +
|
| +// basic word: a sequence of digits & letters
|
| +ALPHANUM = ({LETTER}|{DIGIT}|{KOREAN})+
|
| +
|
| +// internal apostrophes: O'Reilly, you're, O'Reilly's
|
| +// use a post-filter to remove possessives
|
| +APOSTROPHE = {ALPHA} ("'" {ALPHA})+
|
| +
|
| +// acronyms: U.S.A., I.B.M., etc.
|
| +// use a post-filter to remove dots
|
| +ACRONYM = {ALPHA} "." ({ALPHA} ".")+
|
| +
|
| +// company names like AT&T and Excite@Home.
|
| +COMPANY = {ALPHA} ("&"|"@") {ALPHA}
|
| +
|
| +// email addresses
|
| +EMAIL = {ALPHANUM} (("."|"-"|"_") {ALPHANUM})* "@" {ALPHANUM} (("."|"-") {ALPHANUM})+
|
| +
|
| +// hostname
|
| +HOST = {ALPHANUM} ("." {ALPHANUM})+
|
| +
|
| +// floating point, serial, model numbers, ip addresses, etc.
|
| +// every other segment must have at least one digit
|
| +/*
|
| +NUM = ({ALPHANUM} {P} {HAS_DIGIT}
|
| + | {HAS_DIGIT} {P} {ALPHANUM}
|
| + | {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+
|
| + | {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
|
| + | {ALPHANUM} {P} {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
|
| + | {HAS_DIGIT} {P} {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+)
|
| +*/
|
| +
|
| +NUM = ({P} ({HAS_DIGIT} | {ALPHANUM}))* {HAS_DIGIT} ({P} ({HAS_DIGIT} | {ALPHANUM}))*
|
| +
|
| +// punctuation
|
| +P = ("_"|"-"|"/"|"."|",")
|
| +
|
| +// at least one digit
|
| +HAS_DIGIT =
|
| + ({LETTER}|{DIGIT})*
|
| + {DIGIT}
|
| + ({LETTER}|{DIGIT})*
|
| +
|
| +ALPHA = ({LETTER})+
|
| +
|
| +
|
| +LETTER = [\u0041-\u005a\u0061-\u007a\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff\u0100-\u1fff\uffa0-\uffdc]
|
| +
|
| +DIGIT = [\u0030-\u0039\u0660-\u0669\u06f0-\u06f9\u0966-\u096f\u09e6-\u09ef\u0a66-\u0a6f\u0ae6-\u0aef\u0b66-\u0b6f\u0be7-\u0bef\u0c66-\u0c6f\u0ce6-\u0cef\u0d66-\u0d6f\u0e50-\u0e59\u0ed0-\u0ed9\u1040-\u1049]
|
| +
|
| +KOREAN = [\uac00-\ud7af\u1100-\u11ff]
|
| +
|
| +// Chinese, Japanese
|
| +CJ = [\u3040-\u318f\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
|
| +
|
| +WHITESPACE = \r\n | [ \r\n\t\f]
|
| +
|
| +%%
|
| +
|
| +{ALPHANUM} { return ALPHANUM; }
|
| +{HOST} { return HOST; }
|
| +{NUM} { return NUM; }
|
| +{APOSTROPHE} { return APOSTROPHE; }
|
| +{ACRONYM} { return ACRONYM; }
|
| +{COMPANY} { return COMPANY; }
|
| +{EMAIL} { return EMAIL; }
|
| +{CJ} { return CJ; }
|
| +
|
| +/** Ignore the rest */
|
| +. | {WHITESPACE} { /* ignore */ }
|
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardFilter.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardFilter.java (working copy)
|
| @@ -17,12 +17,12 @@
|
| * limitations under the License. |
| */ |
| |
| -import org.apache.lucene.analysis.*; |
| +import org.apache.lucene.analysis.TokenFilter; |
| +import org.apache.lucene.analysis.TokenStream; |
| |
| /** Normalizes tokens extracted with {@link StandardTokenizer}. */ |
| |
| -public final class StandardFilter extends TokenFilter |
| - implements StandardTokenizerConstants { |
| +public final class StandardFilter extends TokenFilter { |
| |
| |
| /** Construct filtering <i>in</i>. */ |
| @@ -30,9 +30,9 @@
|
| super(in); |
| } |
| |
| - private static final String APOSTROPHE_TYPE = tokenImage[APOSTROPHE]; |
| - private static final String ACRONYM_TYPE = tokenImage[ACRONYM]; |
| - |
| + private static final String APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE]; |
| + private static final String ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]; |
| + |
| /** Returns the next token in the stream, or null at EOS. |
| * <p>Removes <tt>'s</tt> from the end of words. |
| * <p>Removes dots from acronyms. |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/FastCharStream.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/FastCharStream.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/FastCharStream.java (working copy)
|
| @@ -1,122 +0,0 @@
|
| -// FastCharStream.java |
| -package org.apache.lucene.analysis.standard; |
| - |
| -/** |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -import java.io.*; |
| - |
| -/** An efficient implementation of JavaCC's CharStream interface. <p>Note that |
| - * this does not do line-number counting, but instead keeps track of the |
| - * character position of the token in the input, as required by Lucene's {@link |
| - * org.apache.lucene.analysis.Token} API. */ |
| -public final class FastCharStream implements CharStream { |
| - char[] buffer = null; |
| - |
| - int bufferLength = 0; // end of valid chars |
| - int bufferPosition = 0; // next char to read |
| - |
| - int tokenStart = 0; // offset in buffer |
| - int bufferStart = 0; // position in file of buffer |
| - |
| - Reader input; // source of chars |
| - |
| - /** Constructs from a Reader. */ |
| - public FastCharStream(Reader r) { |
| - input = r; |
| - } |
| - |
| - public final char readChar() throws IOException { |
| - if (bufferPosition >= bufferLength) |
| - refill(); |
| - return buffer[bufferPosition++]; |
| - } |
| - |
| - private final void refill() throws IOException { |
| - int newPosition = bufferLength - tokenStart; |
| - |
| - if (tokenStart == 0) { // token won't fit in buffer |
| - if (buffer == null) { // first time: alloc buffer |
| - buffer = new char[2048]; |
| - } else if (bufferLength == buffer.length) { // grow buffer |
| - char[] newBuffer = new char[buffer.length*2]; |
| - System.arraycopy(buffer, 0, newBuffer, 0, bufferLength); |
| - buffer = newBuffer; |
| - } |
| - } else { // shift token to front |
| - System.arraycopy(buffer, tokenStart, buffer, 0, newPosition); |
| - } |
| - |
| - bufferLength = newPosition; // update state |
| - bufferPosition = newPosition; |
| - bufferStart += tokenStart; |
| - tokenStart = 0; |
| - |
| - int charsRead = // fill space in buffer |
| - input.read(buffer, newPosition, buffer.length-newPosition); |
| - if (charsRead == -1) |
| - throw new IOException("read past eof"); |
| - else |
| - bufferLength += charsRead; |
| - } |
| - |
| - public final char BeginToken() throws IOException { |
| - tokenStart = bufferPosition; |
| - return readChar(); |
| - } |
| - |
| - public final void backup(int amount) { |
| - bufferPosition -= amount; |
| - } |
| - |
| - public final String GetImage() { |
| - return new String(buffer, tokenStart, bufferPosition - tokenStart); |
| - } |
| - |
| - public final char[] GetSuffix(int len) { |
| - char[] value = new char[len]; |
| - System.arraycopy(buffer, bufferPosition - len, value, 0, len); |
| - return value; |
| - } |
| - |
| - public final void Done() { |
| - try { |
| - input.close(); |
| - } catch (IOException e) { |
| - System.err.println("Caught: " + e + "; ignoring."); |
| - } |
| - } |
| - |
| - public final int getColumn() { |
| - return bufferStart + bufferPosition; |
| - } |
| - public final int getLine() { |
| - return 1; |
| - } |
| - public final int getEndColumn() { |
| - return bufferStart + bufferPosition; |
| - } |
| - public final int getEndLine() { |
| - return 1; |
| - } |
| - public final int getBeginColumn() { |
| - return bufferStart + tokenStart; |
| - } |
| - public final int getBeginLine() { |
| - return 1; |
| - } |
| -} |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java (working copy)
|
| @@ -1,1233 +0,0 @@
|
| -/* Generated By:JavaCC: Do not edit this line. StandardTokenizerTokenManager.java */ |
| -package org.apache.lucene.analysis.standard; |
| -import java.io.*; |
| - |
| -public class StandardTokenizerTokenManager implements StandardTokenizerConstants |
| -{ |
| - public java.io.PrintStream debugStream = System.out; |
| - public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } |
| -private final int jjMoveStringLiteralDfa0_0() |
| -{ |
| - return jjMoveNfa_0(0, 0); |
| -} |
| -private final void jjCheckNAdd(int state) |
| -{ |
| - if (jjrounds[state] != jjround) |
| - { |
| - jjstateSet[jjnewStateCnt++] = state; |
| - jjrounds[state] = jjround; |
| - } |
| -} |
| -private final void jjAddStates(int start, int end) |
| -{ |
| - do { |
| - jjstateSet[jjnewStateCnt++] = jjnextStates[start]; |
| - } while (start++ != end); |
| -} |
| -private final void jjCheckNAddTwoStates(int state1, int state2) |
| -{ |
| - jjCheckNAdd(state1); |
| - jjCheckNAdd(state2); |
| -} |
| -private final void jjCheckNAddStates(int start, int end) |
| -{ |
| - do { |
| - jjCheckNAdd(jjnextStates[start]); |
| - } while (start++ != end); |
| -} |
| -private final void jjCheckNAddStates(int start) |
| -{ |
| - jjCheckNAdd(jjnextStates[start]); |
| - jjCheckNAdd(jjnextStates[start + 1]); |
| -} |
| -static final long[] jjbitVec0 = { |
| - 0xfff0000000000000L, 0xffffffffffffdfffL, 0xffffffffL, 0x600000000000000L |
| -}; |
| -static final long[] jjbitVec2 = { |
| - 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL |
| -}; |
| -static final long[] jjbitVec3 = { |
| - 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffL, 0xffff000000000000L |
| -}; |
| -static final long[] jjbitVec4 = { |
| - 0xffffffffffffffffL, 0xffffffffffffffffL, 0x0L, 0x0L |
| -}; |
| -static final long[] jjbitVec5 = { |
| - 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0x0L |
| -}; |
| -static final long[] jjbitVec6 = { |
| - 0x0L, 0xffffffe000000000L, 0xffffffffL, 0x0L |
| -}; |
| -static final long[] jjbitVec7 = { |
| - 0x20000L, 0x0L, 0xfffff00000000000L, 0x7fffffL |
| -}; |
| -static final long[] jjbitVec8 = { |
| - 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffL, 0x0L |
| -}; |
| -static final long[] jjbitVec9 = { |
| - 0xfffffffeL, 0x0L, 0x0L, 0x0L |
| -}; |
| -static final long[] jjbitVec10 = { |
| - 0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL |
| -}; |
| -static final long[] jjbitVec11 = { |
| - 0x0L, 0x0L, 0xffffffff00000000L, 0x1fffffffL |
| -}; |
| -static final long[] jjbitVec12 = { |
| - 0x1600L, 0x0L, 0x0L, 0x0L |
| -}; |
| -static final long[] jjbitVec13 = { |
| - 0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L |
| -}; |
| -static final long[] jjbitVec14 = { |
| - 0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L |
| -}; |
| -static final long[] jjbitVec15 = { |
| - 0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L |
| -}; |
| -static final long[] jjbitVec16 = { |
| - 0x0L, 0xffc000000000L, 0x0L, 0x0L |
| -}; |
| -static final long[] jjbitVec17 = { |
| - 0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L |
| -}; |
| -static final long[] jjbitVec18 = { |
| - 0x0L, 0x3ffL, 0x0L, 0x0L |
| -}; |
| -static final long[] jjbitVec19 = { |
| - 0xfffffffeL, 0x0L, 0xfffff00000000000L, 0x7fffffL |
| -}; |
| -private final int jjMoveNfa_0(int startState, int curPos) |
| -{ |
| - int[] nextStates; |
| - int startsAt = 0; |
| - jjnewStateCnt = 75; |
| - int i = 1; |
| - jjstateSet[0] = startState; |
| - int j, kind = 0x7fffffff; |
| - for (;;) |
| - { |
| - if (++jjround == 0x7fffffff) |
| - ReInitRounds(); |
| - if (curChar < 64) |
| - { |
| - long l = 1L << curChar; |
| - MatchLoop: do |
| - { |
| - switch(jjstateSet[--i]) |
| - { |
| - case 0: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - { |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAddStates(0, 11); |
| - } |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddStates(12, 17); |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddStates(18, 23); |
| - break; |
| - case 2: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddStates(18, 23); |
| - break; |
| - case 3: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(3, 4); |
| - break; |
| - case 4: |
| - case 5: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(5, 6); |
| - break; |
| - case 6: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAdd(7); |
| - break; |
| - case 7: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAdd(7); |
| - break; |
| - case 8: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(8, 9); |
| - break; |
| - case 9: |
| - case 10: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(10, 11); |
| - break; |
| - case 11: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAdd(12); |
| - break; |
| - case 12: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(12, 13); |
| - break; |
| - case 13: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(14, 15); |
| - break; |
| - case 14: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(14, 15); |
| - break; |
| - case 15: |
| - case 16: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(11, 16); |
| - break; |
| - case 17: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(17, 18); |
| - break; |
| - case 18: |
| - case 19: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(19, 20); |
| - break; |
| - case 20: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAdd(21); |
| - break; |
| - case 21: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(21, 22); |
| - break; |
| - case 22: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(23, 24); |
| - break; |
| - case 23: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(23, 24); |
| - break; |
| - case 24: |
| - case 25: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(25, 26); |
| - break; |
| - case 26: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAdd(27); |
| - break; |
| - case 27: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(22, 27); |
| - break; |
| - case 28: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddStates(12, 17); |
| - break; |
| - case 29: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAddStates(0, 11); |
| - break; |
| - case 30: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAdd(30); |
| - break; |
| - case 31: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddStates(24, 26); |
| - break; |
| - case 32: |
| - if ((0x600000000000L & l) != 0L) |
| - jjCheckNAdd(33); |
| - break; |
| - case 33: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddStates(27, 29); |
| - break; |
| - case 35: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(35, 36); |
| - break; |
| - case 36: |
| - if ((0x600000000000L & l) != 0L) |
| - jjCheckNAdd(37); |
| - break; |
| - case 37: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 5) |
| - kind = 5; |
| - jjCheckNAddTwoStates(36, 37); |
| - break; |
| - case 38: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(38, 39); |
| - break; |
| - case 39: |
| - if (curChar == 46) |
| - jjCheckNAdd(40); |
| - break; |
| - case 40: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 6) |
| - kind = 6; |
| - jjCheckNAddTwoStates(39, 40); |
| - break; |
| - case 41: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(41, 42); |
| - break; |
| - case 42: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(43, 44); |
| - break; |
| - case 43: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(43, 44); |
| - break; |
| - case 44: |
| - case 45: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAdd(45); |
| - break; |
| - case 46: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(46, 47); |
| - break; |
| - case 47: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(48, 49); |
| - break; |
| - case 48: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(48, 49); |
| - break; |
| - case 49: |
| - case 50: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(50, 51); |
| - break; |
| - case 51: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAdd(52); |
| - break; |
| - case 52: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(47, 52); |
| - break; |
| - case 53: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(53, 54); |
| - break; |
| - case 54: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(55, 56); |
| - break; |
| - case 55: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(55, 56); |
| - break; |
| - case 56: |
| - case 57: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(57, 58); |
| - break; |
| - case 58: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAdd(59); |
| - break; |
| - case 59: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(59, 60); |
| - break; |
| - case 60: |
| - if ((0xf00000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(61, 62); |
| - break; |
| - case 61: |
| - if ((0x3ff000000000000L & l) != 0L) |
| - jjCheckNAddTwoStates(61, 62); |
| - break; |
| - case 62: |
| - case 63: |
| - if ((0x3ff000000000000L & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(58, 63); |
| - break; |
| - case 66: |
| - if (curChar == 39) |
| - jjstateSet[jjnewStateCnt++] = 67; |
| - break; |
| - case 69: |
| - if (curChar == 46) |
| - jjCheckNAdd(70); |
| - break; |
| - case 71: |
| - if (curChar != 46) |
| - break; |
| - if (kind > 3) |
| - kind = 3; |
| - jjCheckNAdd(70); |
| - break; |
| - case 73: |
| - if (curChar == 38) |
| - jjstateSet[jjnewStateCnt++] = 74; |
| - break; |
| - default : break; |
| - } |
| - } while(i != startsAt); |
| - } |
| - else if (curChar < 128) |
| - { |
| - long l = 1L << (curChar & 077); |
| - MatchLoop: do |
| - { |
| - switch(jjstateSet[--i]) |
| - { |
| - case 0: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddStates(30, 35); |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - { |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAddStates(0, 11); |
| - } |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddStates(18, 23); |
| - break; |
| - case 2: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddStates(18, 23); |
| - break; |
| - case 3: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(3, 4); |
| - break; |
| - case 5: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjAddStates(36, 37); |
| - break; |
| - case 6: |
| - if (curChar == 95) |
| - jjCheckNAdd(7); |
| - break; |
| - case 7: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAdd(7); |
| - break; |
| - case 8: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(8, 9); |
| - break; |
| - case 10: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(10, 11); |
| - break; |
| - case 11: |
| - if (curChar == 95) |
| - jjCheckNAdd(12); |
| - break; |
| - case 12: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(12, 13); |
| - break; |
| - case 13: |
| - if (curChar == 95) |
| - jjCheckNAddTwoStates(14, 15); |
| - break; |
| - case 14: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(14, 15); |
| - break; |
| - case 16: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(11, 16); |
| - break; |
| - case 17: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(17, 18); |
| - break; |
| - case 19: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjAddStates(38, 39); |
| - break; |
| - case 20: |
| - if (curChar == 95) |
| - jjCheckNAdd(21); |
| - break; |
| - case 21: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(21, 22); |
| - break; |
| - case 22: |
| - if (curChar == 95) |
| - jjCheckNAddTwoStates(23, 24); |
| - break; |
| - case 23: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(23, 24); |
| - break; |
| - case 25: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjAddStates(40, 41); |
| - break; |
| - case 26: |
| - if (curChar == 95) |
| - jjCheckNAdd(27); |
| - break; |
| - case 27: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(22, 27); |
| - break; |
| - case 29: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAddStates(0, 11); |
| - break; |
| - case 30: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAdd(30); |
| - break; |
| - case 31: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddStates(24, 26); |
| - break; |
| - case 32: |
| - if (curChar == 95) |
| - jjCheckNAdd(33); |
| - break; |
| - case 33: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddStates(27, 29); |
| - break; |
| - case 34: |
| - if (curChar == 64) |
| - jjCheckNAdd(35); |
| - break; |
| - case 35: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(35, 36); |
| - break; |
| - case 37: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 5) |
| - kind = 5; |
| - jjCheckNAddTwoStates(36, 37); |
| - break; |
| - case 38: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(38, 39); |
| - break; |
| - case 40: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 6) |
| - kind = 6; |
| - jjCheckNAddTwoStates(39, 40); |
| - break; |
| - case 41: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(41, 42); |
| - break; |
| - case 42: |
| - if (curChar == 95) |
| - jjCheckNAddTwoStates(43, 44); |
| - break; |
| - case 43: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(43, 44); |
| - break; |
| - case 45: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjstateSet[jjnewStateCnt++] = 45; |
| - break; |
| - case 46: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(46, 47); |
| - break; |
| - case 47: |
| - if (curChar == 95) |
| - jjCheckNAddTwoStates(48, 49); |
| - break; |
| - case 48: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(48, 49); |
| - break; |
| - case 50: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjAddStates(42, 43); |
| - break; |
| - case 51: |
| - if (curChar == 95) |
| - jjCheckNAdd(52); |
| - break; |
| - case 52: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(47, 52); |
| - break; |
| - case 53: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(53, 54); |
| - break; |
| - case 54: |
| - if (curChar == 95) |
| - jjCheckNAddTwoStates(55, 56); |
| - break; |
| - case 55: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(55, 56); |
| - break; |
| - case 57: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(57, 58); |
| - break; |
| - case 58: |
| - if (curChar == 95) |
| - jjCheckNAdd(59); |
| - break; |
| - case 59: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(59, 60); |
| - break; |
| - case 60: |
| - if (curChar == 95) |
| - jjCheckNAddTwoStates(61, 62); |
| - break; |
| - case 61: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(61, 62); |
| - break; |
| - case 63: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(58, 63); |
| - break; |
| - case 64: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddStates(30, 35); |
| - break; |
| - case 65: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(65, 66); |
| - break; |
| - case 67: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 2) |
| - kind = 2; |
| - jjCheckNAddTwoStates(66, 67); |
| - break; |
| - case 68: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(68, 69); |
| - break; |
| - case 70: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjAddStates(44, 45); |
| - break; |
| - case 72: |
| - if ((0x7fffffe07fffffeL & l) != 0L) |
| - jjCheckNAddTwoStates(72, 73); |
| - break; |
| - case 73: |
| - if (curChar == 64) |
| - jjCheckNAdd(74); |
| - break; |
| - case 74: |
| - if ((0x7fffffe07fffffeL & l) == 0L) |
| - break; |
| - if (kind > 4) |
| - kind = 4; |
| - jjCheckNAdd(74); |
| - break; |
| - default : break; |
| - } |
| - } while(i != startsAt); |
| - } |
| - else |
| - { |
| - int hiByte = (int)(curChar >> 8); |
| - int i1 = hiByte >> 6; |
| - long l1 = 1L << (hiByte & 077); |
| - int i2 = (curChar & 0xff) >> 6; |
| - long l2 = 1L << (curChar & 077); |
| - MatchLoop: do |
| - { |
| - switch(jjstateSet[--i]) |
| - { |
| - case 0: |
| - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) |
| - { |
| - if (kind > 12) |
| - kind = 12; |
| - } |
| - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) |
| - { |
| - if (kind > 13) |
| - kind = 13; |
| - } |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddStates(18, 23); |
| - if (jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddStates(12, 17); |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - { |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAddStates(0, 11); |
| - } |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddStates(30, 35); |
| - break; |
| - case 1: |
| - if (jjCanMove_1(hiByte, i1, i2, l1, l2) && kind > 13) |
| - kind = 13; |
| - break; |
| - case 2: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddStates(18, 23); |
| - break; |
| - case 3: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(3, 4); |
| - break; |
| - case 4: |
| - if (jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(5, 6); |
| - break; |
| - case 5: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(5, 6); |
| - break; |
| - case 7: |
| - if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjstateSet[jjnewStateCnt++] = 7; |
| - break; |
| - case 8: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(8, 9); |
| - break; |
| - case 9: |
| - if (jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(10, 11); |
| - break; |
| - case 10: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(10, 11); |
| - break; |
| - case 12: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(46, 47); |
| - break; |
| - case 14: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(48, 49); |
| - break; |
| - case 15: |
| - if (!jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(11, 16); |
| - break; |
| - case 16: |
| - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(11, 16); |
| - break; |
| - case 17: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(17, 18); |
| - break; |
| - case 18: |
| - if (jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(19, 20); |
| - break; |
| - case 19: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(19, 20); |
| - break; |
| - case 21: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(21, 22); |
| - break; |
| - case 23: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(50, 51); |
| - break; |
| - case 24: |
| - if (jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(25, 26); |
| - break; |
| - case 25: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(25, 26); |
| - break; |
| - case 27: |
| - if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(22, 27); |
| - break; |
| - case 28: |
| - if (jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddStates(12, 17); |
| - break; |
| - case 29: |
| - if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAddStates(0, 11); |
| - break; |
| - case 30: |
| - if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 1) |
| - kind = 1; |
| - jjCheckNAdd(30); |
| - break; |
| - case 31: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddStates(24, 26); |
| - break; |
| - case 33: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddStates(27, 29); |
| - break; |
| - case 35: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(35, 36); |
| - break; |
| - case 37: |
| - if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 5) |
| - kind = 5; |
| - jjCheckNAddTwoStates(36, 37); |
| - break; |
| - case 38: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(38, 39); |
| - break; |
| - case 40: |
| - if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 6) |
| - kind = 6; |
| - jjCheckNAddTwoStates(39, 40); |
| - break; |
| - case 41: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(41, 42); |
| - break; |
| - case 43: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(52, 53); |
| - break; |
| - case 44: |
| - if (!jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAdd(45); |
| - break; |
| - case 45: |
| - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAdd(45); |
| - break; |
| - case 46: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(46, 47); |
| - break; |
| - case 48: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(54, 55); |
| - break; |
| - case 49: |
| - if (jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(50, 51); |
| - break; |
| - case 50: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(50, 51); |
| - break; |
| - case 52: |
| - if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(47, 52); |
| - break; |
| - case 53: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(53, 54); |
| - break; |
| - case 55: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(56, 57); |
| - break; |
| - case 56: |
| - if (jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(57, 58); |
| - break; |
| - case 57: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(57, 58); |
| - break; |
| - case 59: |
| - if (jjCanMove_4(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(58, 59); |
| - break; |
| - case 61: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(60, 61); |
| - break; |
| - case 62: |
| - if (!jjCanMove_3(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(58, 63); |
| - break; |
| - case 63: |
| - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 7) |
| - kind = 7; |
| - jjCheckNAddTwoStates(58, 63); |
| - break; |
| - case 64: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddStates(30, 35); |
| - break; |
| - case 65: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(65, 66); |
| - break; |
| - case 67: |
| - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 2) |
| - kind = 2; |
| - jjCheckNAddTwoStates(66, 67); |
| - break; |
| - case 68: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(68, 69); |
| - break; |
| - case 70: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjAddStates(44, 45); |
| - break; |
| - case 72: |
| - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - jjCheckNAddTwoStates(72, 73); |
| - break; |
| - case 74: |
| - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) |
| - break; |
| - if (kind > 4) |
| - kind = 4; |
| - jjstateSet[jjnewStateCnt++] = 74; |
| - break; |
| - default : break; |
| - } |
| - } while(i != startsAt); |
| - } |
| - if (kind != 0x7fffffff) |
| - { |
| - jjmatchedKind = kind; |
| - jjmatchedPos = curPos; |
| - kind = 0x7fffffff; |
| - } |
| - ++curPos; |
| - if ((i = jjnewStateCnt) == (startsAt = 75 - (jjnewStateCnt = startsAt))) |
| - return curPos; |
| - try { curChar = input_stream.readChar(); } |
| - catch(java.io.IOException e) { return curPos; } |
| - } |
| -} |
| -static final int[] jjnextStates = { |
| - 30, 31, 32, 34, 38, 39, 41, 42, 46, 47, 53, 54, 5, 6, 10, 11, |
| - 19, 20, 3, 4, 8, 9, 17, 18, 31, 32, 34, 32, 33, 34, 65, 66, |
| - 68, 69, 72, 73, 5, 6, 19, 20, 25, 26, 50, 51, 70, 71, 12, 13, |
| - 14, 15, 23, 24, 43, 44, 48, 49, 55, 56, 59, 60, 61, 62, |
| -}; |
| -private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) |
| -{ |
| - switch(hiByte) |
| - { |
| - case 48: |
| - return ((jjbitVec2[i2] & l2) != 0L); |
| - case 49: |
| - return ((jjbitVec3[i2] & l2) != 0L); |
| - case 51: |
| - return ((jjbitVec4[i2] & l2) != 0L); |
| - case 77: |
| - return ((jjbitVec5[i2] & l2) != 0L); |
| - case 255: |
| - return ((jjbitVec6[i2] & l2) != 0L); |
| - default : |
| - if ((jjbitVec0[i1] & l1) != 0L) |
| - return true; |
| - return false; |
| - } |
| -} |
| -private static final boolean jjCanMove_1(int hiByte, int i1, int i2, long l1, long l2) |
| -{ |
| - switch(hiByte) |
| - { |
| - case 215: |
| - return ((jjbitVec8[i2] & l2) != 0L); |
| - default : |
| - if ((jjbitVec7[i1] & l1) != 0L) |
| - return true; |
| - return false; |
| - } |
| -} |
| -private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, long l2) |
| -{ |
| - switch(hiByte) |
| - { |
| - case 0: |
| - return ((jjbitVec10[i2] & l2) != 0L); |
| - case 255: |
| - return ((jjbitVec11[i2] & l2) != 0L); |
| - default : |
| - if ((jjbitVec9[i1] & l1) != 0L) |
| - return true; |
| - return false; |
| - } |
| -} |
| -private static final boolean jjCanMove_3(int hiByte, int i1, int i2, long l1, long l2) |
| -{ |
| - switch(hiByte) |
| - { |
| - case 6: |
| - return ((jjbitVec14[i2] & l2) != 0L); |
| - case 11: |
| - return ((jjbitVec15[i2] & l2) != 0L); |
| - case 13: |
| - return ((jjbitVec16[i2] & l2) != 0L); |
| - case 14: |
| - return ((jjbitVec17[i2] & l2) != 0L); |
| - case 16: |
| - return ((jjbitVec18[i2] & l2) != 0L); |
| - default : |
| - if ((jjbitVec12[i1] & l1) != 0L) |
| - if ((jjbitVec13[i2] & l2) == 0L) |
| - return false; |
| - else |
| - return true; |
| - return false; |
| - } |
| -} |
| -private static final boolean jjCanMove_4(int hiByte, int i1, int i2, long l1, long l2) |
| -{ |
| - switch(hiByte) |
| - { |
| - case 0: |
| - return ((jjbitVec10[i2] & l2) != 0L); |
| - case 215: |
| - return ((jjbitVec8[i2] & l2) != 0L); |
| - case 255: |
| - return ((jjbitVec11[i2] & l2) != 0L); |
| - default : |
| - if ((jjbitVec19[i1] & l1) != 0L) |
| - return true; |
| - return false; |
| - } |
| -} |
| -public static final String[] jjstrLiteralImages = { |
| -"", null, null, null, null, null, null, null, null, null, null, null, null, |
| -null, null, null, }; |
| -public static final String[] lexStateNames = { |
| - "DEFAULT", |
| -}; |
| -static final long[] jjtoToken = { |
| - 0x30ffL, |
| -}; |
| -static final long[] jjtoSkip = { |
| - 0x8000L, |
| -}; |
| -protected CharStream input_stream; |
| -private final int[] jjrounds = new int[75]; |
| -private final int[] jjstateSet = new int[150]; |
| -protected char curChar; |
| -public StandardTokenizerTokenManager(CharStream stream) |
| -{ |
| - input_stream = stream; |
| -} |
| -public StandardTokenizerTokenManager(CharStream stream, int lexState) |
| -{ |
| - this(stream); |
| - SwitchTo(lexState); |
| -} |
| -public void ReInit(CharStream stream) |
| -{ |
| - jjmatchedPos = jjnewStateCnt = 0; |
| - curLexState = defaultLexState; |
| - input_stream = stream; |
| - ReInitRounds(); |
| -} |
| -private final void ReInitRounds() |
| -{ |
| - int i; |
| - jjround = 0x80000001; |
| - for (i = 75; i-- > 0;) |
| - jjrounds[i] = 0x80000000; |
| -} |
| -public void ReInit(CharStream stream, int lexState) |
| -{ |
| - ReInit(stream); |
| - SwitchTo(lexState); |
| -} |
| -public void SwitchTo(int lexState) |
| -{ |
| - if (lexState >= 1 || lexState < 0) |
| - throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); |
| - else |
| - curLexState = lexState; |
| -} |
| - |
| -protected Token jjFillToken() |
| -{ |
| - Token t = Token.newToken(jjmatchedKind); |
| - t.kind = jjmatchedKind; |
| - String im = jjstrLiteralImages[jjmatchedKind]; |
| - t.image = (im == null) ? input_stream.GetImage() : im; |
| - t.beginLine = input_stream.getBeginLine(); |
| - t.beginColumn = input_stream.getBeginColumn(); |
| - t.endLine = input_stream.getEndLine(); |
| - t.endColumn = input_stream.getEndColumn(); |
| - return t; |
| -} |
| - |
| -int curLexState = 0; |
| -int defaultLexState = 0; |
| -int jjnewStateCnt; |
| -int jjround; |
| -int jjmatchedPos; |
| -int jjmatchedKind; |
| - |
| -public Token getNextToken() |
| -{ |
| - int kind; |
| - Token specialToken = null; |
| - Token matchedToken; |
| - int curPos = 0; |
| - |
| - EOFLoop : |
| - for (;;) |
| - { |
| - try |
| - { |
| - curChar = input_stream.BeginToken(); |
| - } |
| - catch(java.io.IOException e) |
| - { |
| - jjmatchedKind = 0; |
| - matchedToken = jjFillToken(); |
| - return matchedToken; |
| - } |
| - |
| - jjmatchedKind = 0x7fffffff; |
| - jjmatchedPos = 0; |
| - curPos = jjMoveStringLiteralDfa0_0(); |
| - if (jjmatchedPos == 0 && jjmatchedKind > 15) |
| - { |
| - jjmatchedKind = 15; |
| - } |
| - if (jjmatchedKind != 0x7fffffff) |
| - { |
| - if (jjmatchedPos + 1 < curPos) |
| - input_stream.backup(curPos - jjmatchedPos - 1); |
| - if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) |
| - { |
| - matchedToken = jjFillToken(); |
| - return matchedToken; |
| - } |
| - else |
| - { |
| - continue EOFLoop; |
| - } |
| - } |
| - int error_line = input_stream.getEndLine(); |
| - int error_column = input_stream.getEndColumn(); |
| - String error_after = null; |
| - boolean EOFSeen = false; |
| - try { input_stream.readChar(); input_stream.backup(1); } |
| - catch (java.io.IOException e1) { |
| - EOFSeen = true; |
| - error_after = curPos <= 1 ? "" : input_stream.GetImage(); |
| - if (curChar == '\n' || curChar == '\r') { |
| - error_line++; |
| - error_column = 0; |
| - } |
| - else |
| - error_column++; |
| - } |
| - if (!EOFSeen) { |
| - input_stream.backup(1); |
| - error_after = curPos <= 1 ? "" : input_stream.GetImage(); |
| - } |
| - throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR); |
| - } |
| -} |
| - |
| -} |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java (working copy)
|
| @@ -1,44 +0,0 @@
|
| -/* Generated By:JavaCC: Do not edit this line. StandardTokenizerConstants.java */ |
| -package org.apache.lucene.analysis.standard; |
| - |
| -public interface StandardTokenizerConstants { |
| - |
| - int EOF = 0; |
| - int ALPHANUM = 1; |
| - int APOSTROPHE = 2; |
| - int ACRONYM = 3; |
| - int COMPANY = 4; |
| - int EMAIL = 5; |
| - int HOST = 6; |
| - int NUM = 7; |
| - int P = 8; |
| - int HAS_DIGIT = 9; |
| - int ALPHA = 10; |
| - int LETTER = 11; |
| - int CJ = 12; |
| - int KOREAN = 13; |
| - int DIGIT = 14; |
| - int NOISE = 15; |
| - |
| - int DEFAULT = 0; |
| - |
| - String[] tokenImage = { |
| - "<EOF>", |
| - "<ALPHANUM>", |
| - "<APOSTROPHE>", |
| - "<ACRONYM>", |
| - "<COMPANY>", |
| - "<EMAIL>", |
| - "<HOST>", |
| - "<NUM>", |
| - "<P>", |
| - "<HAS_DIGIT>", |
| - "<ALPHA>", |
| - "<LETTER>", |
| - "<CJ>", |
| - "<KOREAN>", |
| - "<DIGIT>", |
| - "<NOISE>", |
| - }; |
| - |
| -} |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/CharStream.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/CharStream.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/CharStream.java (working copy)
|
| @@ -1,110 +0,0 @@
|
| -/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */ |
| -package org.apache.lucene.analysis.standard; |
| - |
| -/** |
| - * This interface describes a character stream that maintains line and |
| - * column number positions of the characters. It also has the capability |
| - * to backup the stream to some extent. An implementation of this |
| - * interface is used in the TokenManager implementation generated by |
| - * JavaCCParser. |
| - * |
| - * All the methods except backup can be implemented in any fashion. backup |
| - * needs to be implemented correctly for the correct operation of the lexer. |
| - * Rest of the methods are all used to get information like line number, |
| - * column number and the String that constitutes a token and are not used |
| - * by the lexer. Hence their implementation won't affect the generated lexer's |
| - * operation. |
| - */ |
| - |
| -public interface CharStream { |
| - |
| - /** |
| - * Returns the next character from the selected input. The method |
| - * of selecting the input is the responsibility of the class |
| - * implementing this interface. Can throw any java.io.IOException. |
| - */ |
| - char readChar() throws java.io.IOException; |
| - |
| - /** |
| - * Returns the column position of the character last read. |
| - * @deprecated |
| - * @see #getEndColumn |
| - */ |
| - int getColumn(); |
| - |
| - /** |
| - * Returns the line number of the character last read. |
| - * @deprecated |
| - * @see #getEndLine |
| - */ |
| - int getLine(); |
| - |
| - /** |
| - * Returns the column number of the last character for current token (being |
| - * matched after the last call to BeginTOken). |
| - */ |
| - int getEndColumn(); |
| - |
| - /** |
| - * Returns the line number of the last character for current token (being |
| - * matched after the last call to BeginTOken). |
| - */ |
| - int getEndLine(); |
| - |
| - /** |
| - * Returns the column number of the first character for current token (being |
| - * matched after the last call to BeginTOken). |
| - */ |
| - int getBeginColumn(); |
| - |
| - /** |
| - * Returns the line number of the first character for current token (being |
| - * matched after the last call to BeginTOken). |
| - */ |
| - int getBeginLine(); |
| - |
| - /** |
| - * Backs up the input stream by amount steps. Lexer calls this method if it |
| - * had already read some characters, but could not use them to match a |
| - * (longer) token. So, they will be used again as the prefix of the next |
| - * token and it is the implemetation's responsibility to do this right. |
| - */ |
| - void backup(int amount); |
| - |
| - /** |
| - * Returns the next character that marks the beginning of the next token. |
| - * All characters must remain in the buffer between two successive calls |
| - * to this method to implement backup correctly. |
| - */ |
| - char BeginToken() throws java.io.IOException; |
| - |
| - /** |
| - * Returns a string made up of characters from the marked token beginning |
| - * to the current buffer position. Implementations have the choice of returning |
| - * anything that they want to. For example, for efficiency, one might decide |
| - * to just return null, which is a valid implementation. |
| - */ |
| - String GetImage(); |
| - |
| - /** |
| - * Returns an array of characters that make up the suffix of length 'len' for |
| - * the currently matched token. This is used to build up the matched string |
| - * for use in actions in the case of MORE. A simple and inefficient |
| - * implementation of this is as follows : |
| - * |
| - * { |
| - * String t = GetImage(); |
| - * return t.substring(t.length() - len, t.length()).toCharArray(); |
| - * } |
| - */ |
| - char[] GetSuffix(int len); |
| - |
| - /** |
| - * The lexer calls this function to indicate that it is done with the stream |
| - * and hence implementations can free any resources held by this class. |
| - * Again, the body of this function can be just empty and it will not |
| - * affect the lexer's operation. |
| - */ |
| - void Done(); |
| - |
| -} |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/ParseException.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/ParseException.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/ParseException.java (working copy)
|
| @@ -1,194 +0,0 @@
|
| -/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 0.7pre6 */ |
| -package org.apache.lucene.analysis.standard; |
| - |
| -/* Note: This file was also manually modified. |
| - * Regenerating it by JavaCC might undo these changes!. */ |
| - |
| -/** |
| - * This exception is thrown when parse errors are encountered. |
| - * You can explicitly create objects of this exception type by |
| - * calling the method generateParseException in the generated |
| - * parser. |
| - * |
| - * You can modify this class to customize your error reporting |
| - * mechanisms so long as you retain the public fields. |
| - */ |
| -public class ParseException extends java.io.IOException { |
| - |
| - /** |
| - * This constructor is used by the method "generateParseException" |
| - * in the generated parser. Calling this constructor generates |
| - * a new object of this type with the fields "currentToken", |
| - * "expectedTokenSequences", and "tokenImage" set. The boolean |
| - * flag "specialConstructor" is also set to true to indicate that |
| - * this constructor was used to create this object. |
| - * This constructor calls its super class with the empty string |
| - * to force the "toString" method of parent class "Throwable" to |
| - * print the error message in the form: |
| - * ParseException: <result of getMessage> |
| - */ |
| - public ParseException(Token currentTokenVal, |
| - int[][] expectedTokenSequencesVal, |
| - String[] tokenImageVal |
| - ) |
| - { |
| - super(""); |
| - specialConstructor = true; |
| - currentToken = currentTokenVal; |
| - expectedTokenSequences = expectedTokenSequencesVal; |
| - tokenImage = tokenImageVal; |
| - } |
| - |
| - /** |
| - * The following constructors are for use by you for whatever |
| - * purpose you can think of. Constructing the exception in this |
| - * manner makes the exception behave in the normal way - i.e., as |
| - * documented in the class "Throwable". The fields "errorToken", |
| - * "expectedTokenSequences", and "tokenImage" do not contain |
| - * relevant information. The JavaCC generated code does not use |
| - * these constructors. |
| - */ |
| - |
| - public ParseException() { |
| - super(); |
| - specialConstructor = false; |
| - } |
| - |
| - public ParseException(String message) { |
| - super(message); |
| - specialConstructor = false; |
| - } |
| - |
| - /** |
| - * This variable determines which constructor was used to create |
| - * this object and thereby affects the semantics of the |
| - * "getMessage" method (see below). |
| - */ |
| - protected boolean specialConstructor; |
| - |
| - /** |
| - * This is the last token that has been consumed successfully. If |
| - * this object has been created due to a parse error, the token |
| - * followng this token will (therefore) be the first error token. |
| - */ |
| - public Token currentToken; |
| - |
| - /** |
| - * Each entry in this array is an array of integers. Each array |
| - * of integers represents a sequence of tokens (by their ordinal |
| - * values) that is expected at this point of the parse. |
| - */ |
| - public int[][] expectedTokenSequences; |
| - |
| - /** |
| - * This is a reference to the "tokenImage" array of the generated |
| - * parser within which the parse error occurred. This array is |
| - * defined in the generated ...Constants interface. |
| - */ |
| - public String[] tokenImage; |
| - |
| - /** |
| - * This method has the standard behavior when this object has been |
| - * created using the standard constructors. Otherwise, it uses |
| - * "currentToken" and "expectedTokenSequences" to generate a parse |
| - * error message and returns it. If this object has been created |
| - * due to a parse error, and you do not catch it (it gets thrown |
| - * from the parser), then this method is called during the printing |
| - * of the final stack trace, and hence the correct error message |
| - * gets displayed. |
| - */ |
| - public String getMessage() { |
| - if (!specialConstructor) { |
| - return super.getMessage(); |
| - } |
| - String expected = ""; |
| - int maxSize = 0; |
| - for (int i = 0; i < expectedTokenSequences.length; i++) { |
| - if (maxSize < expectedTokenSequences[i].length) { |
| - maxSize = expectedTokenSequences[i].length; |
| - } |
| - for (int j = 0; j < expectedTokenSequences[i].length; j++) { |
| - expected += tokenImage[expectedTokenSequences[i][j]] + " "; |
| - } |
| - if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { |
| - expected += "..."; |
| - } |
| - expected += eol + " "; |
| - } |
| - String retval = "Encountered \""; |
| - Token tok = currentToken.next; |
| - for (int i = 0; i < maxSize; i++) { |
| - if (i != 0) retval += " "; |
| - if (tok.kind == 0) { |
| - retval += tokenImage[0]; |
| - break; |
| - } |
| - retval += add_escapes(tok.image); |
| - tok = tok.next; |
| - } |
| - retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn + "." + eol; |
| - if (expectedTokenSequences.length == 1) { |
| - retval += "Was expecting:" + eol + " "; |
| - } else { |
| - retval += "Was expecting one of:" + eol + " "; |
| - } |
| - retval += expected; |
| - return retval; |
| - } |
| - |
| - /** |
| - * The end of line string for this machine. |
| - */ |
| - protected String eol = System.getProperty("line.separator", "\n"); |
| - |
| - /** |
| - * Used to convert raw characters to their escaped version |
| - * when these raw version cannot be used as part of an ASCII |
| - * string literal. |
| - */ |
| - protected String add_escapes(String str) { |
| - StringBuffer retval = new StringBuffer(); |
| - char ch; |
| - for (int i = 0; i < str.length(); i++) { |
| - switch (str.charAt(i)) |
| - { |
| - case 0 : |
| - continue; |
| - case '\b': |
| - retval.append("\\b"); |
| - continue; |
| - case '\t': |
| - retval.append("\\t"); |
| - continue; |
| - case '\n': |
| - retval.append("\\n"); |
| - continue; |
| - case '\f': |
| - retval.append("\\f"); |
| - continue; |
| - case '\r': |
| - retval.append("\\r"); |
| - continue; |
| - case '\"': |
| - retval.append("\\\""); |
| - continue; |
| - case '\'': |
| - retval.append("\\\'"); |
| - continue; |
| - case '\\': |
| - retval.append("\\\\"); |
| - continue; |
| - default: |
| - if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { |
| - String s = "0000" + Integer.toString(ch, 16); |
| - retval.append("\\u" + s.substring(s.length() - 4, s.length())); |
| - } else { |
| - retval.append(ch); |
| - } |
| - continue; |
| - } |
| - } |
| - return retval.toString(); |
| - } |
| - |
| -} |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (working copy)
|
| @@ -1,206 +1,68 @@
|
| -/* Generated By:JavaCC: Do not edit this line. StandardTokenizer.java */ |
| -package org.apache.lucene.analysis.standard; |
| - |
| -import java.io.*; |
| - |
| -/** A grammar-based tokenizer constructed with JavaCC. |
| +/* |
| + * Carrot2 project. |
| * |
| - * <p> This should be a good tokenizer for most European-language documents: |
| + * Copyright (C) 2002-2007, Dawid Weiss, Stanisław Osiński. |
| + * Portions (C) Contributors listed in "carrot2.CONTRIBUTORS" file. |
| + * All rights reserved. |
| * |
| - * <ul> |
| - * <li>Splits words at punctuation characters, removing punctuation. However, a |
| - * dot that's not followed by whitespace is considered part of a token. |
| - * <li>Splits words at hyphens, unless there's a number in the token, in which case |
| - * the whole token is interpreted as a product number and is not split. |
| - * <li>Recognizes email addresses and internet hostnames as one token. |
| - * </ul> |
| - * |
| - * <p>Many applications have specific tokenizer needs. If this tokenizer does |
| - * not suit your application, please consider copying this source code |
| - * directory to your project and maintaining your own grammar-based tokenizer. |
| + * Refer to the full license file "carrot2.LICENSE" |
| + * in the root folder of the repository checkout or at: |
| + * http://www.carrot2.org/carrot2.LICENSE |
| */ |
| -public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer implements StandardTokenizerConstants { |
| |
| - /** Constructs a tokenizer for this Reader. */ |
| - public StandardTokenizer(Reader reader) { |
| - this(new FastCharStream(reader)); |
| - this.input = reader; |
| - } |
| +package org.apache.lucene.analysis.standard; |
| |
| -/** Returns the next token in the stream, or null at EOS. |
| - * <p>The returned token's type is set to an element of {@link |
| - * StandardTokenizerConstants#tokenImage}. |
| - */ |
| - final public org.apache.lucene.analysis.Token next() throws ParseException, IOException { |
| - Token token = null; |
| - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { |
| - case ALPHANUM: |
| - token = jj_consume_token(ALPHANUM); |
| - break; |
| - case APOSTROPHE: |
| - token = jj_consume_token(APOSTROPHE); |
| - break; |
| - case ACRONYM: |
| - token = jj_consume_token(ACRONYM); |
| - break; |
| - case COMPANY: |
| - token = jj_consume_token(COMPANY); |
| - break; |
| - case EMAIL: |
| - token = jj_consume_token(EMAIL); |
| - break; |
| - case HOST: |
| - token = jj_consume_token(HOST); |
| - break; |
| - case NUM: |
| - token = jj_consume_token(NUM); |
| - break; |
| - case CJ: |
| - token = jj_consume_token(CJ); |
| - break; |
| - case 0: |
| - token = jj_consume_token(0); |
| - break; |
| - default: |
| - jj_la1[0] = jj_gen; |
| - jj_consume_token(-1); |
| - throw new ParseException(); |
| - } |
| - if (token.kind == EOF) { |
| - {if (true) return null;} |
| - } else { |
| - {if (true) return |
| - new org.apache.lucene.analysis.Token(token.image, |
| - token.beginColumn,token.endColumn, |
| - tokenImage[token.kind]);} |
| - } |
| - throw new Error("Missing return statement in function"); |
| - } |
| +import java.io.IOException; |
| +import java.io.Reader; |
| |
| - public StandardTokenizerTokenManager token_source; |
| - public Token token, jj_nt; |
| - private int jj_ntk; |
| - private int jj_gen; |
| - final private int[] jj_la1 = new int[1]; |
| - static private int[] jj_la1_0; |
| - static { |
| - jj_la1_0(); |
| - } |
| - private static void jj_la1_0() { |
| - jj_la1_0 = new int[] {0x10ff,}; |
| - } |
| +import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.analysis.Tokenizer; |
| |
| - public StandardTokenizer(CharStream stream) { |
| - token_source = new StandardTokenizerTokenManager(stream); |
| - token = new Token(); |
| - jj_ntk = -1; |
| - jj_gen = 0; |
| - for (int i = 0; i < 1; i++) jj_la1[i] = -1; |
| - } |
| +/** |
| + * An interface to the tokenizer constructed with JFlex. |
| + * |
| + * @author Stanislaw Osinski |
| + */ |
| +public class StandardTokenizer extends Tokenizer { |
| + /** A private instance of the JFlex-constructed scanner */ |
| + private final StandardTokenizerImpl scanner; |
| |
| - public void ReInit(CharStream stream) { |
| - token_source.ReInit(stream); |
| - token = new Token(); |
| - jj_ntk = -1; |
| - jj_gen = 0; |
| - for (int i = 0; i < 1; i++) jj_la1[i] = -1; |
| - } |
| - |
| - public StandardTokenizer(StandardTokenizerTokenManager tm) { |
| - token_source = tm; |
| - token = new Token(); |
| - jj_ntk = -1; |
| - jj_gen = 0; |
| - for (int i = 0; i < 1; i++) jj_la1[i] = -1; |
| - } |
| - |
| - public void ReInit(StandardTokenizerTokenManager tm) { |
| - token_source = tm; |
| - token = new Token(); |
| - jj_ntk = -1; |
| - jj_gen = 0; |
| - for (int i = 0; i < 1; i++) jj_la1[i] = -1; |
| - } |
| - |
| - final private Token jj_consume_token(int kind) throws ParseException { |
| - Token oldToken; |
| - if ((oldToken = token).next != null) token = token.next; |
| - else token = token.next = token_source.getNextToken(); |
| - jj_ntk = -1; |
| - if (token.kind == kind) { |
| - jj_gen++; |
| - return token; |
| + /** |
| + * Creates a new instance of the {@link StandardTokenizer}. Attaches the |
| + * <code>input</code> to a newly created JFlex scanner. |
| + */ |
| + public StandardTokenizer(Reader input) { |
| + this.input = input; |
| + this.scanner = new StandardTokenizerImpl(input); |
| } |
| - token = oldToken; |
| - jj_kind = kind; |
| - throw generateParseException(); |
| - } |
| |
| - final public Token getNextToken() { |
| - if (token.next != null) token = token.next; |
| - else token = token.next = token_source.getNextToken(); |
| - jj_ntk = -1; |
| - jj_gen++; |
| - return token; |
| - } |
| + /* |
| + * (non-Javadoc) |
| + * |
| + * @see org.apache.lucene.analysis.TokenStream#next() |
| + */ |
| + public Token next() throws IOException { |
| + int tokenType = scanner.getNextToken(); |
| |
| - final public Token getToken(int index) { |
| - Token t = token; |
| - for (int i = 0; i < index; i++) { |
| - if (t.next != null) t = t.next; |
| - else t = t.next = token_source.getNextToken(); |
| - } |
| - return t; |
| - } |
| + if (tokenType == StandardTokenizerImpl.YYEOF) { |
| + return null; |
| + } |
| |
| - final private int jj_ntk() { |
| - if ((jj_nt=token.next) == null) |
| - return (jj_ntk = (token.next=token_source.getNextToken()).kind); |
| - else |
| - return (jj_ntk = jj_nt.kind); |
| - } |
| + int startPosition = scanner.yychar(); |
| |
| - private java.util.Vector jj_expentries = new java.util.Vector(); |
| - private int[] jj_expentry; |
| - private int jj_kind = -1; |
| - |
| - public ParseException generateParseException() { |
| - jj_expentries.removeAllElements(); |
| - boolean[] la1tokens = new boolean[16]; |
| - for (int i = 0; i < 16; i++) { |
| - la1tokens[i] = false; |
| + final String tokenImage = scanner.yytext(); |
| + return new Token(tokenImage, startPosition, startPosition |
| + + tokenImage.length(), |
| + StandardTokenizerImpl.TOKEN_TYPES[tokenType]); |
| } |
| - if (jj_kind >= 0) { |
| - la1tokens[jj_kind] = true; |
| - jj_kind = -1; |
| - } |
| - for (int i = 0; i < 1; i++) { |
| - if (jj_la1[i] == jj_gen) { |
| - for (int j = 0; j < 32; j++) { |
| - if ((jj_la1_0[i] & (1<<j)) != 0) { |
| - la1tokens[j] = true; |
| - } |
| - } |
| - } |
| - } |
| - for (int i = 0; i < 16; i++) { |
| - if (la1tokens[i]) { |
| - jj_expentry = new int[1]; |
| - jj_expentry[0] = i; |
| - jj_expentries.addElement(jj_expentry); |
| - } |
| - } |
| - int[][] exptokseq = new int[jj_expentries.size()][]; |
| - for (int i = 0; i < jj_expentries.size(); i++) { |
| - exptokseq[i] = (int[])jj_expentries.elementAt(i); |
| - } |
| - return new ParseException(token, exptokseq, tokenImage); |
| - } |
| |
| - final public void enable_tracing() { |
| - } |
| - |
| - final public void disable_tracing() { |
| - } |
| - |
| + /* |
| + * (non-Javadoc) |
| + * |
| + * @see org.apache.lucene.analysis.TokenStream#reset() |
| + */ |
| + public void reset() throws IOException { |
| + super.reset(); |
| + scanner.yyreset(input); |
| + } |
| } |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj (working copy)
|
| @@ -1,196 +0,0 @@
|
| -/**f |
| - * Licensed to the Apache Software Foundation (ASF) under one or more |
| - * contributor license agreements. See the NOTICE file distributed with |
| - * this work for additional information regarding copyright ownership. |
| - * The ASF licenses this file to You under the Apache License, Version 2.0 |
| - * (the "License"); you may not use this file except in compliance with |
| - * the License. You may obtain a copy of the License at |
| - * |
| - * http://www.apache.org/licenses/LICENSE-2.0 |
| - * |
| - * Unless required by applicable law or agreed to in writing, software |
| - * distributed under the License is distributed on an "AS IS" BASIS, |
| - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| - * See the License for the specific language governing permissions and |
| - * limitations under the License. |
| - */ |
| - |
| -options { |
| - STATIC = false; |
| -//IGNORE_CASE = true; |
| -//BUILD_PARSER = false; |
| - UNICODE_INPUT = true; |
| - USER_CHAR_STREAM = true; |
| - OPTIMIZE_TOKEN_MANAGER = true; |
| -//DEBUG_TOKEN_MANAGER = true; |
| -} |
| -PARSER_BEGIN(StandardTokenizer) |
| - |
| -package org.apache.lucene.analysis.standard; |
| - |
| -import java.io.*; |
| - |
| -/** A grammar-based tokenizer constructed with JavaCC. |
| - * |
| - * <p> This should be a good tokenizer for most European-language documents: |
| - * |
| - * <ul> |
| - * <li>Splits words at punctuation characters, removing punctuation. However, a |
| - * dot that's not followed by whitespace is considered part of a token. |
| - * <li>Splits words at hyphens, unless there's a number in the token, in which case |
| - * the whole token is interpreted as a product number and is not split. |
| - * <li>Recognizes email addresses and internet hostnames as one token. |
| - * </ul> |
| - * |
| - * <p>Many applications have specific tokenizer needs. If this tokenizer does |
| - * not suit your application, please consider copying this source code |
| - * directory to your project and maintaining your own grammar-based tokenizer. |
| - */ |
| -public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer { |
| - |
| - /** Constructs a tokenizer for this Reader. */ |
| - public StandardTokenizer(Reader reader) { |
| - this(new FastCharStream(reader)); |
| - this.input = reader; |
| - } |
| -} |
| - |
| -PARSER_END(StandardTokenizer) |
| - |
| -TOKEN : { // token patterns |
| - |
| - // basic word: a sequence of digits & letters |
| - <ALPHANUM: (<LETTER>|<DIGIT>|<KOREAN>)+ > |
| - |
| - // internal apostrophes: O'Reilly, you're, O'Reilly's |
| - // use a post-filter to remove possesives |
| -| <APOSTROPHE: <ALPHA> ("'" <ALPHA>)+ > |
| - |
| - // acronyms: U.S.A., I.B.M., etc. |
| - // use a post-filter to remove dots |
| -| <ACRONYM: <ALPHA> "." (<ALPHA> ".")+ > |
| - |
| - // company names like AT&T and Excite@Home. |
| -| <COMPANY: <ALPHA> ("&"|"@") <ALPHA> > |
| - |
| - // email addresses |
| -| <EMAIL: <ALPHANUM> (("."|"-"|"_") <ALPHANUM>)* "@" <ALPHANUM> (("."|"-") <ALPHANUM>)+ > |
| - |
| - // hostname |
| -| <HOST: <ALPHANUM> ("." <ALPHANUM>)+ > |
| - |
| - // floating point, serial, model numbers, ip addresses, etc. |
| - // every other segment must have at least one digit |
| -| <NUM: (<ALPHANUM> <P> <HAS_DIGIT> |
| - | <HAS_DIGIT> <P> <ALPHANUM> |
| - | <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+ |
| - | <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+ |
| - | <ALPHANUM> <P> <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+ |
| - | <HAS_DIGIT> <P> <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+ |
| - ) |
| - > |
| -| <#P: ("_"|"-"|"/"|"."|",") > |
| -| <#HAS_DIGIT: // at least one digit |
| - (<LETTER>|<DIGIT>)* |
| - <DIGIT> |
| - (<LETTER>|<DIGIT>)* |
| - > |
| - |
| -| < #ALPHA: (<LETTER>)+> |
| -| < #LETTER: // unicode letters |
| - [ |
| - "\u0041"-"\u005a", |
| - "\u0061"-"\u007a", |
| - "\u00c0"-"\u00d6", |
| - "\u00d8"-"\u00f6", |
| - "\u00f8"-"\u00ff", |
| - "\u0100"-"\u1fff", |
| - "\uffa0"-"\uffdc" |
| - ] |
| - > |
| -| < CJ: // Chinese, Japanese |
| - [ |
| - "\u3040"-"\u318f", |
| - "\u3100"-"\u312f", // BaPoMoFo (aka ZhuYin) |
| - "\u3040"-"\u309F", // Japanese: Hiragana |
| - "\u30A0"-"\u30FF", // Japanese: Katakana |
| - "\u31F0"-"\u31FF", // Japanese: Katakana Phonetic Extensions |
| - "\u3300"-"\u337f", |
| - "\u3400"-"\u4dbf", // CJK Unified Ideographs Ext. A |
| - "\u4e00"-"\u9fff", |
| - "\uf900"-"\ufaff", |
| - "\uff65"-"\uff9f" |
| - |
| -// Otis: consider adding these, too |
| -// |
| -// 2E80-2EFF: CJK Radicals Supplement |
| -// 2F00-2FDF: Kangxi Radicals |
| -// 3190-319F: Kanbun |
| -// 31C0-31EF: CJK Strokes |
| -// 4E00-9FBF: CJK Unified |
| -// F900-FAFF: CJK Compatibility Ideographs |
| - |
| - ] |
| - > |
| -| < KOREAN: // Korean |
| - [ |
| - "\uac00"-"\ud7af", // Hangul Syllables |
| - "\u1100"-"\u11ff" // Hangul Jamo |
| - // "\uac00"-"\ud7a3" |
| - ] |
| - > |
| -| < #DIGIT: // unicode digits |
| - [ |
| - "\u0030"-"\u0039", |
| - "\u0660"-"\u0669", |
| - "\u06f0"-"\u06f9", |
| - "\u0966"-"\u096f", |
| - "\u09e6"-"\u09ef", |
| - "\u0a66"-"\u0a6f", |
| - "\u0ae6"-"\u0aef", |
| - "\u0b66"-"\u0b6f", |
| - "\u0be7"-"\u0bef", |
| - "\u0c66"-"\u0c6f", |
| - "\u0ce6"-"\u0cef", |
| - "\u0d66"-"\u0d6f", |
| - "\u0e50"-"\u0e59", |
| - "\u0ed0"-"\u0ed9", |
| - "\u1040"-"\u1049" |
| - ] |
| - > |
| -} |
| - |
| -SKIP : { // skip unrecognized chars |
| - <NOISE: ~[] > |
| -} |
| - |
| -/** Returns the next token in the stream, or null at EOS. |
| - * <p>The returned token's type is set to an element of {@link |
| - * StandardTokenizerConstants#tokenImage}. |
| - */ |
| -org.apache.lucene.analysis.Token next() throws IOException : |
| -{ |
| - Token token = null; |
| -} |
| -{ |
| - ( token = <ALPHANUM> | |
| - token = <APOSTROPHE> | |
| - token = <ACRONYM> | |
| - token = <COMPANY> | |
| - token = <EMAIL> | |
| - token = <HOST> | |
| - token = <NUM> | |
| - token = <CJ> | |
| - token = <EOF> |
| - ) |
| - { |
| - if (token.kind == EOF) { |
| - return null; |
| - } else { |
| - return |
| - new org.apache.lucene.analysis.Token(token.image, |
| - token.beginColumn,token.endColumn, |
| - tokenImage[token.kind]); |
| - } |
| - } |
| -} |
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (revision 0)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (revision 0)
|
| @@ -0,0 +1,606 @@
|
| +/* The following code was generated by JFlex 1.4.1 on 07-08-01 09:29 */
|
| +
|
| +package org.apache.lucene.analysis.standard;
|
| +
|
| +/**
|
| + * This class is a scanner generated by
|
| + * <a href="http://www.jflex.de/">JFlex</a> 1.4.1
|
| + * on 07-08-01 09:29 from the specification file
|
| + * <tt>E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
|
| + */
|
| +class StandardTokenizerImpl {
|
| +
|
| + /** This character denotes the end of file */
|
| + public static final int YYEOF = -1;
|
| +
|
| + /** initial size of the lookahead buffer */
|
| + private static final int ZZ_BUFFERSIZE = 16384;
|
| +
|
| + /** lexical states */
|
| + public static final int YYINITIAL = 0;
|
| +
|
| + /**
|
| + * Translates characters to character classes
|
| + */
|
| + private static final String ZZ_CMAP_PACKED =
|
| + "\11\0\1\0\1\16\1\0\1\0\1\15\22\0\1\0\5\0\1\3"+
|
| + "\1\1\4\0\1\7\1\5\1\2\1\7\12\11\6\0\1\4\32\10"+
|
| + "\4\0\1\6\1\0\32\10\105\0\27\10\1\0\37\10\1\0\u0568\10"+
|
| + "\12\12\206\10\12\12\u026c\10\12\12\166\10\12\12\166\10\12\12\166\10"+
|
| + "\12\12\166\10\12\12\167\10\11\12\166\10\12\12\166\10\12\12\166\10"+
|
| + "\12\12\340\10\12\12\166\10\12\12\u0166\10\12\12\266\10\u0100\10\u0e00\10"+
|
| + "\u1040\0\u0150\14\140\0\20\14\u0100\0\200\14\200\0\u19c0\14\100\0\u5200\14"+
|
| + "\u0c00\0\u2bb0\13\u2150\0\u0200\14\u0465\0\73\14\75\10\43\0";
|
| +
|
| + /**
|
| + * Translates characters to character classes
|
| + */
|
| + private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
|
| +
|
| + /**
|
| + * Translates DFA states to action switch labels.
|
| + */
|
| + private static final int [] ZZ_ACTION = zzUnpackAction();
|
| +
|
| + private static final String ZZ_ACTION_PACKED_0 =
|
| + "\1\0\2\1\4\2\1\3\1\1\15\0\1\4\1\5"+
|
| + "\2\6\2\7\2\0\1\6\2\4\1\6\1\10\1\0"+
|
| + "\1\10\1\11";
|
| +
|
| + private static int [] zzUnpackAction() {
|
| + int [] result = new int[38];
|
| + int offset = 0;
|
| + offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
|
| + return result;
|
| + }
|
| +
|
| + private static int zzUnpackAction(String packed, int offset, int [] result) {
|
| + int i = 0; /* index in packed string */
|
| + int j = offset; /* index in unpacked array */
|
| + int l = packed.length();
|
| + while (i < l) {
|
| + int count = packed.charAt(i++);
|
| + int value = packed.charAt(i++);
|
| + do result[j++] = value; while (--count > 0);
|
| + }
|
| + return j;
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Translates a state to a row index in the transition table
|
| + */
|
| + private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
|
| +
|
| + private static final String ZZ_ROWMAP_PACKED_0 =
|
| + "\0\0\0\17\0\36\0\55\0\74\0\113\0\132\0\17"+
|
| + "\0\151\0\170\0\207\0\226\0\245\0\264\0\303\0\322"+
|
| + "\0\341\0\360\0\377\0\u010e\0\u011d\0\36\0\u012c\0\u013b"+
|
| + "\0\u014a\0\u0159\0\245\0\u0168\0\u0177\0\u0186\0\u0195\0\u01a4"+
|
| + "\0\u01b3\0\u01c2\0\226\0\u01d1\0\u010e\0\u01e0";
|
| +
|
| + private static int [] zzUnpackRowMap() {
|
| + int [] result = new int[38];
|
| + int offset = 0;
|
| + offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
|
| + return result;
|
| + }
|
| +
|
| + private static int zzUnpackRowMap(String packed, int offset, int [] result) {
|
| + int i = 0; /* index in packed string */
|
| + int j = offset; /* index in unpacked array */
|
| + int l = packed.length();
|
| + while (i < l) {
|
| + int high = packed.charAt(i++) << 16;
|
| + result[j++] = high | packed.charAt(i++);
|
| + }
|
| + return j;
|
| + }
|
| +
|
| + /**
|
| + * The transition table of the DFA
|
| + */
|
| + private static final int [] ZZ_TRANS = zzUnpackTrans();
|
| +
|
| + private static final String ZZ_TRANS_PACKED_0 =
|
| + "\2\2\1\3\2\2\3\3\1\4\1\5\1\6\1\7"+
|
| + "\1\10\1\11\1\2\27\0\4\12\4\0\1\13\1\14"+
|
| + "\1\15\1\16\2\17\1\0\1\4\1\5\1\6\1\7"+
|
| + "\5\0\1\20\1\0\1\21\2\22\1\23\3\5\1\7"+
|
| + "\4\0\1\13\1\24\1\15\1\16\2\22\1\23\1\6"+
|
| + "\1\5\1\6\1\7\5\0\1\25\1\0\1\21\2\17"+
|
| + "\1\0\4\7\21\0\1\2\2\0\1\26\2\0\3\26"+
|
| + "\1\12\2\27\1\12\13\0\1\30\1\0\1\30\14\0"+
|
| + "\1\31\1\32\1\31\1\32\13\0\1\33\1\0\1\33"+
|
| + "\14\0\1\34\1\35\1\34\1\35\13\0\4\36\13\0"+
|
| + "\4\37\13\0\4\35\13\0\4\40\13\0\4\41\13\0"+
|
| + "\1\42\1\37\1\42\1\37\13\0\4\32\5\0\1\23"+
|
| + "\2\0\3\23\3\27\1\12\4\0\1\13\6\0\1\30"+
|
| + "\1\0\1\30\6\0\1\43\1\0\1\21\2\17\1\0"+
|
| + "\1\31\1\32\1\31\1\32\5\0\1\25\1\0\1\21"+
|
| + "\2\17\1\0\4\32\5\0\1\44\2\0\1\44\2\0"+
|
| + "\1\34\1\35\1\34\1\35\5\0\1\44\2\0\1\44"+
|
| + "\2\0\4\35\5\0\1\17\1\0\1\21\2\17\1\0"+
|
| + "\4\36\5\0\1\20\1\0\1\21\2\22\1\23\4\37"+
|
| + "\5\0\1\22\1\0\1\21\2\22\1\23\4\40\5\0"+
|
| + "\1\23\2\0\3\23\4\41\5\0\1\45\1\0\1\21"+
|
| + "\2\22\1\23\1\42\1\37\1\42\1\37\13\0\4\46"+
|
| + "\5\0\1\44\2\0\1\44\2\0\4\46\3\0";
|
| +
|
| + private static int [] zzUnpackTrans() {
|
| + int [] result = new int[495];
|
| + int offset = 0;
|
| + offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
|
| + return result;
|
| + }
|
| +
|
| + private static int zzUnpackTrans(String packed, int offset, int [] result) {
|
| + int i = 0; /* index in packed string */
|
| + int j = offset; /* index in unpacked array */
|
| + int l = packed.length();
|
| + while (i < l) {
|
| + int count = packed.charAt(i++);
|
| + int value = packed.charAt(i++);
|
| + value--;
|
| + do result[j++] = value; while (--count > 0);
|
| + }
|
| + return j;
|
| + }
|
| +
|
| +
|
| + /* error codes */
|
| + private static final int ZZ_UNKNOWN_ERROR = 0;
|
| + private static final int ZZ_NO_MATCH = 1;
|
| + private static final int ZZ_PUSHBACK_2BIG = 2;
|
| +
|
| + /* error messages for the codes above */
|
| + private static final String ZZ_ERROR_MSG[] = {
|
| + "Unkown internal scanner error",
|
| + "Error: could not match input",
|
| + "Error: pushback value was too large"
|
| + };
|
| +
|
| + /**
|
| + * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
|
| + */
|
| + private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
|
| +
|
| + private static final String ZZ_ATTRIBUTE_PACKED_0 =
|
| + "\1\0\1\11\5\1\1\11\1\1\15\0\6\1\2\0"+
|
| + "\5\1\1\0\2\1";
|
| +
|
| + private static int [] zzUnpackAttribute() {
|
| + int [] result = new int[38];
|
| + int offset = 0;
|
| + offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
|
| + return result;
|
| + }
|
| +
|
| + private static int zzUnpackAttribute(String packed, int offset, int [] result) {
|
| + int i = 0; /* index in packed string */
|
| + int j = offset; /* index in unpacked array */
|
| + int l = packed.length();
|
| + while (i < l) {
|
| + int count = packed.charAt(i++);
|
| + int value = packed.charAt(i++);
|
| + do result[j++] = value; while (--count > 0);
|
| + }
|
| + return j;
|
| + }
|
| +
|
| + /** the input device */
|
| + private java.io.Reader zzReader;
|
| +
|
| + /** the current state of the DFA */
|
| + private int zzState;
|
| +
|
| + /** the current lexical state */
|
| + private int zzLexicalState = YYINITIAL;
|
| +
|
| + /** this buffer contains the current text to be matched and is
|
| + the source of the yytext() string */
|
| + private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
|
| +
|
| + /** the textposition at the last accepting state */
|
| + private int zzMarkedPos;
|
| +
|
| + /** the textposition at the last state to be included in yytext */
|
| + private int zzPushbackPos;
|
| +
|
| + /** the current text position in the buffer */
|
| + private int zzCurrentPos;
|
| +
|
| + /** startRead marks the beginning of the yytext() string in the buffer */
|
| + private int zzStartRead;
|
| +
|
| + /** endRead marks the last character in the buffer, that has been read
|
| + from input */
|
| + private int zzEndRead;
|
| +
|
| + /** number of newlines encountered up to the start of the matched text */
|
| + private int yyline;
|
| +
|
| + /** the number of characters up to the start of the matched text */
|
| + private int yychar;
|
| +
|
| + /**
|
| + * the number of characters from the last newline up to the start of the
|
| + * matched text
|
| + */
|
| + private int yycolumn;
|
| +
|
| + /**
|
| + * zzAtBOL == true <=> the scanner is currently at the beginning of a line
|
| + */
|
| + private boolean zzAtBOL = true;
|
| +
|
| + /** zzAtEOF == true <=> the scanner is at the EOF */
|
| + private boolean zzAtEOF;
|
| +
|
| + /* user code: */
|
| +
|
| +public static final int ALPHANUM = 0;
|
| +public static final int APOSTROPHE = 1;
|
| +public static final int ACRONYM = 2;
|
| +public static final int COMPANY = 3;
|
| +public static final int EMAIL = 4;
|
| +public static final int HOST = 5;
|
| +public static final int NUM = 6;
|
| +public static final int CJ = 7;
|
| +
|
| +public static final String [] TOKEN_TYPES = new String [] {
|
| + "<ALPHANUM>",
|
| + "<APOSTROPHE>",
|
| + "<ACRONYM>",
|
| + "<COMPANY>",
|
| + "<EMAIL>",
|
| + "<HOST>",
|
| + "<NUM>",
|
| + "<CJ>"
|
| +};
|
| +
|
| +public final int yychar()
|
| +{
|
| + return yychar;
|
| +}
|
| +
|
| +
|
| + /**
|
| + * Creates a new scanner
|
| + * There is also a java.io.InputStream version of this constructor.
|
| + *
|
| + * @param in the java.io.Reader to read input from.
|
| + */
|
| + StandardTokenizerImpl(java.io.Reader in) {
|
| + this.zzReader = in;
|
| + }
|
| +
|
| + /**
|
| + * Creates a new scanner.
|
| + * There is also a java.io.Reader version of this constructor.
|
| + *
|
| + * @param in the java.io.InputStream to read input from.
|
| + */
|
| + StandardTokenizerImpl(java.io.InputStream in) {
|
| + this(new java.io.InputStreamReader(in));
|
| + }
|
| +
|
| + /**
|
| + * Unpacks the compressed character translation table.
|
| + *
|
| + * @param packed the packed character translation table
|
| + * @return the unpacked character translation table
|
| + */
|
| + private static char [] zzUnpackCMap(String packed) {
|
| + char [] map = new char[0x10000];
|
| + int i = 0; /* index in packed string */
|
| + int j = 0; /* index in unpacked array */
|
| + while (i < 156) {
|
| + int count = packed.charAt(i++);
|
| + char value = packed.charAt(i++);
|
| + do map[j++] = value; while (--count > 0);
|
| + }
|
| + return map;
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Refills the input buffer.
|
| + *
|
| + * @return <code>false</code>, iff there was new input.
|
| + *
|
| + * @exception java.io.IOException if any I/O-Error occurs
|
| + */
|
| + private boolean zzRefill() throws java.io.IOException {
|
| +
|
| + /* first: make room (if you can) */
|
| + if (zzStartRead > 0) {
|
| + System.arraycopy(zzBuffer, zzStartRead,
|
| + zzBuffer, 0,
|
| + zzEndRead-zzStartRead);
|
| +
|
| + /* translate stored positions */
|
| + zzEndRead-= zzStartRead;
|
| + zzCurrentPos-= zzStartRead;
|
| + zzMarkedPos-= zzStartRead;
|
| + zzPushbackPos-= zzStartRead;
|
| + zzStartRead = 0;
|
| + }
|
| +
|
| + /* is the buffer big enough? */
|
| + if (zzCurrentPos >= zzBuffer.length) {
|
| + /* if not: blow it up */
|
| + char newBuffer[] = new char[zzCurrentPos*2];
|
| + System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
|
| + zzBuffer = newBuffer;
|
| + }
|
| +
|
| + /* finally: fill the buffer with new input */
|
| + int numRead = zzReader.read(zzBuffer, zzEndRead,
|
| + zzBuffer.length-zzEndRead);
|
| +
|
| + if (numRead < 0) {
|
| + return true;
|
| + }
|
| + else {
|
| + zzEndRead+= numRead;
|
| + return false;
|
| + }
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Closes the input stream.
|
| + */
|
| + public final void yyclose() throws java.io.IOException {
|
| + zzAtEOF = true; /* indicate end of file */
|
| + zzEndRead = zzStartRead; /* invalidate buffer */
|
| +
|
| + if (zzReader != null)
|
| + zzReader.close();
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Resets the scanner to read from a new input stream.
|
| + * Does not close the old reader.
|
| + *
|
| + * All internal variables are reset, the old input stream
|
| + * <b>cannot</b> be reused (internal buffer is discarded and lost).
|
| + * Lexical state is set to <tt>ZZ_INITIAL</tt>.
|
| + *
|
| + * @param reader the new input stream
|
| + */
|
| + public final void yyreset(java.io.Reader reader) {
|
| + zzReader = reader;
|
| + zzAtBOL = true;
|
| + zzAtEOF = false;
|
| + zzEndRead = zzStartRead = 0;
|
| + zzCurrentPos = zzMarkedPos = zzPushbackPos = 0;
|
| + yyline = yychar = yycolumn = 0;
|
| + zzLexicalState = YYINITIAL;
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Returns the current lexical state.
|
| + */
|
| + public final int yystate() {
|
| + return zzLexicalState;
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Enters a new lexical state
|
| + *
|
| + * @param newState the new lexical state
|
| + */
|
| + public final void yybegin(int newState) {
|
| + zzLexicalState = newState;
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Returns the text matched by the current regular expression.
|
| + */
|
| + public final String yytext() {
|
| + return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Returns the character at position <tt>pos</tt> from the
|
| + * matched text.
|
| + *
|
| + * It is equivalent to yytext().charAt(pos), but faster
|
| + *
|
| + * @param pos the position of the character to fetch.
|
| + * A value from 0 to yylength()-1.
|
| + *
|
| + * @return the character at position pos
|
| + */
|
| + public final char yycharat(int pos) {
|
| + return zzBuffer[zzStartRead+pos];
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Returns the length of the matched text region.
|
| + */
|
| + public final int yylength() {
|
| + return zzMarkedPos-zzStartRead;
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Reports an error that occurred while scanning.
|
| + *
|
| + * In a well-formed scanner (no or only correct usage of
|
| + * yypushback(int) and a match-all fallback rule) this method
|
| + * will only be called with things that "Can't Possibly Happen".
|
| + * If this method is called, something is seriously wrong
|
| + * (e.g. a JFlex bug producing a faulty scanner etc.).
|
| + *
|
| + * Usual syntax/scanner level error handling should be done
|
| + * in error fallback rules.
|
| + *
|
| + * @param errorCode the code of the errormessage to display
|
| + */
|
| + private void zzScanError(int errorCode) {
|
| + String message;
|
| + try {
|
| + message = ZZ_ERROR_MSG[errorCode];
|
| + }
|
| + catch (ArrayIndexOutOfBoundsException e) {
|
| + message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
|
| + }
|
| +
|
| + throw new Error(message);
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Pushes the specified amount of characters back into the input stream.
|
| + *
|
| + * They will be read again by the next call of the scanning method
|
| + *
|
| + * @param number the number of characters to be read again.
|
| + * This number must not be greater than yylength()!
|
| + */
|
| + public void yypushback(int number) {
|
| + if ( number > yylength() )
|
| + zzScanError(ZZ_PUSHBACK_2BIG);
|
| +
|
| + zzMarkedPos -= number;
|
| + }
|
| +
|
| +
|
| + /**
|
| + * Resumes scanning until the next regular expression is matched,
|
| + * the end of input is encountered or an I/O-Error occurs.
|
| + *
|
| + * @return the next token
|
| + * @exception java.io.IOException if any I/O-Error occurs
|
| + */
|
| + public int getNextToken() throws java.io.IOException {
|
| + int zzInput;
|
| + int zzAction;
|
| +
|
| + // cached fields:
|
| + int zzCurrentPosL;
|
| + int zzMarkedPosL;
|
| + int zzEndReadL = zzEndRead;
|
| + char [] zzBufferL = zzBuffer;
|
| + char [] zzCMapL = ZZ_CMAP;
|
| +
|
| + int [] zzTransL = ZZ_TRANS;
|
| + int [] zzRowMapL = ZZ_ROWMAP;
|
| + int [] zzAttrL = ZZ_ATTRIBUTE;
|
| +
|
| + while (true) {
|
| + zzMarkedPosL = zzMarkedPos;
|
| +
|
| + yychar+= zzMarkedPosL-zzStartRead;
|
| +
|
| + zzAction = -1;
|
| +
|
| + zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
|
| +
|
| + zzState = zzLexicalState;
|
| +
|
| +
|
| + zzForAction: {
|
| + while (true) {
|
| +
|
| + if (zzCurrentPosL < zzEndReadL)
|
| + zzInput = zzBufferL[zzCurrentPosL++];
|
| + else if (zzAtEOF) {
|
| + zzInput = YYEOF;
|
| + break zzForAction;
|
| + }
|
| + else {
|
| + // store back cached positions
|
| + zzCurrentPos = zzCurrentPosL;
|
| + zzMarkedPos = zzMarkedPosL;
|
| + boolean eof = zzRefill();
|
| + // get translated positions and possibly new buffer
|
| + zzCurrentPosL = zzCurrentPos;
|
| + zzMarkedPosL = zzMarkedPos;
|
| + zzBufferL = zzBuffer;
|
| + zzEndReadL = zzEndRead;
|
| + if (eof) {
|
| + zzInput = YYEOF;
|
| + break zzForAction;
|
| + }
|
| + else {
|
| + zzInput = zzBufferL[zzCurrentPosL++];
|
| + }
|
| + }
|
| + int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
|
| + if (zzNext == -1) break zzForAction;
|
| + zzState = zzNext;
|
| +
|
| + int zzAttributes = zzAttrL[zzState];
|
| + if ( (zzAttributes & 1) == 1 ) {
|
| + zzAction = zzState;
|
| + zzMarkedPosL = zzCurrentPosL;
|
| + if ( (zzAttributes & 8) == 8 ) break zzForAction;
|
| + }
|
| +
|
| + }
|
| + }
|
| +
|
| + // store back cached position
|
| + zzMarkedPos = zzMarkedPosL;
|
| +
|
| + switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
|
| + case 6:
|
| + { return HOST;
|
| + }
|
| + case 10: break;
|
| + case 8:
|
| + { return ACRONYM;
|
| + }
|
| + case 11: break;
|
| + case 1:
|
| + { /* ignore */
|
| + }
|
| + case 12: break;
|
| + case 4:
|
| + { return NUM;
|
| + }
|
| + case 13: break;
|
| + case 3:
|
| + { return CJ;
|
| + }
|
| + case 14: break;
|
| + case 2:
|
| + { return ALPHANUM;
|
| + }
|
| + case 15: break;
|
| + case 7:
|
| + { return COMPANY;
|
| + }
|
| + case 16: break;
|
| + case 5:
|
| + { return APOSTROPHE;
|
| + }
|
| + case 17: break;
|
| + case 9:
|
| + { return EMAIL;
|
| + }
|
| + case 18: break;
|
| + default:
|
| + if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
| + zzAtEOF = true;
|
| + return YYEOF;
|
| + }
|
| + else {
|
| + zzScanError(ZZ_NO_MATCH);
|
| + }
|
| + }
|
| + }
|
| + }
|
| +
|
| +
|
| +}
|
| Index: E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/package.html
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/package.html (revision 560135)
|
| +++ E:/projects/lucene/trunk/src/java/org/apache/lucene/analysis/standard/package.html (working copy)
|
| @@ -2,14 +2,9 @@
|
| <html> |
| <head> |
| <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> |
| - <meta name="Author" content="Doug Cutting"> |
| + <meta name="Author" content="Stanislaw Osinski"> |
| </head> |
| <body> |
| -A grammar-based tokenizer constructed with JavaCC. |
| -<p>Note that JavaCC defines lots of public classes, methods and fields |
| -that do not need to be public. These clutter the documentation. |
| -Sorry. |
| -<p>Note that because JavaCC defines a class named <tt>Token</tt>, <tt>org.apache.lucene.analysis.Token</tt> |
| -must always be fully qualified in source code in this package. |
| +A fast grammar-based tokenizer constructed with JFlex. |
| </body> |
| </html> |
| Index: E:/projects/lucene/trunk/build.xml
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/build.xml (revision 560135)
|
| +++ E:/projects/lucene/trunk/build.xml (working copy)
|
| @@ -7,9 +7,9 @@
|
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| - |
| + |
| http://www.apache.org/licenses/LICENSE-2.0 |
| - |
| + |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| @@ -23,7 +23,7 @@
|
| |
| <import file="common-build.xml"/> |
| |
| - <property name="build.demo.template" value="src/demo/demo-build.template"/> |
| + <property name="build.demo.template" value="src/demo/demo-build.template"/> |
| |
| <property name="demo.name" value="lucene-demos-${version}"/> |
| <property name="demo.war.name" value="luceneweb"/> |
| @@ -31,7 +31,7 @@
|
| |
| <!-- Type of checksum to compute for distribution files --> |
| <property name="checksum.algorithm" value="md5" /> |
| - |
| + |
| <!-- Build classpath --> |
| <path id="classpath"> |
| <pathelement location="${build.dir}/classes/java"/> |
| @@ -98,7 +98,7 @@
|
| <target name="test" depends="test-core, test-contrib" |
| description="Runs all unit tests (including contribs)" |
| /> |
| - |
| + |
| <!-- ================================================================== --> |
| <!-- J A R --> |
| <!-- ================================================================== --> |
| @@ -122,11 +122,11 @@
|
| value="${version}"/> |
| <attribute name="Implementation-Vendor" |
| value="The Apache Software Foundation"/> |
| - <attribute name="X-Compile-Source-JDK" |
| + <attribute name="X-Compile-Source-JDK" |
| value="${javac.source}"/> |
| - <attribute name="X-Compile-Target-JDK" |
| + <attribute name="X-Compile-Target-JDK" |
| value="${javac.target}"/> |
| - </manifest> |
| + </manifest> |
| <metainf dir="${common.dir}"> |
| <include name="LICENSE.txt"/> |
| <include name="NOTICE.txt"/> |
| @@ -152,11 +152,11 @@
|
| value="${version}"/> |
| <attribute name="Implementation-Vendor" |
| value="The Apache Software Foundation"/> |
| - <attribute name="X-Compile-Source-JDK" |
| + <attribute name="X-Compile-Source-JDK" |
| value="${javac.source}"/> |
| - <attribute name="X-Compile-Target-JDK" |
| + <attribute name="X-Compile-Target-JDK" |
| value="${javac.target}"/> |
| - </manifest> |
| + </manifest> |
| <metainf dir="${common.dir}"> |
| <include name="LICENSE.txt"/> |
| <include name="NOTICE.txt"/> |
| @@ -224,7 +224,7 @@
|
| <!-- ================================================================== --> |
| <target name="package" depends="jar-core, javadocs, war-demo, build-contrib, init-dist"> |
| <copy file="${build.demo.template}" tofile="${build.dir}/build-demo.xml"> |
| - <filterset begintoken="@PLACEHOLDER_" endtoken="@"> |
| + <filterset begintoken="@PLACEHOLDER_" endtoken="@"> |
| <filter token="version" value="${version}"/> |
| <filter token="javac.source" value="${javac.source}"/> |
| <filter token="javac.target" value="${javac.target}"/> |
| @@ -372,14 +372,14 @@
|
| <checksum forceOverwrite="yes" fileext=".md5"> |
| <fileset dir="${maven.dist.dir}" excludes="**/*.md5"/> |
| </checksum> |
| - |
| + |
| <!-- do sha1 checksums --> |
| <checksum forceOverwrite="yes" algorithm="SHA" fileext=".sha1"> |
| <fileset dir="${maven.dist.dir}" excludes="**/*.sha1, **/*.md5"/> |
| </checksum> |
| </sequential> |
| </target> |
| - |
| + |
| <!-- ================================================================== --> |
| <!-- Build the JavaCC files into the source tree --> |
| <!-- ================================================================== --> |
| @@ -414,35 +414,44 @@
|
| </delete> |
| </target> |
| |
| - <target name="javacc" depends="clean-javacc,javacc-StandardAnalyzer,javacc-QueryParser,javacc-HTMLParser"/> |
| + <target name="javacc" depends="clean-javacc,javacc-QueryParser,javacc-HTMLParser"/> |
| |
| - <target name="javacc-StandardAnalyzer" depends="init,javacc-check" if="javacc.present"> |
| - <!-- generate this in a build directory so we can exclude ParseException --> |
| - <mkdir dir="${build.dir}/gen/org/apache/lucene/analysis/standard"/> |
| - |
| - <invoke-javacc target="src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj" |
| - outputDir="${build.dir}/gen/org/apache/lucene/analysis/standard" |
| - /> |
| - <copy todir="src/java/org/apache/lucene/analysis/standard"> |
| - <fileset dir="${build.dir}/gen/org/apache/lucene/analysis/standard"> |
| - <include name="*.java"/> |
| - <exclude name="ParseException.java"/> |
| - </fileset> |
| - </copy> |
| - </target> |
| - |
| <target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present"> |
| <invoke-javacc target="src/java/org/apache/lucene/queryParser/QueryParser.jj" |
| outputDir="src/java/org/apache/lucene/queryParser" |
| /> |
| </target> |
| - |
| + |
| <target name="javacc-HTMLParser" depends="init,javacc-check" if="javacc.present"> |
| <invoke-javacc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj" |
| outputDir="src/demo/org/apache/lucene/demo/html" |
| /> |
| </target> |
| - |
| + |
| + <!-- ================================================================== --> |
| + <!-- Build the JFlex files into the source tree --> |
| + <!-- ================================================================== --> |
| + |
| + <target name="jflex" depends="clean-jflex,jflex-StandardAnalyzer" /> |
| + |
| + <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present"> |
| + <taskdef classname="JFlex.anttask.JFlexTask" name="jflex"> |
| + <classpath location="${jflex.home}/lib/JFlex.jar" /> |
| + </taskdef> |
| + |
| + <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex" |
| + outdir="src/java/org/apache/lucene/analysis/standard" |
| + nobak="on" /> |
| + </target> |
| + |
| + <target name="clean-jflex"> |
| + <delete> |
| + <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java"> |
| + <containsregexp expression="generated.*by.*JFlex"/> |
| + </fileset> |
| + </delete> |
| + </target> |
| + |
| <macrodef name="contrib-crawl"> |
| <attribute name="target" default=""/> |
| <attribute name="failonerror" default="true"/> |
| @@ -485,7 +494,7 @@
|
| <attribute name="access"/> |
| <attribute name="destdir"/> |
| <sequential> |
| - |
| + |
| <dirset dir="contrib/gdata-server/src/core/src/java" id="gdata-server-core"> |
| <include name="**" if="build-1-5-contrib" /> |
| </dirset> |
| @@ -520,7 +529,7 @@
|
| |
| <!-- please keep this list up to date, and in alpha order... --> |
| <!-- with the minor exception of gdata which is managed by ref --> |
| - |
| + |
| <!-- ie: `find contrib/* -path \*src/java | grep -v gdata | sort` --> |
| |
| <!-- if you make changes to the list of package sets, also --> |
| @@ -552,7 +561,7 @@
|
| <!-- If the main javadoc Group listing includes an "Other --> |
| <!-- Packages" group after the ones listed here, then those --> |
| <!-- packages are not being matched by any of these rules --> |
| - |
| + |
| <group title="Core" packages="org.apache.*:org.apache.lucene.analysis:org.apache.lucene.analysis.standard*"/> |
| |
| <group title="Demo" packages="org.apache.lucene.demo*"/> |
| @@ -585,7 +594,7 @@
|
| |
| </sequential> |
| </macrodef> |
| - |
| + |
| <!-- Macro for building checksum files |
| This is only needed until the "format" option is supported |
| by ant's built in checksum task |
| Index: E:/projects/lucene/trunk/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java
|
| ===================================================================
|
| --- E:/projects/lucene/trunk/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java (revision 560135)
|
| +++ E:/projects/lucene/trunk/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java (working copy)
|
| @@ -18,8 +18,7 @@
|
| */ |
| |
| import junit.framework.TestCase; |
| -import java.util.Calendar; |
| -import java.util.Date; |
| +import java.util.*; |
| import java.text.ParseException; |
| import java.text.SimpleDateFormat; |
| import org.apache.lucene.index.IndexWriter; |
| @@ -131,7 +130,7 @@
|
| } |
| |
| private Date parseDate(String s) throws ParseException { |
| - return new SimpleDateFormat("yyyy MMM dd").parse(s); |
| + return new SimpleDateFormat("yyyy MMM dd", Locale.US).parse(s); |
| } |
| |
| } |