| /* The following code was generated by JFlex 1.7.0 */ |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.lucene.analysis.classic; |
| |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| |
| /** This class implements the classic lucene StandardTokenizer up until 3.0 */ |
| @SuppressWarnings("fallthrough") |
| class ClassicTokenizerImpl { |
| |
| /** This character denotes the end of file */ |
| public static final int YYEOF = -1; |
| |
| /** initial size of the lookahead buffer */ |
| private static final int ZZ_BUFFERSIZE = 4096; |
| |
| /** lexical states */ |
| public static final int YYINITIAL = 0; |
| |
| /** |
| * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l ZZ_LEXSTATE[l+1] is the state in |
| * the DFA for the lexical state l at the beginning of a line l is of the form l = 2*k, k a non |
| * negative integer |
| */ |
| private static final int ZZ_LEXSTATE[] = {0, 0}; |
| |
| /** Translates characters to character classes */ |
| private static final String ZZ_CMAP_PACKED = |
| "\46\0\1\5\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0" |
| + "\1\6\32\12\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12" |
| + "\4\0\1\12\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12" |
| + "\34\0\136\12\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12" |
| + "\11\0\1\12\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12" |
| + "\1\0\24\12\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12" |
| + "\12\0\71\12\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12" |
| + "\67\0\46\12\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12" |
| + "\56\0\32\12\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12" |
| + "\17\0\2\12\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0" |
| + "\46\12\u015f\0\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0" |
| + "\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0" |
| + "\1\12\3\0\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12" |
| + "\23\0\6\12\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12" |
| + "\1\0\2\12\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2" |
| + "\2\0\3\12\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12" |
| + "\1\0\7\12\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12" |
| + "\17\0\1\12\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12" |
| + "\1\0\7\12\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12" |
| + "\1\0\3\12\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12" |
| + "\3\0\2\12\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12" |
| + "\3\0\10\12\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12" |
| + "\1\0\27\12\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2" |
| + "\25\0\10\12\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12" |
| + "\44\0\1\12\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12" |
| + "\1\0\27\12\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12" |
| + "\3\0\30\12\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1" |
| + "\60\12\1\1\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0" |
| + "\1\12\2\0\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0" |
| + "\7\12\1\0\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0" |
| + "\4\12\1\0\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0" |
| + "\12\2\2\0\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0" |
| + "\42\12\35\0\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0" |
| + "\12\2\6\0\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0" |
| + "\104\12\5\0\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0" |
| + "\4\12\2\0\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0" |
| + "\1\12\1\0\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0" |
| + "\7\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0" |
| + "\27\12\1\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0" |
| + "\47\12\1\0\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0" |
| + "\10\12\12\0\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0" |
| + "\12\2\6\0\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0" |
| + "\26\12\2\0\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0" |
| + "\1\12\1\0\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0" |
| + "\7\12\1\0\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0" |
| + "\6\12\4\0\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0" |
| + "\1\12\4\0\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0" |
| + "\1\12\1\0\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0" |
| + "\7\12\u0ecb\0\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13" |
| + "\2\13\132\13\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0" |
| + "\30\12\70\0\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13" |
| + "\132\13\u048d\12\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12" |
| + "\5\0\1\12\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12" |
| + "\1\0\2\12\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12" |
| + "\2\0\66\12\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12" |
| + "\23\0\12\2\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12" |
| + "\3\0\6\12\2\0\6\12\2\0\6\12\2\0\3\12\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\63\0"; |
| |
| /** Translates characters to character classes */ |
| private static final char[] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); |
| |
| /** Translates DFA states to action switch labels. */ |
| private static final int[] ZZ_ACTION = zzUnpackAction(); |
| |
| private static final String ZZ_ACTION_PACKED_0 = |
| "\1\0\1\1\3\2\1\3\13\0\1\2\3\4\2\0" |
| + "\1\5\1\0\1\5\3\4\6\5\1\6\1\4\2\7" |
| + "\1\10\1\0\1\10\3\0\2\10\1\11\1\12\1\4"; |
| |
| private static int[] zzUnpackAction() { |
| int[] result = new int[50]; |
| int offset = 0; |
| offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); |
| return result; |
| } |
| |
| private static int zzUnpackAction(String packed, int offset, int[] result) { |
| int i = 0; /* index in packed string */ |
| int j = offset; /* index in unpacked array */ |
| int l = packed.length(); |
| while (i < l) { |
| int count = packed.charAt(i++); |
| int value = packed.charAt(i++); |
| do result[j++] = value; |
| while (--count > 0); |
| } |
| return j; |
| } |
| |
| /** Translates a state to a row index in the transition table */ |
| private static final int[] ZZ_ROWMAP = zzUnpackRowMap(); |
| |
| private static final String ZZ_ROWMAP_PACKED_0 = |
| "\0\0\0\14\0\30\0\44\0\60\0\14\0\74\0\110" |
| + "\0\124\0\140\0\154\0\170\0\204\0\220\0\234\0\250" |
| + "\0\264\0\300\0\314\0\330\0\344\0\360\0\374\0\u0108" |
| + "\0\u0114\0\u0120\0\u012c\0\u0138\0\u0144\0\u0150\0\u015c\0\u0168" |
| + "\0\u0174\0\u0180\0\u018c\0\u0198\0\u01a4\0\250\0\u01b0\0\u01bc" |
| + "\0\u01c8\0\u01d4\0\u01e0\0\u01ec\0\u01f8\0\74\0\154\0\u0204" |
| + "\0\u0210\0\u021c"; |
| |
| private static int[] zzUnpackRowMap() { |
| int[] result = new int[50]; |
| int offset = 0; |
| offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); |
| return result; |
| } |
| |
| private static int zzUnpackRowMap(String packed, int offset, int[] result) { |
| int i = 0; /* index in packed string */ |
| int j = offset; /* index in unpacked array */ |
| int l = packed.length(); |
| while (i < l) { |
| int high = packed.charAt(i++) << 16; |
| result[j++] = high | packed.charAt(i++); |
| } |
| return j; |
| } |
| |
| /** The transition table of the DFA */ |
| private static final int[] ZZ_TRANS = zzUnpackTrans(); |
| |
| private static final String ZZ_TRANS_PACKED_0 = |
| "\1\2\1\3\1\4\7\2\1\5\1\6\15\0\2\3" |
| + "\1\0\1\7\1\0\1\10\2\11\1\12\1\3\2\0" |
| + "\1\3\1\4\1\0\1\13\1\0\1\10\2\14\1\15" |
| + "\1\4\2\0\1\3\1\4\1\16\1\17\1\20\1\21" |
| + "\2\11\1\12\1\22\2\0\1\23\1\24\7\0\1\25" |
| + "\2\0\2\26\7\0\1\26\2\0\1\27\1\30\7\0" |
| + "\1\31\3\0\1\32\7\0\1\12\2\0\1\33\1\34" |
| + "\7\0\1\35\2\0\1\36\1\37\7\0\1\40\2\0" |
| + "\1\41\1\42\7\0\1\43\13\0\1\44\2\0\1\23" |
| + "\1\24\7\0\1\45\13\0\1\46\2\0\2\26\7\0" |
| + "\1\47\2\0\1\3\1\4\1\16\1\7\1\20\1\21" |
| + "\2\11\1\12\1\22\2\0\2\23\1\0\1\50\1\0" |
| + "\1\10\2\51\1\0\1\23\2\0\1\23\1\24\1\0" |
| + "\1\52\1\0\1\10\2\53\1\54\1\24\2\0\1\23" |
| + "\1\24\1\0\1\50\1\0\1\10\2\51\1\0\1\25" |
| + "\2\0\2\26\1\0\1\55\2\0\1\55\2\0\1\26" |
| + "\2\0\2\27\1\0\1\51\1\0\1\10\2\51\1\0" |
| + "\1\27\2\0\1\27\1\30\1\0\1\53\1\0\1\10" |
| + "\2\53\1\54\1\30\2\0\1\27\1\30\1\0\1\51" |
| + "\1\0\1\10\2\51\1\0\1\31\3\0\1\32\1\0" |
| + "\1\54\2\0\3\54\1\32\2\0\2\33\1\0\1\56" |
| + "\1\0\1\10\2\11\1\12\1\33\2\0\1\33\1\34" |
| + "\1\0\1\57\1\0\1\10\2\14\1\15\1\34\2\0" |
| + "\1\33\1\34\1\0\1\56\1\0\1\10\2\11\1\12" |
| + "\1\35\2\0\2\36\1\0\1\11\1\0\1\10\2\11" |
| + "\1\12\1\36\2\0\1\36\1\37\1\0\1\14\1\0" |
| + "\1\10\2\14\1\15\1\37\2\0\1\36\1\37\1\0" |
| + "\1\11\1\0\1\10\2\11\1\12\1\40\2\0\2\41" |
| + "\1\0\1\12\2\0\3\12\1\41\2\0\1\41\1\42" |
| + "\1\0\1\15\2\0\3\15\1\42\2\0\1\41\1\42" |
| + "\1\0\1\12\2\0\3\12\1\43\4\0\1\16\6\0" |
| + "\1\44\2\0\1\23\1\24\1\0\1\60\1\0\1\10" |
| + "\2\51\1\0\1\25\2\0\2\26\1\0\1\55\2\0" |
| + "\1\55\2\0\1\47\2\0\2\23\7\0\1\23\2\0" |
| + "\2\27\7\0\1\27\2\0\2\33\7\0\1\33\2\0" |
| + "\2\36\7\0\1\36\2\0\2\41\7\0\1\41\2\0" |
| + "\2\61\7\0\1\61\2\0\2\23\7\0\1\62\2\0" |
| + "\2\61\1\0\1\55\2\0\1\55\2\0\1\61\2\0" |
| + "\2\23\1\0\1\60\1\0\1\10\2\51\1\0\1\23" |
| + "\1\0"; |
| |
| private static int[] zzUnpackTrans() { |
| int[] result = new int[552]; |
| int offset = 0; |
| offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); |
| return result; |
| } |
| |
| private static int zzUnpackTrans(String packed, int offset, int[] result) { |
| int i = 0; /* index in packed string */ |
| int j = offset; /* index in unpacked array */ |
| int l = packed.length(); |
| while (i < l) { |
| int count = packed.charAt(i++); |
| int value = packed.charAt(i++); |
| value--; |
| do result[j++] = value; |
| while (--count > 0); |
| } |
| return j; |
| } |
| |
| /* error codes */ |
| private static final int ZZ_UNKNOWN_ERROR = 0; |
| private static final int ZZ_NO_MATCH = 1; |
| private static final int ZZ_PUSHBACK_2BIG = 2; |
| |
| /* error messages for the codes above */ |
| private static final String ZZ_ERROR_MSG[] = { |
| "Unknown internal scanner error", |
| "Error: could not match input", |
| "Error: pushback value was too large" |
| }; |
| |
| /** ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> */ |
| private static final int[] ZZ_ATTRIBUTE = zzUnpackAttribute(); |
| |
| private static final String ZZ_ATTRIBUTE_PACKED_0 = |
| "\1\0\1\11\3\1\1\11\13\0\4\1\2\0\1\1" + "\1\0\17\1\1\0\1\1\3\0\5\1"; |
| |
| private static int[] zzUnpackAttribute() { |
| int[] result = new int[50]; |
| int offset = 0; |
| offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); |
| return result; |
| } |
| |
| private static int zzUnpackAttribute(String packed, int offset, int[] result) { |
| int i = 0; /* index in packed string */ |
| int j = offset; /* index in unpacked array */ |
| int l = packed.length(); |
| while (i < l) { |
| int count = packed.charAt(i++); |
| int value = packed.charAt(i++); |
| do result[j++] = value; |
| while (--count > 0); |
| } |
| return j; |
| } |
| |
| /** the input device */ |
| private java.io.Reader zzReader; |
| |
| /** the current state of the DFA */ |
| private int zzState; |
| |
| /** the current lexical state */ |
| private int zzLexicalState = YYINITIAL; |
| |
| /** |
| * this buffer contains the current text to be matched and is the source of the yytext() string |
| */ |
| private char zzBuffer[] = new char[ZZ_BUFFERSIZE]; |
| |
| /** the textposition at the last accepting state */ |
| private int zzMarkedPos; |
| |
| /** the current text position in the buffer */ |
| private int zzCurrentPos; |
| |
| /** startRead marks the beginning of the yytext() string in the buffer */ |
| private int zzStartRead; |
| |
| /** endRead marks the last character in the buffer, that has been read from input */ |
| private int zzEndRead; |
| |
| /** number of newlines encountered up to the start of the matched text */ |
| private int yyline; |
| |
| /** the number of characters up to the start of the matched text */ |
| private int yychar; |
| |
| /** the number of characters from the last newline up to the start of the matched text */ |
| private int yycolumn; |
| |
| /** zzAtBOL == true iff the scanner is currently at the beginning of a line */ |
| private boolean zzAtBOL = true; |
| |
| /** zzAtEOF == true iff the scanner is at the EOF */ |
| private boolean zzAtEOF; |
| |
| /** denotes if the user-EOF-code has already been executed */ |
| private boolean zzEOFDone; |
| |
| /** |
| * The number of occupied positions in zzBuffer beyond zzEndRead. When a lead/high surrogate has |
| * been read from the input stream into the final zzBuffer position, this will have a value of 1; |
| * otherwise, it will have a value of 0. |
| */ |
| private int zzFinalHighSurrogate = 0; |
| |
| /* user code: */ |
| |
| public static final int ALPHANUM = ClassicTokenizer.ALPHANUM; |
| public static final int APOSTROPHE = ClassicTokenizer.APOSTROPHE; |
| public static final int ACRONYM = ClassicTokenizer.ACRONYM; |
| public static final int COMPANY = ClassicTokenizer.COMPANY; |
| public static final int EMAIL = ClassicTokenizer.EMAIL; |
| public static final int HOST = ClassicTokenizer.HOST; |
| public static final int NUM = ClassicTokenizer.NUM; |
| public static final int CJ = ClassicTokenizer.CJ; |
| public static final int ACRONYM_DEP = ClassicTokenizer.ACRONYM_DEP; |
| |
| public static final String[] TOKEN_TYPES = ClassicTokenizer.TOKEN_TYPES; |
| |
| public final int yychar() { |
| return yychar; |
| } |
| |
| /** Fills CharTermAttribute with the current token text. */ |
| public final void getText(CharTermAttribute t) { |
| t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); |
| } |
| |
| public final void setBufferSize(int numChars) { |
| throw new UnsupportedOperationException(); |
| } |
| |
| /** |
| * Creates a new scanner |
| * |
| * @param in the java.io.Reader to read input from. |
| */ |
| ClassicTokenizerImpl(java.io.Reader in) { |
| this.zzReader = in; |
| } |
| |
| /** |
| * Unpacks the compressed character translation table. |
| * |
| * @param packed the packed character translation table |
| * @return the unpacked character translation table |
| */ |
| private static char[] zzUnpackCMap(String packed) { |
| char[] map = new char[0x110000]; |
| int i = 0; /* index in packed string */ |
| int j = 0; /* index in unpacked array */ |
| while (i < 1170) { |
| int count = packed.charAt(i++); |
| char value = packed.charAt(i++); |
| do map[j++] = value; |
| while (--count > 0); |
| } |
| return map; |
| } |
| |
| /** |
| * Refills the input buffer. |
| * |
| * @return <code>false</code>, iff there was new input. |
| * @exception java.io.IOException if any I/O-Error occurs |
| */ |
| private boolean zzRefill() throws java.io.IOException { |
| |
| /* first: make room (if you can) */ |
| if (zzStartRead > 0) { |
| zzEndRead += zzFinalHighSurrogate; |
| zzFinalHighSurrogate = 0; |
| System.arraycopy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead); |
| |
| /* translate stored positions */ |
| zzEndRead -= zzStartRead; |
| zzCurrentPos -= zzStartRead; |
| zzMarkedPos -= zzStartRead; |
| zzStartRead = 0; |
| } |
| |
| /* is the buffer big enough? */ |
| if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) { |
| /* if not: blow it up */ |
| char newBuffer[] = new char[zzBuffer.length * 2]; |
| System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length); |
| zzBuffer = newBuffer; |
| zzEndRead += zzFinalHighSurrogate; |
| zzFinalHighSurrogate = 0; |
| } |
| |
| /* fill the buffer with new input */ |
| int requested = zzBuffer.length - zzEndRead; |
| int numRead = zzReader.read(zzBuffer, zzEndRead, requested); |
| |
| /* not supposed to occur according to specification of java.io.Reader */ |
| if (numRead == 0) { |
| throw new java.io.IOException( |
| "Reader returned 0 characters. See JFlex examples for workaround."); |
| } |
| if (numRead > 0) { |
| zzEndRead += numRead; |
| /* If numRead == requested, we might have requested to few chars to |
| encode a full Unicode character. We assume that a Reader would |
| otherwise never return half characters. */ |
| if (numRead == requested) { |
| if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) { |
| --zzEndRead; |
| zzFinalHighSurrogate = 1; |
| } |
| } |
| /* potentially more input available */ |
| return false; |
| } |
| |
| /* numRead < 0 ==> end of stream */ |
| return true; |
| } |
| |
| /** Closes the input stream. */ |
| public final void yyclose() throws java.io.IOException { |
| zzAtEOF = true; /* indicate end of file */ |
| zzEndRead = zzStartRead; /* invalidate buffer */ |
| |
| if (zzReader != null) zzReader.close(); |
| } |
| |
| /** |
| * Resets the scanner to read from a new input stream. Does not close the old reader. |
| * |
| * <p>All internal variables are reset, the old input stream <b>cannot</b> be reused (internal |
| * buffer is discarded and lost). Lexical state is set to <code>ZZ_INITIAL</code>. |
| * |
| * <p>Internal scan buffer is resized down to its initial length, if it has grown. |
| * |
| * @param reader the new input stream |
| */ |
| public final void yyreset(java.io.Reader reader) { |
| zzReader = reader; |
| zzAtBOL = true; |
| zzAtEOF = false; |
| zzEOFDone = false; |
| zzEndRead = zzStartRead = 0; |
| zzCurrentPos = zzMarkedPos = 0; |
| zzFinalHighSurrogate = 0; |
| yyline = yychar = yycolumn = 0; |
| zzLexicalState = YYINITIAL; |
| if (zzBuffer.length > ZZ_BUFFERSIZE) zzBuffer = new char[ZZ_BUFFERSIZE]; |
| } |
| |
| /** Returns the current lexical state. */ |
| public final int yystate() { |
| return zzLexicalState; |
| } |
| |
| /** |
| * Enters a new lexical state |
| * |
| * @param newState the new lexical state |
| */ |
| public final void yybegin(int newState) { |
| zzLexicalState = newState; |
| } |
| |
| /** Returns the text matched by the current regular expression. */ |
| public final String yytext() { |
| return new String(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); |
| } |
| |
| /** |
| * Returns the character at position <code>pos</code> from the matched text. |
| * |
| * <p>It is equivalent to yytext().charAt(pos), but faster |
| * |
| * @param pos the position of the character to fetch. A value from 0 to yylength()-1. |
| * @return the character at position pos |
| */ |
| public final char yycharat(int pos) { |
| return zzBuffer[zzStartRead + pos]; |
| } |
| |
| /** Returns the length of the matched text region. */ |
| public final int yylength() { |
| return zzMarkedPos - zzStartRead; |
| } |
| |
| /** |
| * Reports an error that occured while scanning. |
| * |
| * <p>In a wellformed scanner (no or only correct usage of yypushback(int) and a match-all |
| * fallback rule) this method will only be called with things that "Can't Possibly Happen". If |
| * this method is called, something is seriously wrong (e.g. a JFlex bug producing a faulty |
| * scanner etc.). |
| * |
| * <p>Usual syntax/scanner level error handling should be done in error fallback rules. |
| * |
| * @param errorCode the code of the errormessage to display |
| */ |
| private void zzScanError(int errorCode) { |
| String message; |
| try { |
| message = ZZ_ERROR_MSG[errorCode]; |
| } catch (ArrayIndexOutOfBoundsException e) { |
| message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR]; |
| } |
| |
| throw new Error(message); |
| } |
| |
| /** |
| * Pushes the specified amount of characters back into the input stream. |
| * |
| * <p>They will be read again by then next call of the scanning method |
| * |
| * @param number the number of characters to be read again. This number must not be greater than |
| * yylength()! |
| */ |
| public void yypushback(int number) { |
| if (number > yylength()) zzScanError(ZZ_PUSHBACK_2BIG); |
| |
| zzMarkedPos -= number; |
| } |
| |
| /** |
| * Resumes scanning until the next regular expression is matched, the end of input is encountered |
| * or an I/O-Error occurs. |
| * |
| * @return the next token |
| * @exception java.io.IOException if any I/O-Error occurs |
| */ |
| public int getNextToken() throws java.io.IOException { |
| int zzInput; |
| int zzAction; |
| |
| // cached fields: |
| int zzCurrentPosL; |
| int zzMarkedPosL; |
| int zzEndReadL = zzEndRead; |
| char[] zzBufferL = zzBuffer; |
| char[] zzCMapL = ZZ_CMAP; |
| |
| int[] zzTransL = ZZ_TRANS; |
| int[] zzRowMapL = ZZ_ROWMAP; |
| int[] zzAttrL = ZZ_ATTRIBUTE; |
| |
| while (true) { |
| zzMarkedPosL = zzMarkedPos; |
| |
| yychar += zzMarkedPosL - zzStartRead; |
| |
| zzAction = -1; |
| |
| zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL; |
| |
| zzState = ZZ_LEXSTATE[zzLexicalState]; |
| |
| // set up zzAction for empty match case: |
| int zzAttributes = zzAttrL[zzState]; |
| if ((zzAttributes & 1) == 1) { |
| zzAction = zzState; |
| } |
| |
| zzForAction: |
| { |
| while (true) { |
| |
| if (zzCurrentPosL < zzEndReadL) { |
| zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL); |
| zzCurrentPosL += Character.charCount(zzInput); |
| } else if (zzAtEOF) { |
| zzInput = YYEOF; |
| break zzForAction; |
| } else { |
| // store back cached positions |
| zzCurrentPos = zzCurrentPosL; |
| zzMarkedPos = zzMarkedPosL; |
| boolean eof = zzRefill(); |
| // get translated positions and possibly new buffer |
| zzCurrentPosL = zzCurrentPos; |
| zzMarkedPosL = zzMarkedPos; |
| zzBufferL = zzBuffer; |
| zzEndReadL = zzEndRead; |
| if (eof) { |
| zzInput = YYEOF; |
| break zzForAction; |
| } else { |
| zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL); |
| zzCurrentPosL += Character.charCount(zzInput); |
| } |
| } |
| int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]; |
| if (zzNext == -1) break zzForAction; |
| zzState = zzNext; |
| |
| zzAttributes = zzAttrL[zzState]; |
| if ((zzAttributes & 1) == 1) { |
| zzAction = zzState; |
| zzMarkedPosL = zzCurrentPosL; |
| if ((zzAttributes & 8) == 8) break zzForAction; |
| } |
| } |
| } |
| |
| // store back cached position |
| zzMarkedPos = zzMarkedPosL; |
| |
| if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { |
| zzAtEOF = true; |
| return YYEOF; |
| } else { |
| switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { |
| case 1: |
| { |
| /* Break so we don't hit fall-through warning: */ |
| break; /* ignore */ |
| } |
| // fall through |
| case 11: |
| break; |
| case 2: |
| { |
| return ALPHANUM; |
| } |
| // fall through |
| case 12: |
| break; |
| case 3: |
| { |
| return CJ; |
| } |
| // fall through |
| case 13: |
| break; |
| case 4: |
| { |
| return HOST; |
| } |
| // fall through |
| case 14: |
| break; |
| case 5: |
| { |
| return NUM; |
| } |
| // fall through |
| case 15: |
| break; |
| case 6: |
| { |
| return APOSTROPHE; |
| } |
| // fall through |
| case 16: |
| break; |
| case 7: |
| { |
| return COMPANY; |
| } |
| // fall through |
| case 17: |
| break; |
| case 8: |
| { |
| return ACRONYM_DEP; |
| } |
| // fall through |
| case 18: |
| break; |
| case 9: |
| { |
| return ACRONYM; |
| } |
| // fall through |
| case 19: |
| break; |
| case 10: |
| { |
| return EMAIL; |
| } |
| // fall through |
| case 20: |
| break; |
| default: |
| zzScanError(ZZ_NO_MATCH); |
| } |
| } |
| } |
| } |
| } |