| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * $Id$ |
| */ |
| |
| // --------------------------------------------------------------------------- |
| // Includes |
| // --------------------------------------------------------------------------- |
| #include <xercesc/util/regx/RegxParser.hpp> |
| #include <xercesc/util/XMLString.hpp> |
| #include <xercesc/util/ParseException.hpp> |
| #include <xercesc/util/regx/RegularExpression.hpp> |
| #include <xercesc/util/regx/RegxUtil.hpp> |
| #include <xercesc/util/regx/RegxDefs.hpp> |
| #include <xercesc/util/regx/TokenInc.hpp> |
| #include <xercesc/framework/XMLErrorCodes.hpp> |
| |
| XERCES_CPP_NAMESPACE_BEGIN |
| |
| // --------------------------------------------------------------------------- |
| // RegxParser::ReferencePostion: Constructors and Destructor |
| // --------------------------------------------------------------------------- |
| RegxParser::ReferencePosition::ReferencePosition(const int refNo, |
| const XMLSize_t position) |
| :fReferenceNo(refNo) |
| , fPosition(position) |
| { |
| |
| } |
| |
| // --------------------------------------------------------------------------- |
| // RegxParser: Constructors and Destructors |
| // --------------------------------------------------------------------------- |
| RegxParser::RegxParser(MemoryManager* const manager) |
| :fMemoryManager(manager), |
| fHasBackReferences(false), |
| fOptions(0), |
| fOffset(0), |
| fNoGroups(1), |
| fParseContext(regexParserStateNormal), |
| fStringLen(0), |
| fState(REGX_T_EOF), |
| fCharData(0), |
| fString(0), |
| fReferences(0), |
| fTokenFactory(0) |
| { |
| } |
| |
| RegxParser::~RegxParser() { |
| |
| fMemoryManager->deallocate(fString);//delete [] fString; |
| delete fReferences; |
| } |
| |
| // --------------------------------------------------------------------------- |
| // RegxParser: Parsing methods |
| // --------------------------------------------------------------------------- |
| Token* RegxParser::parse(const XMLCh* const regxStr, const int options) { |
| |
| // if TokenFactory is not set do nothing. |
| // REVISIT - should we throw an exception |
| if (fTokenFactory == 0) { |
| return 0; |
| } |
| |
| fOptions = options; |
| fOffset = 0; |
| fNoGroups = 1; |
| fHasBackReferences = false; |
| setParseContext(regexParserStateNormal); |
| if (fString) |
| fMemoryManager->deallocate(fString);//delete [] fString; |
| fString = XMLString::replicate(regxStr, fMemoryManager); |
| |
| if (isSet(RegularExpression::EXTENDED_COMMENT)) { |
| |
| if (fString) |
| fMemoryManager->deallocate(fString);//delete [] fString; |
| fString = RegxUtil::stripExtendedComment(regxStr, fMemoryManager); |
| } |
| |
| fStringLen = XMLString::stringLen(fString); |
| processNext(); |
| |
| Token* retTok = parseRegx(); |
| |
| if (fOffset != fStringLen) { |
| XMLCh value1[65]; |
| XMLString::sizeToText(fOffset, value1, 64, 10, fMemoryManager); |
| ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_Parse1, value1, fString, fMemoryManager); |
| } |
| |
| if (fReferences != 0) { |
| |
| XMLSize_t refSize = fReferences->size(); |
| for (XMLSize_t i = 0; i < refSize; i++) { |
| |
| if (fNoGroups <= fReferences->elementAt(i)->fReferenceNo) { |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Parse2, fMemoryManager); |
| } |
| } |
| |
| fReferences->removeAllElements(); |
| } |
| |
| return retTok; |
| } |
| |
| |
| void RegxParser::processNext() { |
| |
| if (fOffset >= fStringLen) { |
| |
| fCharData = -1; |
| fState = REGX_T_EOF; |
| return; |
| } |
| |
| parserState nextState; |
| XMLCh ch = fString[fOffset++]; |
| fCharData = ch; |
| |
| if (fParseContext == regexParserStateInBrackets) { |
| |
| switch (ch) { |
| case chBackSlash: |
| nextState = REGX_T_BACKSOLIDUS; |
| |
| if (fOffset >= fStringLen) { |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Next1, fMemoryManager); |
| } |
| |
| fCharData = fString[fOffset++]; |
| break; |
| case chDash: |
| if (fOffset < fStringLen && fString[fOffset] == chOpenSquare) { |
| |
| fOffset++; |
| nextState = REGX_T_XMLSCHEMA_CC_SUBTRACTION; |
| } |
| else { |
| nextState = REGX_T_CHAR; |
| } |
| break; |
| default: |
| if (RegxUtil::isHighSurrogate(ch) && fOffset < fStringLen) { |
| |
| XMLCh lowCh = fString[fOffset]; |
| if (RegxUtil::isLowSurrogate(lowCh)) { |
| fCharData = RegxUtil::composeFromSurrogate(ch, lowCh); |
| fOffset++; |
| } |
| else { |
| throw XMLErrs::Expected2ndSurrogateChar; |
| } |
| } |
| |
| nextState = REGX_T_CHAR; |
| } |
| |
| fState = nextState; |
| return; |
| } |
| |
| switch (ch) { |
| |
| case chPipe: |
| nextState = REGX_T_OR; |
| break; |
| case chAsterisk: |
| nextState = REGX_T_STAR; |
| break; |
| case chPlus: |
| nextState = REGX_T_PLUS; |
| break; |
| case chQuestion: |
| nextState = REGX_T_QUESTION; |
| break; |
| case chCloseParen: |
| nextState = REGX_T_RPAREN; |
| break; |
| case chPeriod: |
| nextState = REGX_T_DOT; |
| break; |
| case chOpenSquare: |
| nextState = REGX_T_LBRACKET; |
| break; |
| case chCaret: |
| nextState = REGX_T_CARET; |
| break; |
| case chDollarSign: |
| nextState = REGX_T_DOLLAR; |
| break; |
| case chOpenParen: |
| nextState = REGX_T_LPAREN; |
| break; |
| case chBackSlash: |
| nextState = REGX_T_BACKSOLIDUS; |
| if (fOffset >= fStringLen) { |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Next1, fMemoryManager); |
| } |
| |
| fCharData = fString[fOffset++]; |
| break; |
| default: |
| nextState = REGX_T_CHAR; |
| if (RegxUtil::isHighSurrogate(ch) && fOffset < fStringLen) { |
| |
| XMLCh lowCh = fString[fOffset]; |
| if (RegxUtil::isLowSurrogate(lowCh)) { |
| fCharData = RegxUtil::composeFromSurrogate(ch, lowCh); |
| fOffset++; |
| } |
| else { |
| throw XMLErrs::Expected2ndSurrogateChar; |
| } |
| } |
| } |
| |
| fState = nextState; |
| } |
| |
| |
| Token* RegxParser::parseRegx(const bool matchingRParen) { |
| |
| Token* tok = parseTerm(matchingRParen); |
| Token* parentTok = 0; |
| |
| while (fState == REGX_T_OR) { |
| |
| processNext(); |
| if (parentTok == 0) { |
| |
| parentTok = fTokenFactory->createUnion(); |
| parentTok->addChild(tok, fTokenFactory); |
| tok = parentTok; |
| } |
| |
| tok->addChild(parseTerm(matchingRParen), fTokenFactory); |
| } |
| |
| return tok; |
| } |
| |
| |
| Token* RegxParser::parseTerm(const bool matchingRParen) { |
| |
| parserState state = fState; |
| |
| if (state == REGX_T_OR || state == REGX_T_EOF |
| || (state == REGX_T_RPAREN && matchingRParen)) { |
| return fTokenFactory->createToken(Token::T_EMPTY); |
| } |
| else { |
| |
| Token* tok = parseFactor(); |
| Token* concatTok = 0; |
| |
| while ((state = fState) != REGX_T_OR && state != REGX_T_EOF |
| && (state != REGX_T_RPAREN || !matchingRParen)) |
| { |
| if (concatTok == 0) { |
| |
| concatTok = fTokenFactory->createUnion(true); |
| concatTok->addChild(tok, fTokenFactory); |
| tok = concatTok; |
| } |
| concatTok->addChild(parseFactor(), fTokenFactory); |
| } |
| |
| return tok; |
| } |
| } |
| |
| |
| Token* RegxParser::processCaret() { |
| |
| processNext(); |
| return fTokenFactory->getLineBegin(); |
| } |
| |
| |
| Token* RegxParser::processDollar() { |
| |
| processNext(); |
| return fTokenFactory->getLineEnd(); |
| } |
| |
| |
| Token* RegxParser::processStar(Token* const tok) { |
| |
| processNext(); |
| |
| if (fState == REGX_T_QUESTION) { |
| processNext(); |
| return fTokenFactory->createClosure(tok, true); |
| } |
| |
| return fTokenFactory->createClosure(tok); |
| } |
| |
| |
| Token* RegxParser::processPlus(Token* const tok) { |
| |
| processNext(); |
| |
| if (fState == REGX_T_QUESTION) { |
| processNext(); |
| return fTokenFactory->createConcat(tok, |
| fTokenFactory->createClosure(tok,true)); |
| } |
| |
| return fTokenFactory->createConcat(tok, |
| fTokenFactory->createClosure(tok)); |
| } |
| |
| |
| Token* RegxParser::processQuestion(Token* const tok) { |
| |
| processNext(); |
| |
| Token* parentTok = fTokenFactory->createUnion(); |
| |
| if (fState == REGX_T_QUESTION) { |
| processNext(); |
| parentTok->addChild(fTokenFactory->createToken(Token::T_EMPTY), fTokenFactory); |
| parentTok->addChild(tok, fTokenFactory); |
| } |
| else { |
| parentTok->addChild(tok, fTokenFactory); |
| parentTok->addChild(fTokenFactory->createToken(Token::T_EMPTY), fTokenFactory); |
| } |
| |
| return parentTok; |
| } |
| |
| |
| Token* RegxParser::processParen() { |
| |
| processNext(); |
| int num = fNoGroups++; |
| Token* tok = fTokenFactory->createParenthesis(parseRegx(true),num); |
| |
| if (fState != REGX_T_RPAREN) |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Factor1, fMemoryManager); |
| |
| processNext(); |
| return tok; |
| } |
| |
| |
| Token* RegxParser::processBackReference() { |
| |
| XMLSize_t position = fOffset - 2; |
| |
| // Handle multi digit back references |
| int refNo = fCharData - chDigit_0; |
| while(true) { |
| processNext(); |
| if(fState != REGX_T_CHAR || fCharData < chDigit_0 || fCharData > chDigit_9) |
| break; |
| |
| int nextRefNo = (refNo * 10) + fCharData - chDigit_0; |
| if(nextRefNo >= fNoGroups) |
| break; |
| |
| refNo = nextRefNo; |
| } |
| |
| Token* tok = fTokenFactory->createBackReference(refNo); |
| |
| fHasBackReferences = true; |
| if (fReferences == 0) { |
| fReferences = new (fMemoryManager) RefVectorOf<ReferencePosition>(8, true, fMemoryManager); |
| } |
| |
| fReferences->addElement(new (fMemoryManager) ReferencePosition(refNo, position)); |
| return tok; |
| } |
| |
| |
| Token* RegxParser::parseFactor() { |
| |
| Token* tok = parseAtom(); |
| |
| switch(fState) { |
| |
| case REGX_T_STAR: |
| return processStar(tok); |
| case REGX_T_PLUS: |
| return processPlus(tok); |
| case REGX_T_QUESTION: |
| return processQuestion(tok); |
| case REGX_T_CHAR: |
| if (fCharData == chOpenCurly && fOffset < fStringLen) { |
| |
| int min = 0; |
| int max = -1; |
| XMLInt32 ch = fString[fOffset++]; |
| |
| if (ch >= chDigit_0 && ch <= chDigit_9) { |
| |
| min = ch - chDigit_0; |
| while (fOffset < fStringLen |
| && (ch = fString[fOffset++]) >= chDigit_0 |
| && ch <= chDigit_9) { |
| |
| min = min*10 + ch - chDigit_0; |
| } |
| |
| if (min < 0) |
| ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier5, fString, fMemoryManager); |
| } |
| else { |
| ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier1, fString, fMemoryManager); |
| } |
| |
| max = min; |
| |
| if (ch == chComma) { |
| |
| if (fOffset >= fStringLen) { |
| ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier3, fString, fMemoryManager); |
| } |
| else if ((ch = fString[fOffset++]) >= chDigit_0 && ch <= chDigit_9) { |
| |
| max = ch - chDigit_0; |
| while (fOffset < fStringLen |
| && (ch = fString[fOffset++]) >= chDigit_0 |
| && ch <= chDigit_9) { |
| |
| max = max*10 + ch - chDigit_0; |
| } |
| |
| if (max < 0) |
| ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier5, fString, fMemoryManager); |
| else if (min > max) |
| ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier4, fString, fMemoryManager); |
| } |
| else { |
| max = -1; |
| } |
| } |
| |
| if (ch != chCloseCurly) { |
| ThrowXMLwithMemMgr1(ParseException, XMLExcepts::Parser_Quantifier2, fString, fMemoryManager); |
| } |
| |
| if (checkQuestion(fOffset)) { |
| |
| tok = fTokenFactory->createClosure(tok, true); |
| fOffset++; |
| } |
| else { |
| tok = fTokenFactory->createClosure(tok); |
| } |
| |
| tok->setMin(min); |
| tok->setMax(max); |
| processNext(); |
| } |
| break; |
| default: |
| break; |
| } |
| |
| return tok; |
| } |
| |
| |
| Token* RegxParser::parseAtom() { |
| |
| Token* tok = 0; |
| |
| switch(fState) { |
| |
| case REGX_T_LPAREN: |
| return processParen(); |
| case REGX_T_DOT: |
| processNext(); |
| tok = fTokenFactory->getDot(); |
| break; |
| case REGX_T_CARET: |
| return processCaret(); |
| case REGX_T_DOLLAR: |
| return processDollar(); |
| case REGX_T_LBRACKET: |
| return parseCharacterClass(true); |
| case REGX_T_BACKSOLIDUS: |
| switch(fCharData) { |
| |
| case chLatin_d: |
| case chLatin_D: |
| case chLatin_w: |
| case chLatin_W: |
| case chLatin_s: |
| case chLatin_S: |
| case chLatin_c: |
| case chLatin_C: |
| case chLatin_i: |
| case chLatin_I: |
| tok = getTokenForShorthand(fCharData); |
| processNext(); |
| return tok; |
| case chDigit_0: |
| case chDigit_1: |
| case chDigit_2: |
| case chDigit_3: |
| case chDigit_4: |
| case chDigit_5: |
| case chDigit_6: |
| case chDigit_7: |
| case chDigit_8: |
| case chDigit_9: |
| return processBackReference(); |
| case chLatin_p: |
| case chLatin_P: |
| { |
| tok = processBacksolidus_pP(fCharData); |
| if (tok == 0) { |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom5, fMemoryManager); |
| } |
| } |
| break; |
| default: |
| { |
| XMLInt32 ch = decodeEscaped(); |
| if (ch < 0x10000) { |
| tok = fTokenFactory->createChar(ch); |
| } |
| else { |
| |
| XMLCh* surrogateStr = RegxUtil::decomposeToSurrogates(ch, fMemoryManager); |
| ArrayJanitor<XMLCh> janSurrogate(surrogateStr, fMemoryManager); |
| tok = fTokenFactory->createString(surrogateStr); |
| } |
| } |
| break; |
| } // end switch |
| |
| processNext(); |
| break; |
| case REGX_T_CHAR: |
| if (fCharData == chOpenCurly |
| || fCharData == chCloseCurly |
| || fCharData == chCloseSquare) |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom4, fMemoryManager); |
| |
| tok = fTokenFactory->createChar(fCharData); |
| processNext(); |
| break; |
| default: |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom4, fMemoryManager); |
| } //end switch |
| |
| return tok; |
| } |
| |
| |
| RangeToken* RegxParser::processBacksolidus_pP(const XMLInt32 ch) { |
| |
| processNext(); |
| |
| if (fState != REGX_T_CHAR || fCharData != chOpenCurly) |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom2, fMemoryManager); |
| |
| XMLSize_t nameStart = fOffset; |
| int nameEnd = XMLString::indexOf(fString,chCloseCurly,nameStart, fMemoryManager); |
| |
| if (nameEnd < 0) |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom3, fMemoryManager); |
| |
| fOffset = nameEnd + 1; |
| XMLCh* rangeName = (XMLCh*) fMemoryManager->allocate |
| ( |
| (nameEnd - nameStart + 1) * sizeof(XMLCh) |
| );//new XMLCh[(nameEnd - nameStart) + 1]; |
| ArrayJanitor<XMLCh> janRangeName(rangeName, fMemoryManager); |
| XMLString::subString(rangeName, fString, nameStart, nameEnd, fMemoryManager); |
| |
| return fTokenFactory->getRange(rangeName, !(ch == chLatin_p)); |
| } |
| |
| RangeToken* RegxParser::parseCharacterClass(const bool useNRange) { |
| |
| setParseContext(regexParserStateInBrackets); |
| processNext(); |
| |
| RangeToken* tok = 0; |
| bool isNRange = false; |
| |
| if (getState() == REGX_T_CHAR && getCharData() == chCaret) { |
| isNRange = true; |
| processNext(); |
| } |
| tok = fTokenFactory->createRange(); |
| |
| parserState type; |
| bool firstLoop = true; |
| bool wasDecoded; |
| |
| while ( (type = getState()) != REGX_T_EOF) { |
| |
| wasDecoded = false; |
| |
| // single range | from-to-range | subtraction |
| if (type == REGX_T_CHAR && getCharData() == chCloseSquare && !firstLoop) |
| break; |
| |
| XMLInt32 ch = getCharData(); |
| bool end = false; |
| |
| if (type == REGX_T_BACKSOLIDUS) { |
| |
| switch(ch) { |
| case chLatin_d: |
| case chLatin_D: |
| case chLatin_w: |
| case chLatin_W: |
| case chLatin_s: |
| case chLatin_S: |
| case chLatin_i: |
| case chLatin_I: |
| case chLatin_c: |
| case chLatin_C: |
| { |
| tok->mergeRanges(getTokenForShorthand(ch)); |
| end = true; |
| } |
| break; |
| case chLatin_p: |
| case chLatin_P: |
| { |
| RangeToken* tok2 = processBacksolidus_pP(ch); |
| |
| if (tok2 == 0) { |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom5, getMemoryManager()); |
| } |
| |
| tok->mergeRanges(tok2); |
| end = true; |
| } |
| break; |
| case chDash: |
| wasDecoded = true; |
| // fall thru to default. |
| default: |
| ch = decodeEscaped(); |
| } |
| } // end if REGX_T_BACKSOLIDUS |
| else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION && !firstLoop) { |
| |
| if (isNRange) |
| { |
| tok = RangeToken::complementRanges(tok, fTokenFactory, fMemoryManager); |
| isNRange=false; |
| } |
| RangeToken* rangeTok = parseCharacterClass(false); |
| tok->subtractRanges(rangeTok); |
| |
| if (getState() != REGX_T_CHAR || getCharData() != chCloseSquare) { |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC5, getMemoryManager()); |
| } |
| break; |
| } // end if REGX_T_XMLSCHEMA... |
| |
| processNext(); |
| |
| if (!end) { |
| |
| if (type == REGX_T_CHAR |
| && (ch == chOpenSquare |
| || ch == chCloseSquare |
| || (ch == chDash && getCharData() == chCloseSquare && firstLoop))) { |
| // if regex = [-] then invalid... |
| // '[', ']', '-' not allowed and should be escaped |
| XMLCh chStr[] = { (XMLCh)ch, chNull }; |
| ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager()); |
| } |
| if (ch == chDash && getCharData() == chDash && getState() != REGX_T_BACKSOLIDUS && !wasDecoded) { |
| XMLCh chStr[] = { (XMLCh)ch, chNull }; |
| ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager()); |
| } |
| |
| if (getState() != REGX_T_CHAR || getCharData() != chDash) { |
| tok->addRange(ch, ch); |
| } |
| else { |
| |
| processNext(); |
| if ((type = getState()) == REGX_T_EOF) |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager()); |
| |
| if (type == REGX_T_CHAR && getCharData() == chCloseSquare) { |
| tok->addRange(ch, ch); |
| tok->addRange(chDash, chDash); |
| } |
| else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) { |
| |
| static const XMLCh dashStr[] = { chDash, chNull}; |
| ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr, getMemoryManager()); |
| } |
| else { |
| |
| XMLInt32 rangeEnd = getCharData(); |
| XMLCh rangeEndStr[] = { (XMLCh)rangeEnd, chNull }; |
| |
| if (type == REGX_T_CHAR) { |
| |
| if (rangeEnd == chOpenSquare |
| || rangeEnd == chCloseSquare |
| || rangeEnd == chDash) |
| // '[', ']', '-' not allowed and should be escaped |
| ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr, getMemoryManager()); |
| } |
| else if (type == REGX_T_BACKSOLIDUS) { |
| rangeEnd = decodeEscaped(); |
| } |
| |
| processNext(); |
| |
| if (ch > rangeEnd) { |
| XMLCh chStr[] = { (XMLCh)ch, chNull }; |
| ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr, getMemoryManager()); |
| } |
| |
| tok->addRange(ch, rangeEnd); |
| } |
| } |
| } |
| firstLoop = false; |
| } |
| |
| if (getState() == REGX_T_EOF) |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager()); |
| |
| if (isNRange) |
| { |
| if(useNRange) |
| tok->setTokenType(Token::T_NRANGE); |
| else |
| tok = RangeToken::complementRanges(tok, fTokenFactory, fMemoryManager); |
| } |
| |
| tok->sortRanges(); |
| tok->compactRanges(); |
| |
| // If the case-insensitive option is enabled, we need to |
| // have the new RangeToken instance build its internal |
| // case-insensitive RangeToken. |
| if (RegularExpression::isSet(fOptions, RegularExpression::IGNORE_CASE)) |
| { |
| tok->getCaseInsensitiveToken(fTokenFactory); |
| } |
| |
| setParseContext(regexParserStateNormal); |
| processNext(); |
| |
| return tok; |
| } |
| |
| |
| RangeToken* RegxParser::getTokenForShorthand(const XMLInt32 ch) { |
| |
| switch(ch) { |
| case chLatin_d: |
| return fTokenFactory->getRange(fgUniDecimalDigit); |
| //return fTokenFactory->getRange(fgXMLDigit); |
| case chLatin_D: |
| return fTokenFactory->getRange(fgUniDecimalDigit, true); |
| //return fTokenFactory->getRange(fgXMLDigit, true); |
| case chLatin_w: |
| return fTokenFactory->getRange(fgXMLWord); |
| case chLatin_W: |
| return fTokenFactory->getRange(fgXMLWord, true); |
| case chLatin_s: |
| return fTokenFactory->getRange(fgXMLSpace); |
| case chLatin_S: |
| return fTokenFactory->getRange(fgXMLSpace, true); |
| case chLatin_c: |
| return fTokenFactory->getRange(fgXMLNameChar); |
| case chLatin_C: |
| return fTokenFactory->getRange(fgXMLNameChar, true); |
| case chLatin_i: |
| return fTokenFactory->getRange(fgXMLInitialNameChar); |
| case chLatin_I: |
| return fTokenFactory->getRange(fgXMLInitialNameChar, true); |
| // default: |
| // ThrowXMLwithMemMgr(RuntimeException, "Invalid shorthand {0}", chAsString) |
| } |
| |
| return 0; |
| } |
| |
| |
| XMLInt32 RegxParser::decodeEscaped() { |
| |
| if (fState != REGX_T_BACKSOLIDUS) |
| ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Next1, getMemoryManager()); |
| |
| XMLInt32 ch = fCharData; |
| |
| switch (ch) { |
| case chLatin_n: |
| ch = chLF; |
| break; |
| case chLatin_r: |
| ch = chCR; |
| break; |
| case chLatin_t: |
| ch = chHTab; |
| break; |
| case chBackSlash: |
| case chPipe: |
| case chPeriod: |
| case chCaret: |
| case chDash: |
| case chQuestion: |
| case chAsterisk: |
| case chPlus: |
| case chOpenCurly: |
| case chCloseCurly: |
| case chOpenParen: |
| case chCloseParen: |
| case chOpenSquare: |
| case chCloseSquare: |
| case chDollarSign: |
| break; |
| default: |
| { |
| XMLCh chString[] = {chBackSlash, (XMLCh)ch, chNull}; |
| ThrowXMLwithMemMgr1(ParseException,XMLExcepts::Parser_Process2, chString, getMemoryManager()); |
| } |
| } |
| |
| return ch; |
| } |
| |
| // --------------------------------------------------------------------------- |
| // RegxParser: Helper Methods |
| // --------------------------------------------------------------------------- |
| bool RegxParser::checkQuestion(const XMLSize_t off) { |
| |
| return ((off < fStringLen) && fString[off] == chQuestion); |
| } |
| |
| XERCES_CPP_NAMESPACE_END |
| |
| /** |
| * End file RegxParser.cpp |
| */ |