| /******************************************************************************* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| ******************************************************************************/ |
| package org.apache.olingo.odata2.core.uri.expression; |
| |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.apache.olingo.odata2.api.edm.EdmLiteral; |
| import org.apache.olingo.odata2.api.edm.EdmLiteralException; |
| import org.apache.olingo.odata2.api.edm.EdmSimpleTypeFacade; |
| import org.apache.olingo.odata2.api.edm.EdmSimpleTypeKind; |
| import org.apache.olingo.odata2.api.uri.expression.ExpressionParserException; |
| import org.apache.olingo.odata2.core.edm.EdmSimpleTypeFacadeImpl; |
| |
| /** |
| * Expression tokenizer |
| * |
| */ |
| public class Tokenizer { |
| |
| private static final Pattern OTHER_LIT = Pattern.compile("(?:\\p{L}|\\p{Digit}|[-._~%!$&*+;:@])+"); |
| private static final Pattern FUNK = |
| Pattern |
| .compile("^(startswith|endswith|substring|substring|substringof|indexof|replace|tolower|toupper" + |
| "|trim|concat|length|year|mounth|day|hour|minute|second|round|ceiling|floor)( *)\\("); |
| private static final Pattern AND_SUB1 = Pattern.compile("^(add|sub|mul|div|mod|not) "); |
| private static final Pattern AND_SUB = Pattern.compile("^(and|or|eq|ne|lt|gt|le|ge) "); |
| private static final Pattern prefix = Pattern.compile("^(X|binary|guid|datetime|datetimeoffset|time)'"); |
| private boolean flagIncludeWhitespace = false; |
| private EdmSimpleTypeFacade typeDectector = null; |
| |
| int curPosition; |
| final String expression; |
| final int expressionLength; |
| TokenList tokens; |
| |
| public Tokenizer(final String expression) { |
| typeDectector = new EdmSimpleTypeFacadeImpl(); |
| this.expression = expression; |
| expressionLength = expression.length(); |
| tokens = new TokenList(); |
| } |
| |
| /** |
| * Inform the Tokenizer whether extra tokens for whitespace characters should be added to the token list or not. |
| * @param flagIncludeWhitespace True -> Whitespace token will be added to token list; False otherwise |
| * @return this |
| */ |
| public Tokenizer setFlagWhiteSpace(final Boolean flagIncludeWhitespace) { |
| this.flagIncludeWhitespace = flagIncludeWhitespace; |
| return this; |
| } |
| |
| /** |
| * Tokenizes an expression as defined per OData specification |
| * @return Token list |
| */ |
| public TokenList tokenize() throws TokenizerException, ExpressionParserException { |
| curPosition = 0; |
| int oldPosition; |
| char curCharacter; |
| String token = ""; |
| |
| while (curPosition < expressionLength) { |
| oldPosition = curPosition; |
| |
| curCharacter = expression.charAt(curPosition); |
| switch (curCharacter) { |
| case ' ': |
| // count whitespace and move pointer to next non-whitespace char |
| eatWhiteSpaces(curPosition, curCharacter); |
| break; |
| |
| case '(': |
| tokens.appendToken(curPosition, TokenKind.OPENPAREN, curCharacter); |
| curPosition = curPosition + 1; |
| |
| break; |
| |
| case ')': |
| tokens.appendToken(curPosition, TokenKind.CLOSEPAREN, curCharacter); |
| curPosition = curPosition + 1; |
| break; |
| |
| case '\'': |
| token = ""; |
| readLiteral(curCharacter); |
| |
| break; |
| |
| case ',': |
| tokens.appendToken(oldPosition, TokenKind.COMMA, curCharacter); |
| curPosition = curPosition + 1; |
| break; |
| |
| case '=': |
| case '/': |
| case '?': |
| case '.': |
| case '*': |
| curPosition = curPosition + 1; |
| tokens.appendToken(oldPosition, TokenKind.SYMBOL, curCharacter); |
| break; |
| |
| default: |
| String rem_expr = expression.substring(curPosition); // remaining expression |
| |
| boolean isBinary = checkForBinary(oldPosition, rem_expr); |
| if (isBinary) { |
| break; |
| } |
| |
| // check for prefixes like X, binary, guid, datetime |
| boolean isPrefix = checkForPrefix(rem_expr); |
| if (isPrefix) { |
| break; |
| } |
| |
| // check for math |
| boolean isMath = checkForMath(oldPosition, rem_expr); |
| if (isMath) { |
| break; |
| } |
| |
| // check for function |
| boolean isFunction = checkForMethod(oldPosition, rem_expr); |
| if (isFunction) { |
| break; |
| } |
| |
| boolean isBoolean = checkForBoolean(oldPosition, rem_expr); |
| if (isBoolean) { |
| break; |
| } |
| |
| boolean isLiteral = checkForLiteral(oldPosition, curCharacter, rem_expr); |
| if (isLiteral) { |
| break; |
| } |
| |
| token = new Character(curCharacter).toString(); |
| throw TokenizerException.createUNKNOWN_CHARACTER(oldPosition, token, expression); |
| } |
| } |
| return tokens; |
| } |
| |
| private boolean checkForLiteral(final int oldPosition, final char curCharacter, final String rem_expr) { |
| final Matcher matcher = OTHER_LIT.matcher(rem_expr); |
| boolean isLiteral = false; |
| if (matcher.lookingAt()) { |
| String token = matcher.group(); |
| try { |
| EdmLiteral edmLiteral = typeDectector.parseUriLiteral(token); |
| curPosition = curPosition + token.length(); |
| // It is a simple type. |
| tokens.appendEdmTypedToken(oldPosition, TokenKind.SIMPLE_TYPE, token, edmLiteral); |
| isLiteral = true; |
| } catch (EdmLiteralException e) { |
| // We treat it as normal untyped literal. |
| |
| // The '-' is checked here (and not in the switch statement) because it may be |
| // part of a negative number. |
| if (curCharacter == '-') { |
| curPosition = curPosition + 1; |
| tokens.appendToken(oldPosition, TokenKind.SYMBOL, curCharacter); |
| isLiteral = true; |
| } else { |
| curPosition = curPosition + token.length(); |
| tokens.appendToken(oldPosition, TokenKind.LITERAL, token); |
| isLiteral = true; |
| } |
| } |
| } |
| return isLiteral; |
| } |
| |
| private boolean checkForBoolean(final int oldPosition, final String rem_expr) { |
| boolean isBoolean = false; |
| if ("true".equals(rem_expr) || "false".equals(rem_expr)) { |
| curPosition = curPosition + rem_expr.length(); |
| tokens.appendEdmTypedToken(oldPosition, TokenKind.SIMPLE_TYPE, rem_expr, new EdmLiteral(EdmSimpleTypeFacadeImpl |
| .getEdmSimpleType(EdmSimpleTypeKind.Boolean), rem_expr)); |
| isBoolean = true; |
| } |
| return isBoolean; |
| } |
| |
| private void eatWhiteSpaces(final int oldPosition, char curCharacter) { |
| int lv_token_len; |
| String expression_sub; |
| while ((curCharacter == ' ') && (curPosition < expressionLength)) { |
| curPosition = curPosition + 1; |
| if (curPosition < expressionLength) { |
| curCharacter = expression.charAt(curPosition); |
| } |
| } |
| |
| lv_token_len = curPosition - oldPosition; |
| |
| if (flagIncludeWhitespace == true) { |
| expression_sub = expression.substring(oldPosition, oldPosition + lv_token_len); |
| tokens.appendEdmTypedToken(oldPosition, TokenKind.WHITESPACE, expression_sub, null); |
| } |
| } |
| |
| private boolean checkForMethod(final int oldPosition, final String rem_expr) { |
| boolean isMethod = false; |
| Matcher matcher = FUNK.matcher(rem_expr); |
| if (matcher.find()) { |
| String token = matcher.group(1); |
| curPosition = curPosition + token.length(); |
| tokens.appendToken(oldPosition, TokenKind.LITERAL, token); |
| isMethod = true; |
| } |
| return isMethod; |
| } |
| |
| private boolean checkForMath(final int oldPosition, final String rem_expr) { |
| boolean isMath = false; |
| Matcher matcher1 = AND_SUB1.matcher(rem_expr); |
| if (matcher1.find()) { |
| String token = matcher1.group(1); |
| curPosition = curPosition + token.length(); |
| tokens.appendToken(oldPosition, TokenKind.LITERAL, token); |
| isMath = true; |
| } |
| return isMath; |
| } |
| |
| private boolean checkForBinary(final int oldPosition, final String rem_expr) { |
| boolean isBinary = false; |
| Matcher matcher1 = AND_SUB.matcher(rem_expr); |
| if (matcher1.find()) { |
| String token = matcher1.group(1); |
| curPosition = curPosition + token.length(); |
| tokens.appendToken(oldPosition, TokenKind.LITERAL, token); |
| isBinary = true; |
| } |
| return isBinary; |
| } |
| |
| private boolean checkForPrefix(final String rem_expr) throws ExpressionParserException, TokenizerException { |
| boolean isPrefix = false; |
| Matcher matcher = prefix.matcher(rem_expr); |
| String token = ""; |
| char curCharacter; |
| |
| if (matcher.find()) { |
| token = matcher.group(1); |
| curPosition = curPosition + token.length(); |
| curCharacter = expression.charAt(curPosition); // "should be ' |
| readLiteral(curCharacter, token); |
| isPrefix = true; |
| } |
| return isPrefix; |
| } |
| |
| private void readLiteral(final char curCharacter) throws ExpressionParserException, TokenizerException { |
| readLiteral(curCharacter, ""); |
| } |
| |
| /** |
| * Read up to single ' and move pointer to the following char and tries a type detection |
| * @param curCharacter |
| * @param token |
| * @throws ExpressionParserException |
| * @throws TokenizerException |
| */ |
| private void readLiteral(char curCharacter, String token) throws ExpressionParserException, TokenizerException { |
| int offsetPos = -token.length(); |
| int oldPosition = curPosition; |
| token = token + Character.toString(curCharacter); |
| curPosition = curPosition + 1; |
| |
| boolean wasApostroph = false; // leading ' does not count |
| while (curPosition < expressionLength) { |
| curCharacter = expression.charAt(curPosition); |
| |
| if (curCharacter != '\'') { |
| if (wasApostroph == true) { |
| break; |
| } |
| |
| token = token + curCharacter; |
| wasApostroph = false; |
| } else { |
| if (wasApostroph) { |
| wasApostroph = false; // a double ' is a normal character ' |
| } else { |
| wasApostroph = true; |
| } |
| token = token + curCharacter; |
| } |
| curPosition = curPosition + 1; |
| } |
| |
| if (!wasApostroph) { |
| // Exception tested within TestPMparseFilterString |
| throw FilterParserExceptionImpl.createTOKEN_UNDETERMINATED_STRING(oldPosition, expression); |
| } |
| |
| try { |
| EdmLiteral edmLiteral = typeDectector.parseUriLiteral(token); |
| tokens.appendEdmTypedToken(oldPosition + offsetPos, TokenKind.SIMPLE_TYPE, token, edmLiteral); |
| } catch (EdmLiteralException ex) { |
| throw TokenizerException.createTYPEDECTECTION_FAILED_ON_STRING(ex, oldPosition, token); |
| } |
| } |
| } |