blob: 0cae7d42912077401e714a3e5f4cb5e44fc6f0ab [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.cli.tsql;
import org.apache.tajo.cli.tsql.ParsedResult.StatementType;
import java.util.ArrayList;
import java.util.List;
/**
* This is a parser used in tsql to parse multiple SQL lines into SQL statements.
* It helps tsql recognize the termination of each SQL statement and quotation marks (') while
* parsing multiple separate lines.
*/
public class SimpleParser {

  /** States of the state machine that recognizes meta commands and SQL statements. */
  public static enum ParsingState {
    TOK_START,     // Start State
    META,          // Meta Command
    STATEMENT,     // Statement
    WITHIN_QUOTE,  // Within Quote
    INVALID,       // Invalid Statement
    STATEMENT_EOS, // End State (End of Statement)
    META_EOS       // End State (End of Meta Command)
  }

  /** The state in which every new statement or meta command begins. */
  public static final ParsingState START_STATE = ParsingState.TOK_START;

  /** Current state of the state machine; it is carried over between {@link #parseLines(String)} calls. */
  ParsingState state = START_STATE;

  /**
   * Counter used in the "unterminated quoted string" error message. It is reset to 0 whenever the parser is
   * in TOK_START and incremented once per iteration of the main loop of {@link #parseLines(String)}.
   */
  int lineNum;

  /**
   * It will be used to store a query statement into Jline history.
   * the query statement for history does not include unnecessary white spaces and new line.
   */
  private StringBuilder historyAppender = new StringBuilder();

  /**
   * It will be used to submit a query statement to the TajoMaster. It just contains a raw query statement string.
   */
  private StringBuilder rawAppender = new StringBuilder();

  /**
   * Parses a whole script at once with a fresh parser and returns every statement and meta command found,
   * including a last one that is not explicitly terminated.
   *
   * <h2>State Machine</h2>
   * Whitespace characters in front of a statement or a meta command are ignored.
   *
   * <pre>
   * (start) TOK_START --> META ---------------------> META_EOS
   *            |
   *            |
   *            |
   *            |-----------> STATEMENT ----------> STATEMENT_EOS
   *                            \       ^
   *                             \     /
   *                              \-> WITHIN_QUOTE
   *                                \    ^
   *                                 \---/
   * </pre>
   *
   * @param str the script text
   * @return the list of parsed results in the order in which they appear in the script
   * @throws InvalidStatementException if the script ends within a quoted string
   */
  public static List<ParsedResult> parseScript(String str) throws InvalidStatementException {
    SimpleParser parser = new SimpleParser();
    List<ParsedResult> parsedResults = new ArrayList<ParsedResult>();
    parsedResults.addAll(parser.parseLines(str));
    parsedResults.addAll(parser.EOF());
    return parsedResults;
  }

  /**
   * Feeds one or more lines into the parser. A statement that is not terminated by the end of <code>str</code>
   * is kept in the internal buffers and continued by the next call (or flushed by {@link #EOF()}).
   *
   * @param str the text to parse; it may contain several lines, statements and meta commands
   * @return the statements and meta commands completed by this call (possibly empty)
   * @throws InvalidStatementException if the parser is in the INVALID state
   */
  public List<ParsedResult> parseLines(String str) throws InvalidStatementException {
    List<ParsedResult> statements = new ArrayList<ParsedResult>();
    int lineStartIdx;
    int idx = 0;
    char[] chars = str.toCharArray();

    // if parsing continues, it means that the previous line is broken by '\n'.
    // So, we should add new line to rawAppender.
    int appenderLen = rawAppender.length();
    if (appenderLen != 0
        && rawAppender.charAt(appenderLen - 1) != '\n'
        && isStatementContinue()) {
      rawAppender.append("\n");
    }

    while (idx < chars.length) {

      // initialization for new statement
      if (state == ParsingState.TOK_START) {
        lineNum = 0;

        // ignore all whitespace before start
        if (Character.isWhitespace(chars[idx])) {
          idx++;
          continue;
        }
      }

      lineStartIdx = idx;

      ////////////////////////////
      // TOK_START --> META
      ////////////////////////////
      if (state == ParsingState.TOK_START && chars[idx] == '\\') {
        state = ParsingState.META;

        ////////////////////////////
        // META --> META_EOS
        ////////////////////////////
        while (state != ParsingState.META_EOS && idx < chars.length) {
          if (isEndOfMeta(chars[idx++])) {
            state = ParsingState.META_EOS;
          }
        }

        // The terminator (';' or '\n') is not a part of the meta command.
        int metaEndIdx = (state == ParsingState.META_EOS) ? idx - 1 : idx;
        String metaCommand = str.substring(lineStartIdx, metaEndIdx);
        historyAppender.append(metaCommand);
        appendToRawStatement(metaCommand, true);

      } else if (isInlineCommentStart(chars, idx)) {
        // A comment in front of a statement is only kept in the raw statement.
        idx = consumeInlineComment(chars, idx);
        appendToRawStatement(str.substring(lineStartIdx, idx), true);

      /////////////////////////////////
      //    TOK_START     -> STATEMENT
      // or TOK_STATEMENT -> STATEMENT
      ////////////////////////////////
      } else if (isStatementContinue() || isStatementStart(chars[idx])) {
        if (!isStatementContinue()) { // TOK_START -> STATEMENT
          state = ParsingState.STATEMENT;
          rawAppender.append("\n");
        }

        while (!isTerminateState(state) && idx < chars.length) {
          char character = chars[idx++];

          ///////////////////////////////////////////////////////
          // in-statement loop BEGIN
          ///////////////////////////////////////////////////////
          // A semicolon terminates the statement only outside of a quoted string. The state can be WITHIN_QUOTE
          // here when a quoted string is continued from the previous parseLines() call.
          if (state == ParsingState.STATEMENT && isEndOfStatement(character)) {
            state = ParsingState.STATEMENT_EOS;
          } else if (state == ParsingState.STATEMENT && character == '\n') {
            appendToBothStatements(chars, lineStartIdx, idx, 1); // omit new line character '\n' from history statement
            lineStartIdx = idx;
          } else if (state == ParsingState.STATEMENT && character == '\'') { // TOK_STATEMENT -> WITHIN_QUOTE
            state = ParsingState.WITHIN_QUOTE;

            if (idx < chars.length) {
              character = chars[idx++];
            } else {
              continue;
            }

            // idx points the characters followed by the current character. So, we should use 'idx - 1'
            // in order to point the current character.
          } else if (state == ParsingState.STATEMENT && idx < chars.length && isInlineCommentStart(chars, idx - 1)) {
            idx++; // step over the second dash
            appendToBothStatements(chars, lineStartIdx, idx, 2); // omit two dash characters '--' from history statement
            int commentStartIdx = idx;
            idx = consumeInlineComment(chars, idx);
            appendToRawStatement(str.substring(commentStartIdx, idx), false);
            lineStartIdx = idx;
          }
          ///////////////////////////////////////////////////////
          // in-statement loop END
          ///////////////////////////////////////////////////////

          if (state == ParsingState.WITHIN_QUOTE) {
            while (idx < chars.length) {
              ///////////////////////////////
              // WITHIN_QUOTE --> STATEMENT
              ///////////////////////////////
              if (character == '\'') {
                state = ParsingState.STATEMENT;
                break;
              }
              character = chars[idx++];
            }

            // The closing quote may be the very last character of the input.
            if (state == ParsingState.WITHIN_QUOTE && character == '\'') {
              state = ParsingState.STATEMENT;
            }
          }
        }

        // After all characters are consumed
        if (state == ParsingState.STATEMENT_EOS) { // If one query statement is terminated
          appendToBothStatements(chars, lineStartIdx, idx - 1); // skip semicolon (;)
        } else {
          appendToBothStatements(chars, lineStartIdx, idx);

          // if it is not within quote and there is no space between lines, adds a space.
          if (state == ParsingState.STATEMENT && (historyAppender.charAt(historyAppender.length() - 1) != ' ')) {
            historyAppender.append(" ");
            rawAppender.append("\n");
          }
        }
      } else { // skip unknown character
        idx++;
      }

      lineNum++;
      // A meta command that reaches the end of the input without a terminator is complete.
      statements.addAll(doProcessEndOfStatement(state == ParsingState.META));
    }

    return statements;
  }

  /**
   * Append the range of characters into a given StringBuilder instance.
   *
   * @param builder StringBuilder to which the characters are appended
   * @param chars Characters
   * @param fromIdx start character index (inclusive)
   * @param toIdx end character index (exclusive)
   */
  private void appendToStatement(StringBuilder builder, char[] chars, int fromIdx, int toIdx) {
    builder.append(chars, fromIdx, toIdx - fromIdx);
  }

  /**
   * Append the range of characters into both history and raw appenders. It omits the number of characters specified by
   * <code>omitCharNums</code> from the end of the range for the history statement only, and makes sure that the
   * history statement ends with a space afterwards.
   *
   * @param chars Characters
   * @param fromIdx start character index (inclusive)
   * @param toIdx end character index (exclusive)
   * @param omitCharNums how many characters will be omitted from history statement
   */
  private void appendToBothStatements(char[] chars, int fromIdx, int toIdx, int omitCharNums) {
    appendToStatement(historyAppender, chars, fromIdx, toIdx - omitCharNums);
    if (historyAppender.charAt(historyAppender.length() - 1) != ' ') {
      historyAppender.append(" ");
    }
    appendToStatement(rawAppender, chars, fromIdx, toIdx);
  }

  /**
   * Append the range of characters into both history and raw appenders.
   *
   * @param chars Characters
   * @param fromIdx start character index (inclusive)
   * @param toIdx end character index (exclusive)
   */
  private void appendToBothStatements(char[] chars, int fromIdx, int toIdx) {
    historyAppender.append(chars, fromIdx, toIdx - fromIdx);
    rawAppender.append(chars, fromIdx, toIdx - fromIdx);
  }

  /**
   * Finds the end of an inline comment. <code>currentIdx</code> may point at the leading dashes or at the first
   * character after them; dashes are never new line characters, so both give the same result.
   *
   * @param chars Characters
   * @param currentIdx index from which the scan starts
   * @return the index of the next new line character at or after <code>currentIdx</code>, or
   *         <code>chars.length</code> if there is none. The result never exceeds <code>chars.length</code>.
   */
  private int consumeInlineComment(char[] chars, int currentIdx) {
    // No unconditional skip of the first character: the caller may already point at the terminating '\n'
    // (empty comment) or at the end of the input.
    while (currentIdx < chars.length && !isNewLine(chars[currentIdx])) {
      currentIdx++;
    }
    return currentIdx;
  }

  /**
   * Appends a string to the raw statement.
   *
   * @param str the string to be appended
   * @param addLF if true, a new line character is inserted in front of <code>str</code> when <code>str</code>
   *              is neither empty nor a single new line and the raw statement is non-empty and does not
   *              already end with a new line character
   */
  private void appendToRawStatement(String str, boolean addLF) {
    boolean needsLineFeed = addLF
        && !str.isEmpty()
        && !"\n".equals(str)
        && rawAppender.length() > 0
        && rawAppender.charAt(rawAppender.length() - 1) != '\n';

    if (needsLineFeed) {
      rawAppender.append("\n");
    }
    rawAppender.append(str);
  }

  /** A meta command is terminated by either a semicolon or a new line character. */
  private static boolean isEndOfMeta(char character) {
    return character == ';' || character == '\n';
  }

  /** A statement is terminated by a semicolon. */
  private static boolean isEndOfStatement(char character) {
    return character == ';';
  }

  /**
   * It checks if inline comment '--' begins at <code>idx</code>. It is always false outside of the STATEMENT and
   * TOK_START states and when fewer than two characters remain.
   */
  private boolean isInlineCommentStart(char[] chars, int idx) {
    if (idx >= chars.length - 1) {
      return false;
    }
    return (state == ParsingState.STATEMENT || state == ParsingState.TOK_START) &&
        (chars[idx] == '-' && chars[idx + 1] == '-');
  }

  private boolean isNewLine(char character) {
    return character == '\n';
  }

  /** In the start state, a statement begins with a letter, a digit, or an opening parenthesis. */
  private boolean isStatementStart(char character) {
    return state == ParsingState.TOK_START && (Character.isLetterOrDigit(character) || character == '(');
  }

  /** It checks if the parser is in the middle of a statement (including a quoted string). */
  private boolean isStatementContinue() {
    return state == ParsingState.WITHIN_QUOTE || state == ParsingState.STATEMENT;
  }

  /**
   * process all parsed statements so far and return a list of parsed results.
   *
   * If the parser is in a terminated state, the buffered statement is emitted, and then the buffers and the state
   * are reset. In the INVALID state the exception is thrown before the reset, so the parser stays INVALID.
   *
   * @param endOfFile TRUE if the pending input must be regarded as complete: a pending meta command or statement
   *                  is terminated, and a pending quoted string makes the statement invalid.
   * @return the list of parsed results, each of result contains one query statement or meta command.
   * @throws InvalidStatementException if the parser is in the INVALID state
   */
  private List<ParsedResult> doProcessEndOfStatement(boolean endOfFile) throws InvalidStatementException {
    List<ParsedResult> parsedResults = new ArrayList<ParsedResult>();
    String errorMessage = "";

    if (endOfFile) {
      if (state == ParsingState.META) {
        state = ParsingState.META_EOS;
      } else if (state == ParsingState.STATEMENT) {
        state = ParsingState.STATEMENT_EOS;
      } else if (state == ParsingState.WITHIN_QUOTE) {
        state = ParsingState.INVALID;
        errorMessage = "unterminated quoted string at LINE " + lineNum;
      }
    }

    if (isTerminateState(state)) {
      String historyStatement = historyAppender.toString();
      String rawStatement = rawAppender.toString();

      if (state == ParsingState.META_EOS) {
        parsedResults.add(new ParsedResult(StatementType.META, rawStatement, historyStatement));
      } else if (state == ParsingState.STATEMENT_EOS) {
        parsedResults.add(new ParsedResult(StatementType.STATEMENT, rawStatement, historyStatement));
      } else {
        throw new InvalidStatementException("ERROR: " + errorMessage);
      }

      // reset all states
      historyAppender.delete(0, historyAppender.length());
      rawAppender.delete(0, rawAppender.length());
      state = START_STATE;
    }

    return parsedResults;
  }

  /**
   * It manually triggers the end of file.
   *
   * @return the list of parsed results, each of result contains one query statement or meta command.
   * @throws InvalidStatementException if the input ends within a quoted string
   */
  public List<ParsedResult> EOF() throws InvalidStatementException {
    return doProcessEndOfStatement(true);
  }

  /** It checks if the given state is one of the end states (META_EOS, STATEMENT_EOS, or INVALID). */
  private static boolean isTerminateState(ParsingState state) {
    return (state == ParsingState.META_EOS || state == ParsingState.STATEMENT_EOS || state == ParsingState.INVALID);
  }

  /**
   * @return the current state of the parser
   */
  public ParsingState getState() {
    return state;
  }

  /**
   * @return the current state name followed by the history statement buffered so far
   */
  @Override
  public String toString() {
    return "[" + state.name() + "]: " + historyAppender.toString();
  }
}