blob: c58cd0668a06f11a033d687581ab6fb4ed28eded [file] [log] [blame]
/*
Derby - Class org.apache.derby.impl.tools.ij.StatementFinder
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.apache.derby.impl.tools.ij;
import org.apache.derby.iapi.tools.i18n.LocalizedOutput;
import org.apache.derby.iapi.tools.i18n.LocalizedInput;
import java.io.IOException;
import java.io.Reader;
/**
StatementFinder looks through an input stream for
the next JSQL statement. A statement is considered to
be any tokens up to the next semicolon or EOF.
<p>
Semicolons inside comments, strings, and delimited identifiers
are not considered to be statement terminators but to be
part of those tokens.
<p>
Comments currently recognized include the SQL comment,
which begins with "--" and ends at the next EOL, and nested
bracketed comments.
<p>
Strings and delimited identifiers are permitted to contain
newlines; the actual IJ or JSQL parsers will report errors when
those cases occur.
<p>
There are no escaped characters, i.e. "\n" is considered to
be two characters, '\' and 'n'.
*/
public class StatementFinder {

    // Scanner states.
    private static final int IN_STATEMENT = 0;
    private static final int IN_STRING = 1;
    private static final int IN_SQLCOMMENT = 2;
    private static final int END_OF_STATEMENT = 3;
    private static final int END_OF_INPUT = 4;

    // Characters that can change the scanner state.
    private static final char MINUS = '-';
    private static final char SINGLEQUOTE = '\'';
    private static final char DOUBLEQUOTE = '\"';
    private static final char SEMICOLON = ';';
    private static final char NEWLINE = '\n';
    private static final char RETURN = '\r';
    private static final char SPACE = ' ';
    private static final char TAB = '\t';
    private static final char FORMFEED = '\f';
    private static final char SLASH = '/';
    private static final char ASTERISK = '*';

    /** The stream statements are read from. */
    private Reader source;
    /** Text of the statement currently being collected. */
    private final StringBuilder statement = new StringBuilder();
    /** Current scanner state; one of the state constants above. */
    private int state;
    /** True once readChar() has consumed the end of the stream. */
    private boolean atEOF = false;
    /** True if the most recent peekChar() hit the end of the stream. */
    private boolean peekEOF = false;
    /** The character fetched by the most recent peekChar(). */
    private char peekChar;
    /** True while peekChar holds a character not yet consumed by readChar(). */
    private boolean peeked = false;
    /** Destination for prompts; only set when the constructor enabled prompting. */
    private LocalizedOutput promptwriter;
    /** Whether prompts are written when line ends are read. */
    private boolean doPrompt;
    /**
        True once a character other than a comment-introducing minus has been
        seen since the last semicolon; selects which prompt follows a
        single line comment.
     */
    private boolean continuedStatement;

    /**
        Create a finder reading from the given stream. The stream is
        used as given; no additional wrapping or buffering is added here.
        <p>
        Prompting is enabled only when a prompt destination is supplied
        AND the input reports itself as standard input. In every other
        case the prompt destination is not retained.

        @param s the input stream for reading statements from.
        @param promptDest LocalizedOutput stream to write line
            continuation prompts ("&gt; ") to. If null,
            no such prompts will be written.
     */
    public StatementFinder(LocalizedInput s, LocalizedOutput promptDest) {
        source = s;
        if (promptDest != null && s.isStandardInput()) {
            promptwriter = promptDest;
            doPrompt = true;
        } else {
            doPrompt = false;
        }
    }

    /**
        Redirect the finder to another stream. The previous stream is
        closed (failures to close it are ignored) and all per-stream
        scanning state is reset, including any pending peeked character
        and the "statement already started" flag used for prompting.
        <p>
        Prompting is enabled when the new input is standard input and
        a prompt destination was retained by the constructor.

        @param s the input stream for reading statements from.
     */
    public void ReInit(LocalizedInput s) {
        try {
            source.close();
        } catch (IOException ignored) {
            // Best effort: the old stream may already be gone.
        }
        source = s;
        state = IN_STATEMENT;
        atEOF = false;
        peekEOF = false;
        peeked = false;
        // A new stream always starts a new statement; do not let a
        // statement left unterminated on the old stream influence the
        // first prompt written for the new one.
        continuedStatement = false;
        doPrompt = s.isStandardInput() && promptwriter != null;
    }

    /**
        Close the underlying input stream.

        @throws IOException if closing the stream fails.
     */
    public void close() throws IOException {
        source.close();
    }

    /**
        Get the next statement in the input stream. Returns it,
        dropping its closing semicolon if it has one. If there is
        no next statement, return null.
        <p>
        Leading whitespace is skipped. Comments, strings and delimited
        identifiers are kept in the returned text, and semicolons inside
        them do not end the statement.

        @return the next statement in the input stream, or null when
            the input is exhausted.
     */
    public String nextStatement() {
        boolean haveSemi = false;
        char nextChar;

        // initialize fields for getting the next statement
        statement.setLength(0);
        if (state == END_OF_INPUT) {
            return null;
        }
        state = IN_STATEMENT;

        // Skip leading whitespace. peekChar() fetches a fresh character
        // on every call, so peeking repeatedly without reading discards
        // the previously peeked character.
        nextChar = peekChar();
        if (peekEOF()) {
            state = END_OF_INPUT;
            return null;
        }
        if (whiteSpace(nextChar)) {
            while (whiteSpace(peekChar()) && !peekEOF()) {
                // discard the whitespace character
            }
            if (peekEOF()) {
                state = END_OF_INPUT;
                return null;
            }
        }

        while (state != END_OF_STATEMENT && state != END_OF_INPUT) {
            // get the next character from the input
            nextChar = readChar();
            if (atEOF()) {
                state = END_OF_INPUT;
                break;
            }

            // A minus may introduce a comment, which does not count as
            // statement text; the comment reader decides for that case.
            if (nextChar != MINUS) {
                continuedStatement = true;
            }

            switch (nextChar) {
            case MINUS:
                readSingleLineComment(nextChar);
                break;
            case SLASH:
                readBracketedComment();
                break;
            case SINGLEQUOTE:
            case DOUBLEQUOTE:
                readString(nextChar);
                break;
            case SEMICOLON:
                haveSemi = true;
                state = END_OF_STATEMENT;
                continuedStatement = false;
                break;
            case NEWLINE:
            case RETURN:
                if (doPrompt) {
                    utilMain.doPrompt(false, promptwriter, "");
                    // If a carriage return is followed by a newline,
                    // swallow the newline to avoid double prompting
                    // on Windows.
                    if (nextChar == RETURN && peekChar() == NEWLINE) {
                        readChar();
                    }
                }
                break;
            default:
                // keep going, just a normal character
                break;
            }
        }

        if (haveSemi) {
            // the semicolon was the last character appended; drop it
            statement.setLength(statement.length() - 1);
        }
        return statement.toString();
    }

    /**
        Determine if the given character is considered whitespace.

        @param c the character to consider
        @return true if the character is a space, tab, carriage return,
            newline or form feed
     */
    private boolean whiteSpace(char c) {
        return (c == SPACE ||
                c == TAB ||
                c == RETURN ||
                c == NEWLINE ||
                c == FORMFEED);
    }

    /**
        Advance the source stream to the end of a bracketed comment
        if it is on one, assuming the opening slash has just been read.
        If the slash is not followed by an asterisk it is not a comment
        and the stream is not advanced.
        <p>
        Bracketed comments nest: every slash-asterisk pair inside the
        comment must be matched by its own asterisk-slash pair.
        <p>
        If the input ends inside the comment the state becomes
        END_OF_INPUT, since the end of the stream has already been
        consumed at that point.
     */
    private void readBracketedComment() {
        char nextChar = peekChar();

        // if next char is EOF, we are done; the caller will see it.
        if (peekEOF()) {
            return;
        }

        // if nextChar is not an asterisk, then not a comment.
        if (nextChar != ASTERISK) {
            continuedStatement = true;
            return;
        }

        // we are really in a comment
        readChar(); // grab the asterisk for real.
        int nestingLevel = 1;

        while (true) {
            nextChar = readChar();
            if (atEOF()) {
                // The end of the stream has been consumed, so record it
                // here instead of making the caller read the exhausted
                // source again.
                state = END_OF_INPUT;
                return;
            }

            // Only look ahead when the current character could start a
            // two-character token. Looking ahead unconditionally would
            // block on interactive input before the prompt for the
            // line just ended has been written.
            if (nextChar == SLASH) {
                if (peekChar() == ASTERISK) {
                    readChar();
                    nestingLevel++;
                }
            } else if (nextChar == ASTERISK) {
                if (peekChar() == SLASH) {
                    readChar();
                    nestingLevel--;
                    if (nestingLevel == 0) {
                        state = IN_STATEMENT;
                        return;
                    }
                }
            } else if (nextChar == NEWLINE || nextChar == RETURN) {
                if (doPrompt) {
                    utilMain.doPrompt(false, promptwriter, "");
                    // If a carriage return is followed by a newline, we
                    // process it as well to account for Windows CRLFs.
                    if (nextChar == RETURN && peekChar() == NEWLINE) {
                        readChar();
                    }
                }
            }
        }
    }

    /**
        Advance the source stream to the end of a comment if it
        is on one, assuming the first character of
        a potential single line comment has been found.
        If it is not a comment, do not advance the stream.
        <p>
        The form of a single line comment is, in regexp, XX.*$,
        where XX is two instances of commentChar. The line end that
        terminates the comment is consumed as part of it.

        @param commentChar the character whose duplication signifies
            the start of the comment.
     */
    private void readSingleLineComment(char commentChar) {
        char nextChar = peekChar();

        // if next char is EOF, we are done.
        if (peekEOF()) {
            return;
        }

        // if nextChar is not the comment character, the first one was
        // just a normal character of the statement.
        if (nextChar != commentChar) {
            continuedStatement = true;
            return;
        }

        // we are really in a comment
        readChar(); // grab the second comment character for real.
        state = IN_SQLCOMMENT;

        do {
            nextChar = peekChar();
            if (peekEOF()) {
                // let the caller process the EOF, don't read it
                state = IN_STATEMENT;
                return;
            }

            switch (nextChar) {
            case NEWLINE:
            case RETURN:
                readChar(); // okay to process the character
                state = IN_STATEMENT;
                if (doPrompt) {
                    // If a statement had already been started, request
                    // the same prompt used for other line ends (first
                    // argument false). Otherwise the comment stood on
                    // its own and the other prompt form is requested.
                    // NOTE(review): the exact prompt text is decided by
                    // utilMain.doPrompt - confirm there.
                    utilMain.doPrompt(!continuedStatement, promptwriter, "");
                    // If a carriage return is followed by a newline, we
                    // process it as well to account for Windows CRLFs.
                    if (nextChar == RETURN && peekChar() == NEWLINE) {
                        readChar();
                    }
                }
                return;
            default:
                readChar(); // process the character, still in comment
                break;
            }
        } while (state == IN_SQLCOMMENT); // could be while true...
    }

    /**
        Advance the stream to the end of the string.
        Assumes the opening delimiter of the string has been read.
        This handles the SQL ability to put the delimiter within
        the string by doubling it, by reading those as two strings
        sitting next to one another. I.e, 'Mary''s lamb' is read
        by this class as two strings, 'Mary' and 's lamb'.
        <p>
        The delimiter of the string is expected to be repeated at
        its other end. If the other flavor of delimiter occurs within
        the string, it is just a normal character within it.
        <p>
        All characters except the delimiter are permitted within the
        string. If EOF is hit before the closing delimiter is found,
        the end of the string is assumed and the state becomes
        END_OF_INPUT.

        @param stringDelimiter the starting and ending character
            for the string being read.
     */
    private void readString(char stringDelimiter) {
        state = IN_STRING;
        do {
            char nextChar = readChar();
            if (atEOF()) {
                state = END_OF_INPUT;
                return;
            }
            if (nextChar == stringDelimiter) {
                // we've reached the end of the string
                state = IN_STATEMENT;
                return;
            }
            // still in string
        } while (state == IN_STRING); // could be while true...
    }

    /** @return true if readChar() has consumed the end of the stream. */
    private boolean atEOF() {
        return atEOF;
    }

    /** @return true if the most recent peekChar() hit the end of the stream. */
    private boolean peekEOF() {
        return peekEOF;
    }

    /**
        Return the next character in the source stream and
        append it to the statement buffer. If a character has already
        been peeked, that character is the one consumed; otherwise a
        new one is fetched. At end of stream nothing is appended and
        atEOF() becomes true.

        @return the next character in the source stream, or the NUL
            character at end of stream.
     */
    private char readChar() {
        if (!peeked) {
            peekChar();
        }
        peeked = false;
        atEOF = peekEOF;
        if (!atEOF) {
            statement.append(peekChar);
        }
        return peekChar;
    }

    /**
        Fetch the next character from the source stream without
        appending it to the statement buffer. The character is held
        so that the next readChar() consumes it.
        <p>
        Note that every call reads from the source: calling this twice
        without an intervening readChar() discards the character
        fetched by the first call.

        @return the next character in the source stream, or the NUL
            character at end of stream (check peekEOF()).
     */
    private char peekChar() {
        peeked = true;
        char c = '\0';
        try {
            // REMIND: this is assuming a flat ascii source file.
            // will need to beef it up at some future point to
            // understand whether the stream is ascii or something else.
            int cInt = source.read();
            peekEOF = (cInt == -1);
            if (!peekEOF) {
                c = (char) cInt;
            }
        } catch (IOException ie) {
            throw ijException.iOException(ie);
        }
        peekChar = c;
        return c;
    }
}