// blob: b1c529e9b59ed1764d742683e09ada9f02045078 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.felix.gogo.runtime;
/**
* Bash-like tokenizer.
*
* Single and double quotes are just like Bash - single quotes escape everything
* (including backslashes), newlines are allowed in quotes.
* backslash-newline indicates a line continuation and is removed.
*
* Variable expansion is just like Bash: $NAME or ${NAME[[:][-+=?]WORD]},
* except it can yield any Object. Variables expanded within double-quotes,
* or adjacent to a String are converted to String.
*
* Unlike bash, indirect variable expansion is supported using ${$NAME}.
*
* Only a single variable assignment is recognized, with '=' being the second token.
* (Bash allows name1=value1 name2=value2 ... command args)
*
* Comments can only start where white space is allowed:
* # or // starts a line comment, /* starts a block comment.
* The following common uses do NOT start comments:
* ls http://example.com#anchor
* ls $dir/*.java
*
* @see <a href="http://wiki.bash-hackers.org/syntax/basicgrammar">Bash basic grammar</a>
*/
public class Tokenizer
{
    /**
     * Kinds of token produced by {@link Tokenizer#next()}.
     * Constants created with a character print as that character in single
     * quotes; all others print their constant name.
     */
    public enum Type
    {
        ASSIGN('='), PIPE('|'), SEMICOLON(';'), NEWLINE, ARRAY, CLOSURE, EXECUTION, WORD, EOT;

        /** Punctuation character of this type, or 0 when it has none. */
        private final char c;

        Type()
        {
            this.c = '\0';
        }

        Type(char c)
        {
            this.c = c;
        }

        @Override
        public String toString()
        {
            return (c == 0 ? super.toString() : "'" + c + "'");
        }
    }

    /** When true, the constructor and next() trace to System.err. */
    private static final boolean DEBUG = false;

    /** Sentinel returned by getch()/peek() once the end of text is reached. */
    private static final char EOT = (char) -1;

    /** The input being tokenized. */
    private final CharSequence text;

    /** Variable lookup / execution callback used during expansion; null for the one-argument constructor. */
    private final Evaluate evaluate;

    /** True when text is a Token whose type is ARRAY. */
    private final boolean inArray;

    /** As passed to the constructor; changes how expand() treats backslash and single quote. */
    private final boolean inQuote;

    /** Type of the most recent token; starts as NEWLINE. */
    private Type type = Type.NEWLINE;

    /** Text of the most recent token, or null for tokens that carry none. */
    private CharSequence value;

    /** The most recent token, built at the end of next(). */
    private Token token;

    /** Line of the current character. */
    private short line;

    /** Column of the current character. */
    private short column;

    /** The current character (EOT at end of text). */
    private char ch;

    /** Index in text of the next character to read; the current character sits at index - 1. */
    private int index;

    /** True when the token just returned by next() is a WORD not preceded by a WORD or ASSIGN. */
    private boolean firstWord;

    /**
     * Creates a tokenizer without an evaluator and outside of quotes.
     *
     * @param text the input to tokenize
     */
    public Tokenizer(CharSequence text)
    {
        this(text, null, false);
    }

    /**
     * Creates a tokenizer and reads the first character.
     * When text is a {@code Token}, line/column start at the token's position
     * and array mode is enabled if the token is of type ARRAY; otherwise the
     * position starts at line 1, column 1.
     *
     * @param text the input to tokenize
     * @param evaluate callback used by expansion (may be null if no expansion needs it)
     * @param inQuote whether text is the content of a double-quoted string
     */
    public Tokenizer(CharSequence text, Evaluate evaluate, boolean inQuote)
    {
        this.text = text;
        this.evaluate = evaluate;
        this.inQuote = inQuote;
        index = 0;
        line = column = 1;

        boolean array = false;

        if (text instanceof Token)
        {
            Token t = (Token) text;
            line = t.line;
            column = t.column;
            array = (Type.ARRAY == t.type);
        }

        inArray = array;
        getch();

        if (DEBUG)
        {
            if (inArray)
                System.err.println("Tokenizer[" + text + "]");
            else
                System.err.println("Tokenizer<" + text + ">");
        }
    }

    /**
     * @return the type of the most recent token
     */
    public Type type()
    {
        return type;
    }

    /**
     * @return the text of the most recent token, or null if it has none
     */
    public CharSequence value()
    {
        return value;
    }

    /**
     * @return the most recent token, as built by {@link #next()}
     */
    public Token token()
    {
        return token;
    }

    /**
     * Reads the next token, updating {@link #type()}, {@link #value()} and
     * {@link #token()}.
     *
     * @return the type of the token read; {@code Type.EOT} at end of text
     */
    public Type next()
    {
        final Type prevType = type;
        token = null;
        value = null;

        short tLine;
        short tColumn;

        while (true)
        {
            skipSpace();
            // remember where the token starts, for the Token built below
            tLine = line;
            tColumn = column;

            switch (ch)
            {
                case EOT:
                    type = Type.EOT;
                    break;

                case '\n':
                    getch();
                    // newlines are not tokens inside an array
                    if (inArray)
                        continue;
                    // only return NEWLINE once, and not when preceded by ; or |
                    switch (prevType)
                    {
                        case PIPE:
                        case SEMICOLON:
                        case NEWLINE:
                            continue;

                        default:
                            type = Type.NEWLINE;
                            break;
                    }
                    break;

                case '{':
                case '(':
                case '[':
                    // group() sets type to CLOSURE, EXECUTION or ARRAY
                    value = group();
                    getch();
                    break;

                case ';':
                    getch();
                    type = Type.SEMICOLON;
                    break;

                case '|':
                    getch();
                    type = Type.PIPE;
                    break;

                case '=':
                    // '=' is an assignment only directly after a first word, or inside an array
                    if (firstWord || inArray)
                    {
                        getch();
                        type = Type.ASSIGN;
                        break;
                    }
                    // fall through
                default:
                    value = word();
                    type = Type.WORD;
            }

            firstWord = (Type.WORD == type && (Type.WORD != prevType && Type.ASSIGN != prevType));
            token = new Token(type, value, tLine, tColumn);

            if (DEBUG)
            {
                System.err.print("<" + type + ">");
                if (Type.EOT == type)
                {
                    System.err.println();
                }
            }

            return type;
        }
    }

    /**
     * Scans a word starting at the current character.
     * The word ends at a blank, tab, newline, '|', ';' or end of text.
     * '=' also ends it, unless the current {@code type} is WORD or ASSIGN and
     * the tokenizer is not in an array, in which case '=' is part of the word.
     * Closures, escapes and quotes inside the word are skipped over as units.
     *
     * @return the raw text of the word (a CR before the terminating newline is excluded)
     */
    private CharSequence word()
    {
        int start = index - 1;
        int skipCR = 0;

        do
        {
            switch (ch)
            {
                case '\n':
                    // getch() collapses CRLF to '\n'; leave the CR out of the word
                    if (index >= 2 && text.charAt(index - 2) == '\r')
                        skipCR = 1;
                    // fall through
                case '=':
                    if ((Type.WORD == type || Type.ASSIGN == type) && '=' == ch
                        && !inArray)
                        continue;
                    // fall through
                case ' ':
                case '\t':
                case '|':
                case ';':
                    return text.subSequence(start, index - 1 - skipCR);

                case '{':
                    // note: group() overwrites the type field
                    group();
                    break;

                case '\\':
                    escape();
                    break;

                case '\'':
                case '"':
                    skipQuote();
                    break;
            }
        }
        while (getch() != EOT);

        return text.subSequence(start, index - 1);
    }

    /**
     * Scans a bracketed group starting at the current opening bracket and
     * sets {@code type} to CLOSURE, EXECUTION or ARRAY accordingly.
     * Nested brackets of the same kind are balanced; quotes and escapes are
     * skipped. On return the current character is the closing bracket.
     *
     * @return the text between the brackets
     * @throws EOFError if the text ends before the matching close bracket
     */
    private CharSequence group()
    {
        final char push = ch;
        final char pop;

        switch (ch)
        {
            case '{':
                type = Type.CLOSURE;
                pop = '}';
                break;
            case '(':
                type = Type.EXECUTION;
                pop = ')';
                break;
            case '[':
                type = Type.ARRAY;
                pop = ']';
                break;
            default:
                assert false;
                pop = 0;
        }

        short sLine = line;
        short sCol = column;
        int start = index;
        int depth = 1;

        while (true)
        {
            // a comment may start directly after an opening bracket or a newline
            boolean comment = false;

            switch (ch)
            {
                case '{':
                case '(':
                case '[':
                case '\n':
                    comment = true;
                    break;
            }

            if (getch() == EOT)
            {
                throw new EOFError(sLine, sCol, "unexpected EOT looking for matching '"
                    + pop + "'");
            }

            // don't recognize comments that start within a word
            if (comment || isBlank(ch))
                skipSpace();

            switch (ch)
            {
                case '"':
                case '\'':
                    skipQuote();
                    break;

                case '\\':
                    ch = escape();
                    break;

                default:
                    if (push == ch)
                        depth++;
                    else if (pop == ch && --depth == 0)
                        return text.subSequence(start, index - 1);
            }
        }
    }

    /**
     * Consumes an escape sequence; the current character must be a backslash.
     * A backslash-u escape must be followed by exactly four hexadecimal
     * digits. Backslash-newline is a line continuation and yields '\0'.
     * Any other character is returned as is.
     *
     * @return the escaped character, or '\0' for a line continuation
     * @throws EOFError if the text ends inside the escape
     * @throws SyntaxError if a backslash-u escape is not four hex digits
     */
    private char escape()
    {
        assert '\\' == ch;

        switch (getch())
        {
            case 'u':
                getch();
                getch();
                getch();
                getch();

                if (EOT == ch)
                {
                    throw new EOFError(line, column, "unexpected EOT in \\u escape");
                }

                String u = text.subSequence(index - 4, index).toString();

                // Validate digit by digit: Integer.parseInt would also accept a
                // leading '+' or '-', letting malformed escapes through.
                int code = 0;
                for (int i = 0; i < u.length(); ++i)
                {
                    int digit = Character.digit(u.charAt(i), 16);
                    if (digit < 0)
                    {
                        throw new SyntaxError(line, column, "bad unicode escape: \\u" + u);
                    }
                    code = (code << 4) | digit;
                }
                return (char) code;

            case EOT:
                throw new EOFError(line, column, "unexpected EOT in \\ escape");

            case '\n':
                return '\0'; // line continuation

            default:
                return ch;
        }
    }

    /**
     * Skips a quoted string; the current character must be the opening quote.
     * On return the current character is the matching closing quote.
     * Backslash escapes are honoured inside double quotes only.
     *
     * @throws EOFError if the text ends before the closing quote
     */
    private void skipQuote()
    {
        assert '\'' == ch || '"' == ch;
        final char quote = ch;
        final short sLine = line;
        final short sCol = column;

        while (getch() != EOT)
        {
            if (quote == ch)
                return;

            if ((quote == '"') && ('\\' == ch))
                escape();
        }

        throw new EOFError(sLine, sCol, "unexpected EOT looking for matching quote: "
            + quote);
    }

    /**
     * Skips blanks, tabs, backslash-newline continuations and comments.
     * A line comment starts with # or // and stops at the newline (which is
     * not consumed); a block comment starts with slash-star and ends with
     * star-slash. Newlines themselves are not skipped.
     *
     * @throws EOFError if a block comment is not closed
     */
    private void skipSpace()
    {
        while (true)
        {
            while (isBlank(ch))
            {
                getch();
            }

            // skip continuation lines, but not other escapes
            if (('\\' == ch) && (peek() == '\n'))
            {
                getch();
                getch();
                continue;
            }

            // skip comments
            if (('/' == ch) || ('#' == ch))
            {
                if (('#' == ch) || (peek() == '/'))
                {
                    // line comment: consume up to, but not including, the newline
                    while ((getch() != EOT) && ('\n' != ch))
                    {
                    }
                    continue;
                }
                else if ('*' == peek())
                {
                    short sLine = line;
                    short sCol = column;
                    getch();

                    while ((getch() != EOT) && !(('*' == ch) && (peek() == '/')))
                    {
                    }

                    if (EOT == ch)
                    {
                        throw new EOFError(sLine, sCol,
                            "unexpected EOT looking for closing comment: */");
                    }

                    // consume the closing star and slash
                    getch();
                    getch();
                    continue;
                }
            }

            break;
        }
    }

    /**
     * @param ch character to test
     * @return true for a space or a tab
     */
    private boolean isBlank(char ch)
    {
        return ' ' == ch || '\t' == ch;
    }

    /**
     * @param ch character to test
     * @return true if ch may appear in a variable name: a Java identifier
     *         part other than '$', or a '.'
     */
    private boolean isName(char ch)
    {
        return (Character.isJavaIdentifierPart(ch) && (ch != '$')) || ('.' == ch);
    }

    /**
     * Expands variables, quotes and escapes in word.
     *
     * @param word the text to expand
     * @param eval callback used to look up variables and evaluate executions
     * @return word itself when it contains none of the characters $ \ " ';
     *         otherwise the result of the expansion, which may be any Object
     * @throws Exception if the expansion fails
     */
    public static Object expand(CharSequence word, Evaluate eval) throws Exception
    {
        return expand(word, eval, false);
    }

    /**
     * Expands word, optionally treating it as the content of a double-quoted string.
     *
     * @param word the text to expand
     * @param eval callback used to look up variables and evaluate executions
     * @param inQuote whether word is the content of a double-quoted string
     * @return word itself when it contains no special character, else the expansion
     * @throws Exception if the expansion fails
     */
    private static Object expand(CharSequence word, Evaluate eval, boolean inQuote) throws Exception
    {
        final String special = "$\\\"'";
        int i = word.length();

        while ((--i >= 0) && (special.indexOf(word.charAt(i)) == -1))
        {
        }

        // shortcut if word doesn't contain any special characters
        if (i < 0)
            return word;

        return new Tokenizer(word, eval, inQuote).expand();
    }

    /**
     * Expands word using this tokenizer's evaluator and quote mode, wrapping
     * it in a WORD token at the given position first.
     *
     * @param word the text to expand
     * @param line line recorded in the token
     * @param column column recorded in the token
     * @return the expansion result
     * @throws Exception if the expansion fails
     */
    public Object expand(CharSequence word, short line, short column) throws Exception
    {
        return expand(new Token(Type.WORD, word, line, column), evaluate, inQuote);
    }

    /**
     * Builds a WORD token at the current line and column.
     *
     * @param value text of the token (may be null and filled in later)
     * @return the new token
     */
    private Token word(CharSequence value)
    {
        return new Token(Type.WORD, value, line, column);
    }

    /**
     * Expands the whole of text from the current character onwards.
     * If the text is a single variable, single-quoted or double-quoted item,
     * its value is returned directly; otherwise all parts are concatenated
     * into a String.
     *
     * @return the expanded value
     * @throws Exception if expansion of a variable or nested word fails
     */
    private Object expand() throws Exception
    {
        StringBuilder buf = new StringBuilder();

        while (ch != EOT)
        {
            int start = index;

            switch (ch)
            {
                case '$':
                    Object val = expandVar();

                    // a lone variable keeps its Object value
                    if (EOT == ch && buf.length() == 0)
                    {
                        return val;
                    }

                    if (null != val)
                    {
                        buf.append(val);
                    }

                    continue; // expandVar() has already read next char

                case '\\':
                    // inside quotes a backslash is literal unless followed by one of: u $ \ newline "
                    ch = (inQuote && ("u$\\\n\"".indexOf(peek()) == -1)) ? '\\'
                        : escape();

                    if (ch != '\0') // ignore line continuation
                    {
                        buf.append(ch);
                    }

                    break;

                case '"':
                    Token ww = word(null);
                    skipQuote();
                    ww.value = text.subSequence(start, index - 1);
                    value = ww;
                    Object expand = expand(value, evaluate, true);

                    if (eot() && buf.length() == 0 && value == expand)
                    {
                        // FELIX-2468 avoid returning CharSequence implementation
                        return ww.value.toString();
                    }

                    if (null != expand)
                    {
                        buf.append(expand.toString());
                    }
                    break;

                case '\'':
                    if (!inQuote)
                    {
                        skipQuote();
                        value = text.subSequence(start, index - 1);

                        if (eot() && buf.length() == 0)
                        {
                            return value;
                        }

                        buf.append(value);
                        break;
                    }
                    // else fall through
                default:
                    buf.append(ch);
            }

            getch();
        }

        return buf.toString();
    }

    /**
     * Expands a variable reference; the current character must be '$'.
     * Handles $NAME, $(...) and the braced form with an optional operator
     * and word. On return the current character is the one following the
     * reference.
     *
     * @return the value of the reference; "$" when no name follows the '$'
     * @throws SyntaxError on a bad name or an unknown operator in the braced form
     * @throws IllegalArgumentException for the '?' operator when the variable is unset
     * @throws Exception if nested expansion or evaluation fails
     */
    private Object expandVar() throws Exception
    {
        assert '$' == ch;
        Object val;

        if (getch() != '{')
        {
            if ('(' == ch)
            { // support $(...) FELIX-2433
                short sLine = line;
                short sCol = column;
                val = evaluate.eval(new Token(Type.EXECUTION, group(), sLine, sCol));
                getch();
            }
            else
            {
                int start = index - 1;
                while (isName(ch))
                {
                    getch();
                }

                if (index - 1 == start)
                {
                    // no name characters after '$': keep it literal
                    val = "$";
                }
                else
                {
                    String name = text.subSequence(start, index - 1).toString();
                    val = evaluate.get(name);
                }
            }
        }
        else
        {
            // ${NAME[[:][-+=?]WORD]}
            short sLine = line;
            short sCol = column;
            CharSequence group = group();
            char c;
            int i = 0;

            // find the first operator character; everything before it is the name
            while (i < group.length())
            {
                switch (group.charAt(i))
                {
                    case ':':
                    case '-':
                    case '+':
                    case '=':
                    case '?':
                        break;

                    default:
                        ++i;
                        continue;
                }
                break;
            }

            sCol += i;

            // the name part is itself expanded, which allows indirect references
            String name = String.valueOf(expand(group.subSequence(0, i), sLine, sCol));

            for (int j = 0; j < name.length(); ++j)
            {
                if (!isName(name.charAt(j)))
                {
                    throw new SyntaxError(sLine, sCol, "bad name: ${" + group + "}");
                }
            }

            val = evaluate.get(name);

            if (i < group.length())
            {
                c = group.charAt(i++);
                if (':' == c)
                {
                    c = (i < group.length() ? group.charAt(i++) : EOT);
                }

                CharSequence word = group.subSequence(i, group.length());

                switch (c)
                {
                    case '-':
                    case '=':
                        // use WORD when unset; '=' additionally stores it
                        if (null == val)
                        {
                            val = expand(word, evaluate, false);
                            if ('=' == c)
                            {
                                evaluate.put(name, val);
                            }
                        }
                        break;

                    case '+':
                        // use WORD only when the variable is set
                        if (null != val)
                        {
                            val = expand(word, evaluate, false);
                        }
                        break;

                    case '?':
                        // fail with WORD as the message when unset
                        if (null == val)
                        {
                            val = expand(word, evaluate, false);
                            if (null == val || val.toString().length() == 0)
                            {
                                val = "parameter not set";
                            }
                            throw new IllegalArgumentException(name + ": " + val);
                        }
                        break;

                    default:
                        throw new SyntaxError(sLine, sCol, "bad substitution: ${" + group
                            + "}");
                }
            }
            getch();
        }

        return val;
    }

    /**
     * returns true if getch() will return EOT
     *
     * @return true when index is at or beyond the end of text
     */
    private boolean eot()
    {
        return index >= text.length();
    }

    /**
     * Advances to the next character and stores it in {@code ch}.
     *
     * @return the new current character, or EOT at end of text
     */
    private char getch()
    {
        return ch = getch(false);
    }

    /**
     * @return the character getch() would return next, without advancing
     */
    private char peek()
    {
        return getch(true);
    }

    /**
     * Reads the next character, treating CR LF as a single '\n'.
     * When not peeking, index, line and column are updated; at end of text
     * index is still incremented so that index - 1 keeps pointing at the
     * position of the current character.
     *
     * @param peek when true, leave the position unchanged
     * @return the character read, or EOT at end of text
     */
    private char getch(boolean peek)
    {
        if (eot())
        {
            if (!peek)
            {
                ++index;
                ch = EOT;
            }
            return EOT;
        }

        int current = index;
        char c = text.charAt(index++);

        if (('\r' == c) && !eot() && (text.charAt(index) == '\n'))
            c = text.charAt(index++);

        if (peek)
        {
            index = current;
        }
        else if ('\n' == c)
        {
            ++line;
            column = 0;
        }
        else
            ++column;

        return c;
    }
}