| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.drill.exec.compile; |
| |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.Map; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import com.google.common.base.Preconditions; |
| |
| /** |
| * Describes a finite state machine in terms of a mapping of tokens to |
| * characters, a regular expression describing the valid transitions |
| * using the characters, and an end state. Used to validate state |
| * transitions. One simple example is the implementation of the |
| * Check*VisitorFsm classes used to validate call sequences in ASM |
| * visitors (classes like CheckClassAdapter only validate per-call |
| * arguments, not the entire sequence of calls, according to |
| * http://asm.ow2.org/asm50/javadoc/user/org/objectweb/asm/util/CheckClassAdapter.html |
| * |
| * <p>The current implementation is very simple, and requires some user setup. |
| * Basically, we use Java's {@link java.util.regex.Pattern} and |
| * {@link java.util.regex.Matcher} to verify a regular expression on demand. |
| * The user must map state transitions' string names onto characters, and |
| * specify a regex that describes the state machine. Using this technique, we |
| * can only validate when we transition to an end state; we just check to see |
| * if accumulated characters comprise an allowed regular expression. |
| * |
| * <p>In this simple implementation, the tokens and characters may represent |
| * states or transitions, depending on what is most convenient. In either case, |
| * we just check that a sequence of them matches the regular expression governing |
| * this state machine. |
| */ |
| public class FsmDescriptor { |
| private final Map<String, Character> tokenMap; // state/transition -> character |
| private final Pattern fsmPattern; // compiled regex representing the FSM |
| private final char lastTransition; // the character that represents the last state/transition |
| |
| /** |
| * Create a finite state machine descriptor. The descriptor is immutable, and may |
| * be shared across many cursors that are executing the FSM. |
| * |
| * @param tokenMap mapping of transitions/states to characters |
| * @param fsmRegex the regular expression, defined using the characters from the tokenMap |
| * @param lastTransition the name of the final transition/state |
| */ |
| public FsmDescriptor(final Map<String, Character> tokenMap, |
| final String fsmRegex, final String lastTransition) { |
| Preconditions.checkNotNull(tokenMap); |
| Preconditions.checkNotNull(fsmRegex); |
| Preconditions.checkNotNull(lastTransition); |
| Preconditions.checkArgument(tokenMap.containsKey(lastTransition)); |
| |
| // make sure the characters in the tokenMap are unique |
| final HashSet<Character> charSet = new HashSet<>(); |
| for(Map.Entry<String, Character> me : tokenMap.entrySet()) { |
| final Character character = me.getValue(); |
| if (charSet.contains(character)) { |
| throw new IllegalArgumentException("Duplicate tokenMap char: '" + character + "'"); |
| } |
| charSet.add(character); |
| } |
| |
| this.tokenMap = Collections.unmodifiableMap(tokenMap); |
| this.fsmPattern = Pattern.compile(fsmRegex); |
| this.lastTransition = this.tokenMap.get(lastTransition).charValue(); |
| } |
| |
| /** |
| * Create a cursor for performing and validating transitions according to this |
| * state machine. |
| * |
| * @return the new cursor |
| */ |
| public FsmCursor createCursor() { |
| return new FsmCursor(this); |
| } |
| |
| /** |
| * Validate the given transitions against this state machine. |
| * |
| * @param transitions a character sequence indicating a set of transitions or |
| * states as defined by the tokenMap used at construction time. |
| * @throws IllegalStateException if the set of transitions is not allowed |
| */ |
| void validateTransitions(final CharSequence transitions) { |
| final long length = transitions.length(); |
| if (length == 0) { |
| return; // assume we haven't started yet |
| } |
| |
| final Matcher matcher = fsmPattern.matcher(transitions); |
| if (!matcher.find() || (matcher.start() != 0) || (matcher.end() != length)) { |
| throw new IllegalStateException("Illegal state transition(s): " + transitions); |
| } |
| } |
| |
| /** |
| * Look up the character used to represent the transition or state denoted by |
| * the given token. |
| * |
| * @param token the transition or state to look up |
| * @return the character used to represent that transition or state |
| */ |
| char getChar(final String token) { |
| Preconditions.checkNotNull(token); |
| final Character character = tokenMap.get(token); |
| Preconditions.checkNotNull(character); |
| return character.charValue(); |
| } |
| |
| /** |
| * Determine whether the indicated character represents the final transition or state. |
| * |
| * @param transitionChar the character to look up |
| * @return true if the character represents the final transition or state |
| */ |
| boolean isLastTransition(final char transitionChar) { |
| return transitionChar == lastTransition; |
| } |
| } |