| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.falcon.util; |
| |
| import org.apache.commons.lang3.StringUtils; |
| import org.apache.falcon.entity.store.FeedPathStore; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import javax.annotation.Nonnull; |
| import javax.annotation.Nullable; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Formattable; |
| import java.util.Formatter; |
| import java.util.Iterator; |
| import java.util.LinkedList; |
| import java.util.List; |
| |
| |
| /** |
| * A thread-safe Radix Tree implementation of the LocationStore. |
| * |
| * |
| * A radix tree (also patricia trie or radix trie or compact prefix tree) is a space-optimized |
| * trie data structure where each node with only one child is merged with its parent. |
| * |
| * For example the tree representation for the following (key,value) pairs - |
| * [("key1", "value1"), ("key123", "Key was key123"), ("key124", "Key was key124"), |
| * ("key2", "value2"), ("random", "random")] will be as below. |
| * |
| * | |
| * |-key |
| * |--1[[value1]]* |
| * |---2 |
| * |----3[[Key was key123]]* |
| * |----4[[Key was key124]]* |
| * |--2[[value2]]* |
| * |-random[[random]]* |
| * |
| * For more details on Radix Tree please refer |
| * <a href="http://en.wikipedia.org/wiki/Radix_tree">Radix Tree</a> |
| * @param <T> Type of value being stored against the key. |
| */ |
| public class RadixTree<T> implements FeedPathStore<T>, Formattable { |
| private static final Logger LOG = LoggerFactory.getLogger(RadixTree.class); |
| |
| protected RadixNode<T> root; |
| |
| private int size; |
| |
| public RadixTree(){ |
| root = new RadixNode<T>(); |
| root.setKey(""); |
| size = 0; |
| } |
| |
| /** |
| * Return the number of keys stored in the tree. |
| * |
| * Since all keys end in terminal nodes and duplicate keys are not allowed, |
| * size is equal to the number of terminal nodes in the tree. |
| * @return number of keys in the tree. |
| */ |
| @Override |
| public synchronized int getSize() { |
| return size; |
| } |
| |
| /** |
| * Insert a <key, value> pair in the Radix Tree. |
| * |
| * @param key Key to be stored |
| * @param value Value to be stored against that key |
| */ |
| @Override |
| public synchronized void insert(@Nullable String key, @Nonnull T value){ |
| if (key != null && !key.trim().isEmpty()){ |
| LOG.debug("Insert called for key: {} and value: {}", key.trim(), value); |
| insertKeyRecursive(key.trim(), value, root); |
| } |
| } |
| |
| private void insertKeyRecursive(String remainingText, T value, RadixNode<T> currentNode){ |
| |
| int currentMatchLength = currentNode.getMatchLength(remainingText); |
| String newRemainingText = remainingText.substring(currentMatchLength, remainingText.length()); |
| |
| // if root or current node key is subset of the input key GO DOWN |
| if (currentNode.isRoot() |
| || (currentMatchLength == currentNode.getKey().length() |
| && currentMatchLength < remainingText.length())){ |
| |
| // if a path to go down exists then go down that path |
| boolean foundPath = false; |
| for(RadixNode<T> child: currentNode.getChildren()){ |
| if (child.getKey().charAt(0) == newRemainingText.charAt(0)){ |
| insertKeyRecursive(newRemainingText, value, child); |
| foundPath = true; |
| break; |
| } |
| } |
| // else create a new node. |
| if (!foundPath){ |
| RadixNode<T> node = new RadixNode<T>(); |
| node.setKey(newRemainingText); |
| node.addValue(value); |
| node.setTerminal(true); |
| currentNode.getChildren().add(node); |
| size += 1; |
| } |
| }else if (currentMatchLength == remainingText.length() && currentMatchLength < currentNode.getKey().length()){ |
| // if remainingText is subset of the current node key |
| RadixNode<T> node = new RadixNode<T>(); |
| node.setChildren(currentNode.getChildren()); |
| node.setKey(currentNode.getKey().substring(currentMatchLength)); |
| node.setValues(currentNode.getValues()); |
| node.setTerminal(currentNode.isTerminal()); |
| |
| currentNode.setChildren(new LinkedList<RadixNode<T>>()); |
| currentNode.getChildren().add(node); |
| currentNode.setTerminal(true); |
| currentNode.setKey(currentNode.getKey().substring(0, currentMatchLength)); |
| currentNode.removeAll(); |
| currentNode.addValue(value); |
| |
| size += 1; |
| |
| }else if (currentMatchLength < remainingText.length() && currentMatchLength < currentNode.getKey().length()){ |
| |
| //add new Node and move all current node's children and value to it |
| RadixNode<T> node = new RadixNode<T>(); |
| node.setChildren(currentNode.getChildren()); |
| node.setTerminal(currentNode.isTerminal()); |
| node.setValues(currentNode.getValues()); |
| node.setKey(currentNode.getKey().substring(currentMatchLength, currentNode.getKey().length())); |
| |
| // add node for the text |
| RadixNode<T> node2 = new RadixNode<T>(); |
| node2.setKey(newRemainingText); |
| node2.setTerminal(true); |
| node2.addValue(value); |
| |
| //update current node to be new root |
| currentNode.setTerminal(false); |
| currentNode.setKey(currentNode.getKey().substring(0, currentMatchLength)); |
| currentNode.setChildren(new LinkedList<RadixNode<T>>()); |
| currentNode.getChildren().add(node); |
| currentNode.getChildren().add(node2); |
| |
| size += 1; |
| }else if (currentMatchLength == remainingText.length() && currentMatchLength == currentNode.getKey().length()){ |
| // if current node key and input key both match equally |
| if (currentNode.isTerminal()){ |
| currentNode.addValue(value); |
| }else { |
| currentNode.setTerminal(true); |
| currentNode.addValue(value); |
| } |
| size += 1; |
| } |
| } |
| |
| /** |
| * Find the value for the given key if it exists in the tree, null otherwise. |
| * |
| * A key is said to exist in the tree if we can generate exactly that string |
| * by going down from root to a terminal node. If a key exists we return the value |
| * stored at the terminal node. |
| * |
| * @param key - input key to be searched. |
| * @return T Value of the key if it exists, null otherwise |
| */ |
| @Override |
| @Nullable |
| public synchronized Collection<T> find(@Nonnull String key, FalconRadixUtils.INodeAlgorithm algorithm) { |
| if (key != null && !key.trim().isEmpty()) { |
| if (algorithm == null) { |
| algorithm = new FalconRadixUtils.StringAlgorithm(); |
| } |
| return recursiveFind(key.trim(), root, algorithm); |
| } |
| return null; |
| } |
| |
| @Nullable |
| @Override |
| public Collection<T> find(@Nonnull String key) { |
| if (key != null && !key.trim().isEmpty()) { |
| FalconRadixUtils.INodeAlgorithm algorithm = new FalconRadixUtils.StringAlgorithm(); |
| return recursiveFind(key.trim(), root, algorithm); |
| } |
| return null; |
| } |
| |
| private Collection<T> recursiveFind(String key, RadixNode<T> currentNode, |
| FalconRadixUtils.INodeAlgorithm algorithm){ |
| |
| if (!algorithm.startsWith(currentNode.getKey(), key)){ |
| LOG.debug("Current Node key: {} is not a prefix in the input key: {}", currentNode.getKey(), key); |
| return null; |
| } |
| |
| if (algorithm.match(currentNode.getKey(), key)){ |
| if (currentNode.isTerminal()){ |
| LOG.debug("Found the terminal node with key: {} for the given input.", currentNode.getKey()); |
| return currentNode.getValues(); |
| }else { |
| LOG.debug("currentNode is not terminal. Current node's key is {}", currentNode.getKey()); |
| return null; |
| } |
| } |
| |
| //find child to follow, using remaining Text |
| RadixNode<T> newRoot = algorithm.getNextCandidate(currentNode, key); |
| String remainingText = algorithm.getRemainingText(currentNode, key); |
| |
| if (newRoot == null){ |
| LOG.debug("No child found to follow for further processing. Current node key {}"); |
| return null; |
| }else { |
| LOG.debug("Recursing with new key: {} and new remainingText: {}", newRoot.getKey(), remainingText); |
| return recursiveFind(remainingText, newRoot, algorithm); |
| } |
| } |
| |
| /** |
| * Deletes a given key,value pair from the Radix Tree. |
| * |
| * @param key key to be deleted |
| * @param value value to be deleted |
| */ |
| @Override |
| public synchronized boolean delete(@Nonnull String key, @Nonnull T value) { |
| if (key != null && !key.trim().isEmpty()){ |
| LOG.debug("Delete called for key:{}", key.trim()); |
| return recursiveDelete(key, null, root, value); |
| } |
| return false; |
| } |
| |
| private boolean recursiveDelete(String key, RadixNode<T> parent, RadixNode<T> currentNode, T value){ |
| LOG.debug("Recursing with key: {}, currentNode: {}", key, currentNode.getKey()); |
| if (!key.startsWith(currentNode.getKey())){ |
| LOG.debug("Current node's key: {} is not a prefix of the remaining input key: {}", |
| currentNode.getKey(), key); |
| return false; |
| } |
| |
| if (StringUtils.equals(key, currentNode.getKey())){ |
| LOG.trace("Current node's key:{} and the input key:{} matched", currentNode.getKey(), key); |
| if (currentNode.getValues().contains(value)){ |
| LOG.debug("Given value is found in the collection of values against the given key"); |
| currentNode.removeValue(value); |
| size -= 1; |
| if (currentNode.getValues().size() == 0){ |
| LOG.debug("Exact match between current node's key: {} and remaining input key: {}", |
| currentNode.getKey(), key); |
| if (currentNode.isTerminal()){ |
| //if child has no children & only one value, then delete and compact parent if needed |
| if (currentNode.getChildren().size() == 0){ |
| Iterator<RadixNode<T>> it = parent.getChildren().iterator(); |
| while(it.hasNext()){ |
| if (StringUtils.equals(it.next().getKey(), currentNode.getKey())){ |
| it.remove(); |
| LOG.debug("Deleting the node"); |
| break; |
| } |
| } |
| }else if (currentNode.getChildren().size() > 1){ |
| // if child has more than one children just mark non terminal |
| currentNode.setTerminal(false); |
| }else if (currentNode.getChildren().size() == 1){ |
| // if child has only one child then compact node |
| LOG.debug("compacting node with child as node to be deleted has only 1 child"); |
| RadixNode<T> child = currentNode.getChildren().get(0); |
| currentNode.setChildren(child.getChildren()); |
| currentNode.setTerminal(child.isTerminal()); |
| currentNode.setKey(currentNode.getKey() + child.getKey()); |
| currentNode.setValues(child.getValues()); |
| } |
| |
| //parent can't be null as root will never match with input key as it is not a terminal node. |
| if (!parent.isTerminal() && !parent.isRoot()){ |
| // if only one child left in parent and parent is not root then join parent |
| // and the only child key |
| if (parent.getChildren().size() == 1){ |
| RadixNode<T> onlyChild = parent.getChildren().get(0); |
| String onlyChildKey = onlyChild.getKey(); |
| LOG.debug("Compacting child: {} and parent: {}", onlyChildKey, parent.getKey()); |
| parent.setKey(parent.getKey() + onlyChildKey); |
| parent.setChildren(onlyChild.getChildren()); |
| parent.setTerminal(onlyChild.isTerminal()); |
| parent.setValues(onlyChild.getValues()); |
| } |
| } |
| return true; |
| }else{ |
| LOG.debug("Key found only as a prefix and not at a terminal node"); |
| return false; |
| } |
| } |
| return true; |
| }else { |
| LOG.debug("Current value is not found in the collection of values against the given key, no-op"); |
| return false; |
| } |
| } |
| |
| LOG.debug("Current node's key: {} is a prefix of the input key: {}", currentNode.getKey(), key); |
| //find child to follow |
| RadixNode<T> newRoot = null; |
| String remainingKey = key.substring(currentNode.getMatchLength(key)); |
| for(RadixNode<T> el : currentNode.getChildren()){ |
| LOG.trace("Finding next child to follow. Current child's key:{}", el.getKey()); |
| if (el.getKey().charAt(0) == remainingKey.charAt(0)){ |
| newRoot = el; |
| break; |
| } |
| } |
| |
| if (newRoot == null){ |
| LOG.debug("No child was found with common prefix with the remainder key: {}", key); |
| return false; |
| }else { |
| LOG.debug("Found a child's key: {} with common prefix, recursing on it", newRoot.getKey()); |
| return recursiveDelete(remainingKey, currentNode, newRoot, value); |
| } |
| } |
| |
| |
| /** |
| * Useful for debugging. |
| */ |
| @Override |
| public void formatTo(Formatter formatter, int flags, int width, int precision) { |
| formatNodeTo(formatter, 0, root); |
| |
| } |
| |
| private void formatNodeTo(Formatter formatter, int level, RadixNode<T> node){ |
| for (int i = 0; i < level; i++) { |
| formatter.format(" "); |
| } |
| formatter.format("|"); |
| for (int i = 0; i < level; i++) { |
| formatter.format("-"); |
| } |
| |
| if (node.isTerminal()){ |
| formatter.format("%s[%s]*%n", node.getKey(), node.getValues()); |
| }else{ |
| formatter.format("%s%n", node.getKey()); |
| } |
| |
| for (RadixNode<T> child : node.getChildren()) { |
| formatNodeTo(formatter, level + 1, child); |
| } |
| } |
| |
| /** |
| * Find List of substring of keys which have given input as a prefix. |
| * |
| * @param key - Input string for which all Suffix Children should be returned |
| * @param limit - Maximum Number of results. If limit is less than 0 then all nodes are returned. |
| * If limit is 0 then returns null. |
| */ |
| @javax.annotation.Nullable |
| public List<String> findSuffixChildren(String key, int limit){ |
| if (key == null || limit == 0){ |
| return null; |
| } |
| RadixNode<T> currentNode = root; |
| String remainingText = key.trim(); |
| List<String> result = new LinkedList<String>(); |
| do{ |
| boolean flag = false; |
| // find the child with common prefix |
| for(RadixNode<T> child: currentNode.getChildren()){ |
| LOG.debug("Checking for child key: {} against remainingText: {}", child.getKey(), remainingText); |
| if (child.getKey().charAt(0) == remainingText.charAt(0)){ |
| LOG.debug("Child key: {} found to have overlap with the remainingText: {}", child.getKey(), |
| remainingText); |
| flag = true; |
| |
| //if entire key doesn't match return null |
| if (!remainingText.startsWith(child.getKey())){ |
| return null; |
| } |
| |
| // if entire key equals remainingText - return it's children up to the specified limit |
| if (StringUtils.equals(child.getKey(), remainingText)){ |
| int counter = 0; |
| |
| for(RadixNode<T> suffixChild: child.getChildren()){ |
| if (limit < 0 || counter < limit){ |
| result.add(suffixChild.getKey()); |
| } |
| } |
| return Collections.unmodifiableList(result); |
| } |
| |
| //if entire key matches but it is not equal to entire remainingText - repeat |
| remainingText = remainingText.substring(child.getKey().length()); |
| currentNode = child; |
| break; |
| |
| } |
| } |
| // if no child found with common prefix return null; |
| if (!flag){ |
| return null; |
| } |
| }while (true); |
| } |
| } |