blob: ae1d858b1c3ff5a3f78857de84b4a04dd7e385c5 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc.impl;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.io.Text;
/**
* A red-black tree that stores strings. The strings are stored as UTF-8 bytes
* and an offset for each entry.
*/
public class StringRedBlackTree extends RedBlackTree {
private final DynamicByteArray byteArray = new DynamicByteArray();
private final DynamicIntArray keyOffsets;
private final Text newKey = new Text();
public StringRedBlackTree(int initialCapacity) {
super(initialCapacity);
keyOffsets = new DynamicIntArray(initialCapacity);
}
public int add(String value) {
newKey.set(value);
return addNewKey();
}
private int addNewKey() {
// if the newKey is actually new, add it to our byteArray and store the offset & length
if (add()) {
int len = newKey.getLength();
keyOffsets.add(byteArray.add(newKey.getBytes(), 0, len));
}
return lastAdd;
}
public int add(Text value) {
newKey.set(value);
return addNewKey();
}
public int add(byte[] bytes, int offset, int length) {
newKey.set(bytes, offset, length);
return addNewKey();
}
@Override
protected int compareValue(int position) {
int start = keyOffsets.get(position);
int end;
if (position + 1 == keyOffsets.size()) {
end = byteArray.size();
} else {
end = keyOffsets.get(position+1);
}
return byteArray.compare(newKey.getBytes(), 0, newKey.getLength(),
start, end - start);
}
/**
* The information about each node.
*/
public interface VisitorContext {
/**
* Get the position where the key was originally added.
* @return the number returned by add.
*/
int getOriginalPosition();
/**
* Write the bytes for the string to the given output stream.
* @param out the stream to write to.
* @throws IOException
*/
void writeBytes(OutputStream out) throws IOException;
/**
* Get the original string.
* @return the string
*/
Text getText();
/**
* Get the number of bytes.
* @return the string's length in bytes
*/
int getLength();
}
/**
* The interface for visitors.
*/
public interface Visitor {
/**
* Called once for each node of the tree in sort order.
* @param context the information about each node
* @throws IOException
*/
void visit(VisitorContext context) throws IOException;
}
private class VisitorContextImpl implements VisitorContext {
private int originalPosition;
private int start;
private int end;
private final Text text = new Text();
@Override
public int getOriginalPosition() {
return originalPosition;
}
@Override
public Text getText() {
byteArray.setText(text, start, end - start);
return text;
}
@Override
public void writeBytes(OutputStream out) throws IOException {
byteArray.write(out, start, end - start);
}
@Override
public int getLength() {
return end - start;
}
void setPosition(int position) {
originalPosition = position;
start = keyOffsets.get(originalPosition);
if (position + 1 == keyOffsets.size()) {
end = byteArray.size();
} else {
end = keyOffsets.get(originalPosition + 1);
}
}
}
private void recurse(int node, Visitor visitor, VisitorContextImpl context
) throws IOException {
if (node != NULL) {
recurse(getLeft(node), visitor, context);
context.setPosition(node);
visitor.visit(context);
recurse(getRight(node), visitor, context);
}
}
/**
* Visit all of the nodes in the tree in sorted order.
* @param visitor the action to be applied to each node
* @throws IOException
*/
public void visit(Visitor visitor) throws IOException {
recurse(root, visitor, new VisitorContextImpl());
}
/**
* Reset the table to empty.
*/
@Override
public void clear() {
super.clear();
byteArray.clear();
keyOffsets.clear();
}
public void getText(Text result, int originalPosition) {
int offset = keyOffsets.get(originalPosition);
int length;
if (originalPosition + 1 == keyOffsets.size()) {
length = byteArray.size() - offset;
} else {
length = keyOffsets.get(originalPosition + 1) - offset;
}
byteArray.setText(result, offset, length);
}
/**
* Get the size of the character data in the table.
* @return the bytes used by the table
*/
public int getCharacterSize() {
return byteArray.size();
}
/**
* Calculate the approximate size in memory.
* @return the number of bytes used in storing the tree.
*/
@Override
public long getSizeInBytes() {
return byteArray.getSizeInBytes() + keyOffsets.getSizeInBytes() +
super.getSizeInBytes();
}
}