blob: cdc4febbf1197fe6e8a21c2de154d653197e2cbb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.common;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.exec.compile.TemplateClassDefinition;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.ClassGenerator;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.common.exceptions.RetryAfterSpillException;
/**
 * Contract of the hash table used by operators in this package. Keys live in
 * {@link HashTableTemplate.BatchHolder}s and are addressed through a composite index
 * (index of the BatchHolder plus index of the record inside that BatchHolder).
 *
 * <p>{@link #setup} has to be invoked before any other operation.
 */
public interface HashTable {

  /**
   * Template definition that pairs this interface with {@link HashTableTemplate}.
   */
  TemplateClassDefinition<HashTable> TEMPLATE_DEFINITION =
      new TemplateClassDefinition<>(HashTable.class, HashTableTemplate.class);

  /**
   * Upper bound on the capacity of the hash table, expressed as a number of buckets.
   */
  int MAXIMUM_CAPACITY = 1 << 30;

  /**
   * Load factor used by a hash table by default.
   */
  float DEFAULT_LOAD_FACTOR = 0.75f;

  /**
   * Result reported by {@link #put}.
   * NOTE(review): the precise meaning of each constant is defined by the implementation
   * (see {@link HashTableTemplate}); it cannot be derived from this interface alone.
   */
  enum PutStatus {
    KEY_PRESENT,
    KEY_ADDED,
    NEW_BATCH_ADDED,
    KEY_ADDED_LAST,
    PUT_FAILED
  }

  /**
   * Batch size used for the internal batch holders.
   */
  int BATCH_SIZE = Character.MAX_VALUE + 1;

  /**
   * Bit mask with the low 16 bits set, i.e. {@code BATCH_SIZE - 1} ({@code 0x0000FFFF}).
   * Presumably used to extract the in-batch record position from a composite index - confirm
   * against the implementation.
   */
  int BATCH_MASK = BATCH_SIZE - 1;

  /**
   * Initializes the hash table. {@link HashTable#setup} must be called before anything else
   * can be done to the {@link HashTable}.
   *
   * @param htConfig The configuration of the hash table.
   * @param allocator The buffer allocator handed to the hash table.
   * @param incomingBuild The build side container.
   * @param incomingProbe The probe side batch.
   * @param outgoing The outgoing batch.
   * @param htContainerOrig The original hash table container.
   * @param context The fragment context.
   * @param cg The class generator.
   */
  void setup(HashTableConfig htConfig, BufferAllocator allocator, VectorContainer incomingBuild,
             RecordBatch incomingProbe, RecordBatch outgoing, VectorContainer htContainerOrig,
             FragmentContext context, ClassGenerator<?> cg);

  /**
   * Refreshes the references to the incoming (build and probe side) value vectors held by the
   * {@link HashTableTemplate.BatchHolder}s. This is useful on OK_NEW_SCHEMA (need to verify).
   *
   * @throws SchemaChangeException declared by this contract; the triggering conditions are
   *         implementation specific.
   */
  void updateBatches() throws SchemaChangeException;

  /**
   * Calculates the hash code of a record in the build side batch.
   *
   * @param incomingRowIdx The index of the build side record of interest.
   * @return The hash code for the record at the given index in the build side batch.
   * @throws SchemaChangeException declared by this contract; the triggering conditions are
   *         implementation specific.
   */
  int getBuildHashCode(int incomingRowIdx) throws SchemaChangeException;

  /**
   * Calculates the hash code of a record in the probe side batch.
   *
   * @param incomingRowIdx The index of the probe side record of interest.
   * @return The hash code for the record at the given index in the probe side batch.
   * @throws SchemaChangeException declared by this contract; the triggering conditions are
   *         implementation specific.
   */
  int getProbeHashCode(int incomingRowIdx) throws SchemaChangeException;

  /**
   * Puts the key of an incoming record into the hash table and reports the outcome.
   *
   * @param incomingRowIdx The index of the incoming record.
   * @param htIdxHolder Index holder; presumably receives the hash table index of the key -
   *        TODO confirm against the implementation.
   * @param hashCode The hash code of the key.
   * @param batchSize A batch size; presumably the size to use for a newly added batch holder -
   *        TODO confirm against the implementation.
   * @return The {@link PutStatus} describing the outcome of the operation.
   * @throws SchemaChangeException declared by this contract; the triggering conditions are
   *         implementation specific.
   * @throws RetryAfterSpillException declared by this contract; judging by its name, the caller
   *         is expected to spill and retry - verify against callers.
   */
  PutStatus put(int incomingRowIdx, IndexPointer htIdxHolder, int hashCode, int batchSize)
      throws SchemaChangeException, RetryAfterSpillException;

  /**
   * Looks up the key of a probe side record in the hash table.
   *
   * @param incomingRowIdx The index of the key in the probe batch.
   * @param hashCode The hashCode of the key.
   * @return -1 if the data in the probe batch at the given incomingRowIdx is not in the hash
   *         table. Otherwise the composite index of the key in the hash table (index of
   *         BatchHolder and record in BatchHolder).
   * @throws SchemaChangeException declared by this contract; the triggering conditions are
   *         implementation specific.
   */
  int probeForKey(int incomingRowIdx, int hashCode) throws SchemaChangeException;

  /**
   * Reads the count of records stored for a specific key.
   *
   * @param currentIndex The composite index of the key in the hash table (index of BatchHolder
   *        and record in BatchHolder).
   * @return -1 if the count of records for the key has not been computed. Otherwise the count of
   *         records for the key.
   */
  int getRecordNumForKey(int currentIndex);

  /**
   * Sets the count of records for a specific key to {@code num}.
   *
   * @param currentIndex The composite index of the key in the hash table (index of BatchHolder
   *        and record in BatchHolder).
   * @param num The count of records to store for the key.
   */
  void setRecordNumForKey(int currentIndex, int num);

  /**
   * Decrements the count of records for a specific key by one.
   *
   * @param currentIndex The composite index of the key in the hash table (index of BatchHolder
   *        and record in BatchHolder).
   */
  void decreaseRecordNumForKey(int currentIndex);

  /**
   * Reports statistics about the hash table.
   *
   * @param stats The statistics object; presumably filled in by the implementation - TODO confirm.
   */
  void getStats(HashTableStats stats);

  /**
   * @return The size of the hash table; presumably the number of keys it holds - TODO confirm.
   */
  int size();

  /**
   * @return Whether the hash table is empty.
   */
  boolean isEmpty();

  /**
   * Releases all the direct memory consumed by the {@link HashTable}.
   */
  void clear();

  /**
   * Updates the initial capacity of the hash table. This method will be removed after the key
   * vectors are removed from the hash table. It is used to allocate
   * {@link HashTableTemplate.BatchHolder}s of appropriate size when the final size of the
   * HashTable is known.
   *
   * <p><b>Warning!</b> Only call this method before you have inserted elements into the HashTable.
   *
   * @param initialCapacity The new initial capacity to use.
   */
  void updateInitialCapacity(int initialCapacity);

  /**
   * Swaps the incoming probe and build side batches, and then updates all the value vector
   * references in the {@link HashTableTemplate.BatchHolder}s.
   *
   * @param newIncoming The new build side batch.
   * @param newIncomingProbe The new probe side batch.
   */
  void updateIncoming(VectorContainer newIncoming, RecordBatch newIncomingProbe);

  /**
   * Clears all the memory used by the {@link HashTable} and re-initializes it.
   */
  void reset();

  /**
   * Retrieves the key columns and transfers them to the output container. Note that this
   * operation removes the key columns from the {@link HashTable}.
   *
   * @param batchIdx The index of a {@link HashTableTemplate.BatchHolder} in the HashTable.
   * @param outContainer The destination container for the key columns.
   * @param numRecords The number of key records to transfer.
   * @return A boolean status whose meaning is not specified by this interface; see the
   *         implementation in {@link HashTableTemplate}.
   */
  boolean outputKeys(int batchIdx, VectorContainer outContainer, int numRecords);

  /**
   * Builds a message containing memory usage statistics. Intended to be used for printing
   * debugging or error messages.
   *
   * @return A debug string.
   */
  String makeDebugString();

  /**
   * Reports the amount of direct memory consumed by the hash table.
   *
   * @return The amount of direct memory consumed by the hash table (presumably in bytes -
   *         TODO confirm).
   */
  long getActualSize();

  /**
   * Sets the target batch row count.
   *
   * @param batchRowCount The target number of rows per batch.
   */
  void setTargetBatchRowCount(int batchRowCount);

  /**
   * @return The target batch row count.
   */
  int getTargetBatchRowCount();

  /**
   * Produces the next batch.
   *
   * @return A pair of a container and an integer; presumably the batch and its record count -
   *         TODO confirm against the implementation.
   */
  Pair<VectorContainer, Integer> nextBatch();
}