/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.reader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.format.ColumnDictionaryChunkMeta;
import org.apache.thrift.TBase;
/**
* This class performs the functionality of reading the dictionary metadata file
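* <p>
* A minimal usage sketch (illustrative only; assumes a valid
* DictionaryColumnUniqueIdentifier instance named {@code identifier} is already available):
* <pre>{@code
* // identifier is assumed to be constructed elsewhere
* CarbonDictionaryMetadataReader reader = new CarbonDictionaryMetadataReaderImpl(identifier);
* try {
*   List<CarbonDictionaryColumnMetaChunk> metaChunks = reader.read();
* } finally {
*   reader.close();
* }
* }</pre>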
*/
public class CarbonDictionaryMetadataReaderImpl implements CarbonDictionaryMetadataReader {
/**
* column identifier
*/
protected DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier;
/**
* dictionary metadata file path
*/
protected String columnDictionaryMetadataFilePath;
/**
* dictionary metadata thrift file reader
*/
private ThriftReader dictionaryMetadataFileReader;
/**
* Constructor
*
* @param dictionaryColumnUniqueIdentifier column unique identifier
*/
public CarbonDictionaryMetadataReaderImpl(
DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier) {
this.dictionaryColumnUniqueIdentifier = dictionaryColumnUniqueIdentifier;
initFileLocation();
}
/**
* This method will be used to read the complete metadata file.
* Applicable scenarios:
* 1. Query execution. Whenever a query is executed, the dictionary metadata has to be
* read first in order to read the dictionary file and define the query scope.
* 2. If the dictionary file is read using a start and end offset, then using this meta list
* we can count the total number of dictionary chunks present between the two offsets
*
* @return list of all dictionary meta chunks, which contain information for each segment
* @throws IOException if an I/O error occurs
*/
@Override
public List<CarbonDictionaryColumnMetaChunk> read() throws IOException {
List<CarbonDictionaryColumnMetaChunk> dictionaryMetaChunks =
new ArrayList<CarbonDictionaryColumnMetaChunk>(
CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
CarbonDictionaryColumnMetaChunk columnMetaChunk = null;
ColumnDictionaryChunkMeta dictionaryChunkMeta = null;
// open dictionary meta thrift reader
openThriftReader();
// read all dictionary chunk meta entries till the end of the file
while (dictionaryMetadataFileReader.hasNext()) {
// get the thrift object for dictionary chunk
dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
// create a new instance of chunk meta wrapper using thrift object
columnMetaChunk = getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
dictionaryMetaChunks.add(columnMetaChunk);
}
return dictionaryMetaChunks;
}
/**
* This method will be used to read only the last entry of dictionary meta chunk.
* Applicable scenarios:
* 1. Global dictionary generation for incremental load. In this case only the
* last dictionary chunk meta entry has to be read to calculate min, max surrogate
* key and start and end offset for the new dictionary chunk.
* 2. Truncate operation. While writing the dictionary file for an incremental load,
* the dictionary file needs to be validated for any inconsistency. Here the end offset
* of the last dictionary chunk meta is validated against the file size.
*
* @return last segment entry for dictionary chunk
* @throws IOException if an I/O error occurs
*/
@Override
public CarbonDictionaryColumnMetaChunk readLastEntryOfDictionaryMetaChunk()
throws IOException {
ColumnDictionaryChunkMeta dictionaryChunkMeta = null;
// open dictionary meta thrift reader
openThriftReader();
// on completion of the while loop, dictionaryChunkMeta will hold the last dictionary chunk entry
while (dictionaryMetadataFileReader.hasNext()) {
// get the thrift object for dictionary chunk
dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
}
if (null == dictionaryChunkMeta) {
throw new IOException("Last dictionary chunk does not exist");
}
// create a new instance of chunk meta wrapper using thrift object
return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
}
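/**
* This method will be used to read the dictionary chunk meta entry whose end offset
* is greater than or equal to the given end offset. Entries are scanned in order and
* the first matching entry is returned; if none matches, the last entry read is returned.
*
* @param end_Offset end offset up to which dictionary chunk meta entries have to be scanned
* @return dictionary chunk meta entry covering the given end offset
* @throws IOException if an I/O error occurs
*/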
@Override
public CarbonDictionaryColumnMetaChunk readEntryOfDictionaryMetaChunk(long end_Offset)
throws IOException {
ColumnDictionaryChunkMeta dictionaryChunkMeta = null;
// open dictionary meta thrift reader
openThriftReader();
// scan the dictionary chunk meta entries in order
while (dictionaryMetadataFileReader.hasNext()) {
// get the thrift object for dictionary chunk
dictionaryChunkMeta = (ColumnDictionaryChunkMeta) dictionaryMetadataFileReader.read();
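// stop at the first chunk meta whose end offset covers the given offset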
if (dictionaryChunkMeta.end_offset >= end_Offset) {
break;
}
}
if (null == dictionaryChunkMeta) {
throw new IOException("Matching dictionary chunk does not exist");
}
// create a new instance of chunk meta wrapper using thrift object
return getNewInstanceOfCarbonDictionaryColumnMetaChunk(dictionaryChunkMeta);
}
/**
* Closes this stream and releases any system resources associated
* with it. If the stream is already closed then invoking this
* method has no effect.
*
* @throws IOException if an I/O error occurs
*/
@Override
public void close() throws IOException {
if (null != dictionaryMetadataFileReader) {
dictionaryMetadataFileReader.close();
dictionaryMetadataFileReader = null;
}
}
/**
* This method will form the path of the dictionary metadata file for a given column
*/
protected void initFileLocation() {
this.columnDictionaryMetadataFilePath =
dictionaryColumnUniqueIdentifier.getDictionaryMetaFilePath();
}
/**
* This method will open the dictionary metadata file stream for reading
*
* @throws IOException thrift reader open method throws IOException
*/
private void openThriftReader() throws IOException {
// initialise the dictionary metadata file reader which will return ColumnDictionaryChunkMeta
// thrift objects holding the surrogate key range, offsets and chunk count for each segment
if (null == dictionaryMetadataFileReader) {
dictionaryMetadataFileReader =
new ThriftReader(this.columnDictionaryMetadataFilePath, new ThriftReader.TBaseCreator() {
@Override
public TBase create() {
return new ColumnDictionaryChunkMeta();
}
});
// Open it
dictionaryMetadataFileReader.open();
}
}
/**
* Given a thrift object, this method will create a new wrapper class object
* for the dictionary chunk meta
*
* @param dictionaryChunkMeta reference for chunk meta thrift object
* @return wrapper object of dictionary chunk meta
*/
private CarbonDictionaryColumnMetaChunk getNewInstanceOfCarbonDictionaryColumnMetaChunk(
ColumnDictionaryChunkMeta dictionaryChunkMeta) {
return new CarbonDictionaryColumnMetaChunk(dictionaryChunkMeta.getMin_surrogate_key(),
dictionaryChunkMeta.getMax_surrogate_key(), dictionaryChunkMeta.getStart_offset(),
dictionaryChunkMeta.getEnd_offset(), dictionaryChunkMeta.getChunk_count());
}
}