blob: bbe76596f422f995ebacac036c740b7b1cb1bb92 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.carbondata.core.writer.sortindex;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.carbondata.core.cache.dictionary.Dictionary;
import org.apache.carbondata.core.cache.dictionary.DictionaryChunksWrapper;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.commons.lang.ArrayUtils;
/**
 * Prepares the sort information of a dictionary column: the sort index
 * (surrogate keys listed in the sorted order of their members) and the
 * inverted sort index (the 1-based sorted position of every surrogate key).
 */
public class CarbonDictionarySortInfoPreparator {

  /**
   * Charset used to convert dictionary members between bytes and strings.
   * Resolved once here instead of being looked up by name for every member.
   */
  private static final Charset MEMBER_CHARSET =
      Charset.forName(CarbonCommonConstants.DEFAULT_CHARSET);

  /**
   * Builds the sort info for a column from its existing dictionary members
   * and the newly found distinct values.
   *
   * @param newDistinctValues new distinct values to be added; must not be null
   * @param dictionary        dictionary holding the already existing distinct values,
   *                          or null when there is no existing dictionary
   * @param dataType          data type of the column
   * @return the sort index and inverted sort index of the column
   */
  public CarbonDictionarySortInfo getDictionarySortInfo(List<String> newDistinctValues,
      Dictionary dictionary, DataType dataType) {
    CarbonDictionarySortModel[] dictionarySortModels =
        prepareDictionarySortModels(newDistinctValues, dictionary, dataType);
    return createColumnSortInfo(dictionarySortModels);
  }

  /**
   * Sorts the given models in place (by the natural ordering of
   * CarbonDictionarySortModel) and derives the sort index and the inverted
   * sort index from the sorted order.
   *
   * @param dictionarySortModels one model per dictionary member; the array is reordered
   * @return the sort index and inverted sort index wrapped as lists
   */
  private CarbonDictionarySortInfo createColumnSortInfo(
      CarbonDictionarySortModel[] dictionarySortModels) {
    Arrays.sort(dictionarySortModels);
    // sortIndex[i] is the surrogate key of the member at sorted position i
    int[] sortIndex = new int[dictionarySortModels.length];
    // sortIndexInverted[key - 1] is the 1-based sorted position of that surrogate key
    int[] sortIndexInverted = new int[dictionarySortModels.length];
    for (int i = 0; i < dictionarySortModels.length; i++) {
      int surrogateKey = dictionarySortModels[i].getKey();
      sortIndex[i] = surrogateKey;
      // Surrogate keys start from 1 while array indexes start from 0, so 1 is
      // subtracted from the key to avoid wasting slot 0; 1 is added to the
      // 0-based counter i so that the stored sorted positions also start from 1.
      sortIndexInverted[surrogateKey - 1] = i + 1;
    }
    return new CarbonDictionarySortInfo(convertToList(sortIndex),
        convertToList(sortIndexInverted));
  }

  /**
   * Converts an int[] to a List&lt;Integer&gt;.
   *
   * @param data values to convert
   * @return fixed-size list (backed by a boxed copy of data) with the same values
   *         in the same order
   */
  private List<Integer> convertToList(int[] data) {
    Integer[] wrapperType = ArrayUtils.toObject(data);
    return Arrays.asList(wrapperType);
  }

  /**
   * Creates one CarbonDictionarySortModel per dictionary member. Surrogate keys
   * are assigned sequentially starting from 1: first to the members of the
   * existing dictionary in iteration order, then to the new distinct values in
   * list order.
   *
   * @param distinctValues new distinct values
   * @param dictionary     existing dictionary, or null when there is none; its chunks
   *                       wrapper provides the iterator over the existing member bytes
   * @param dataType       data type of the column
   * @return array of models, each holding a member's surrogate key and its value;
   *         the model for surrogate key k is stored at index k - 1
   */
  private CarbonDictionarySortModel[] prepareDictionarySortModels(List<String> distinctValues,
      Dictionary dictionary, DataType dataType) {
    CarbonDictionarySortModel[] dictionarySortModels;
    int surrogate = 1;
    if (null != dictionary) {
      // The wrapper wraps the list<list<byte[]>> and provides the iterator to
      // retrieve the chunk members.
      DictionaryChunksWrapper dictionaryChunksWrapper = dictionary.getDictionaryChunks();
      // NOTE(review): assumes getSize() equals the number of members the wrapper
      // iterates over; otherwise the array would be mis-sized - confirm.
      dictionarySortModels =
          new CarbonDictionarySortModel[dictionaryChunksWrapper.getSize() + distinctValues.size()];
      while (dictionaryChunksWrapper.hasNext()) {
        dictionarySortModels[surrogate - 1] =
            createDictionarySortModel(surrogate, dataType, dictionaryChunksWrapper.next());
        surrogate++;
      }
    } else {
      dictionarySortModels = new CarbonDictionarySortModel[distinctValues.size()];
    }
    // New distinct values continue the surrogate sequence. Each value is encoded
    // to bytes and decoded again by createDictionarySortModel, exactly like the
    // members read from the existing dictionary.
    Iterator<String> distinctValue = distinctValues.iterator();
    while (distinctValue.hasNext()) {
      dictionarySortModels[surrogate - 1] = createDictionarySortModel(surrogate, dataType,
          distinctValue.next().getBytes(MEMBER_CHARSET));
      surrogate++;
    }
    return dictionarySortModels;
  }

  /**
   * Creates the sort model of a single dictionary member.
   *
   * @param surrogate surrogate key of the member
   * @param dataType  data type of the column
   * @param value     member value as bytes, decoded with the default charset
   * @return CarbonDictionarySortModel holding the surrogate key, data type and
   *         decoded member value
   */
  private CarbonDictionarySortModel createDictionarySortModel(int surrogate, DataType dataType,
      byte[] value) {
    String memberValue = new String(value, MEMBER_CHARSET);
    return new CarbonDictionarySortModel(surrogate, dataType, memberValue);
  }
}