| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.carbondata.datamap.bloom; |
| |
| import java.io.IOException; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.carbondata.common.annotations.InterfaceAudience; |
| import org.apache.carbondata.core.datamap.Segment; |
| import org.apache.carbondata.core.datastore.block.SegmentProperties; |
| import org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter; |
| import org.apache.carbondata.core.metadata.datatype.DataType; |
| import org.apache.carbondata.core.metadata.datatype.DataTypes; |
| import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn; |
| import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension; |
| import org.apache.carbondata.core.util.CarbonUtil; |
| import org.apache.carbondata.core.util.DataTypeUtil; |
| |
| import org.apache.commons.collections.CollectionUtils; |
| import org.apache.commons.collections.Predicate; |
| |
| /** |
| * BloomDataMap is constructed in CG level (blocklet level). |
| * For each indexed column, a bloom filter is constructed to indicate whether a value |
| * belongs to this blocklet. Bloom filter of blocklet that belongs to same block will |
| * be written to one index file suffixed with .bloomindex. So the number |
| * of bloom index file will be equal to that of the blocks. |
| */ |
| @InterfaceAudience.Internal |
| public class BloomDataMapWriter extends AbstractBloomDataMapWriter { |
| private ColumnarSplitter columnarSplitter; |
| // for the dict/sort/date column, they are encoded in MDK, |
| // this maps the index column name to the index in MDK |
| private Map<String, Integer> indexCol2MdkIdx; |
| |
| BloomDataMapWriter(String tablePath, String dataMapName, List<CarbonColumn> indexColumns, |
| Segment segment, String shardName, SegmentProperties segmentProperties, |
| int bloomFilterSize, double bloomFilterFpp, boolean compressBloom) |
| throws IOException { |
| super(tablePath, dataMapName, indexColumns, segment, shardName, segmentProperties, |
| bloomFilterSize, bloomFilterFpp, compressBloom); |
| |
| columnarSplitter = segmentProperties.getFixedLengthKeySplitter(); |
| this.indexCol2MdkIdx = new HashMap<>(); |
| int idx = 0; |
| for (final CarbonDimension dimension : segmentProperties.getDimensions()) { |
| if (!dimension.isGlobalDictionaryEncoding() && !dimension.isDirectDictionaryEncoding()) { |
| continue; |
| } |
| boolean isExistInIndex = CollectionUtils.exists(indexColumns, new Predicate() { |
| @Override |
| public boolean evaluate(Object object) { |
| return ((CarbonColumn) object).getColName().equalsIgnoreCase(dimension.getColName()); |
| } |
| }); |
| if (isExistInIndex) { |
| this.indexCol2MdkIdx.put(dimension.getColName(), idx); |
| } |
| idx++; |
| } |
| } |
| |
| protected byte[] convertNonDictionaryValue(int indexColIdx, Object value) { |
| if (DataTypes.VARCHAR == indexColumns.get(indexColIdx).getDataType()) { |
| return DataConvertUtil.getRawBytesForVarchar((byte[]) value); |
| } else if (DataTypeUtil.isPrimitiveColumn(indexColumns.get(indexColIdx).getDataType())) { |
| // get bytes for the original value of the no dictionary column |
| return CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); |
| } else { |
| return DataConvertUtil.getRawBytes((byte[]) value); |
| } |
| } |
| |
| @Override |
| protected byte[] convertDictionaryValue(int indexColIdx, Object value) { |
| // input value from onPageAdded in load process is byte[] |
| |
| // for dict columns including dictionary and date columns decode value to get the surrogate key |
| int thisKeyIdx = indexCol2MdkIdx.get(indexColumns.get(indexColIdx).getColName()); |
| int surrogateKey = CarbonUtil.getSurrogateInternal((byte[]) value, 0, |
| columnarSplitter.getBlockKeySize()[thisKeyIdx]); |
| // store the dictionary key in bloom |
| return CarbonUtil.getValueAsBytes(DataTypes.INT, surrogateKey); |
| } |
| } |