processing/src/main/java/org/apache/druid/query/groupby/epinephelinae/vector/DictionaryBuildingSingleValueStringGroupByVectorColumnSelector.java - druid - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.druid.query.groupby.epinephelinae.vector;

 import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
 import org.apache.datasketches.memory.WritableMemory;
 import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.query.groupby.ResultRow;
 import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils;
 import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
 import org.apache.druid.segment.DimensionHandlerUtils;
 import org.apache.druid.segment.vector.VectorObjectSelector;

 import java.util.ArrayList;
 import java.util.List;

 /**
  * {@link GroupByVectorColumnSelector} for single-valued STRING columns that do not come with a dictionary of their
  * own, such as expression virtual columns. Each distinct string read from the {@link VectorObjectSelector} is given
  * the next free integer id, and that id (not the string itself) is what gets written into the grouping key.
  *
  * This plays the same role for {@link VectorGroupByEngine} that
  * {@link org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy} plays
  * for String columns.
  */
 public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector
 {
   // Id for which writeKeyToResultRow emits the default string value instead of a dictionary entry.
   private static final int GROUP_BY_MISSING_VALUE = -1;

   private final VectorObjectSelector selector;

   // id -> value; the id of a value is its index in this list.
   private final List<String> dictionary = new ArrayList<>();
   // value -> id; configured in the constructor to answer -1 for values that have not been seen.
   private final Object2IntOpenHashMap<String> reverseDictionary = new Object2IntOpenHashMap<>();

   /**
    * @param selector source of the object vectors whose elements are converted to strings and grouped on
    */
   public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(VectorObjectSelector selector)
   {
     this.selector = selector;
     // Real ids are list indexes (>= 0), so a negative lookup result unambiguously means "not in the dictionary".
     this.reverseDictionary.defaultReturnValue(-1);
   }

   /**
    * @return the number of bytes one key of this selector occupies: a single int dictionary id
    */
   @Override
   public int getGroupingKeySize()
   {
     return Integer.BYTES;
   }

   /**
    * Writes one int dictionary id per row for rows {@code [startRow, endRow)} of the current object vector. The first
    * id goes to {@code keyOffset} and each following id is written {@code keySize} bytes after the previous one.
    * Values that are not yet in the dictionary are appended to it and receive the next id.
    *
    * @return the estimated footprint increase caused by the dictionary entries added during this call; zero when no
    * new value was encountered
    */
   @Override
   public int writeKeys(
       final WritableMemory keySpace,
       final int keySize,
       final int keyOffset,
       final int startRow,
       final int endRow
   )
   {
     final Object[] objects = selector.getObjectVector();
     int addedFootprint = 0;
     int writePosition = keyOffset;

     for (int row = startRow; row < endRow; row++) {
       final String str = DimensionHandlerUtils.convertObjectToString(objects[row]);
       int id = reverseDictionary.getInt(str);

       if (id < 0) {
         // First occurrence of this value: register it under the next free id.
         id = dictionary.size();
         dictionary.add(str);
         reverseDictionary.put(str, id);

         // Use same ROUGH_OVERHEAD_PER_DICTIONARY_ENTRY as the nonvectorized version; dictionary structure is the
         // same. A null value contributes no character bytes to the estimate.
         final int charCount = str == null ? 0 : str.length();
         addedFootprint += DictionaryBuildingUtils.estimateEntryFootprint(charCount * Character.BYTES);
       }

       keySpace.putInt(writePosition, id);
       writePosition += keySize;
     }

     return addedFootprint;
   }

   /**
    * Reads the int dictionary id stored at {@code keyMemory.position() + keyOffset} and places the corresponding
    * string at {@code resultRowPosition} of {@code resultRow}. An id equal to {@code GROUP_BY_MISSING_VALUE} has no
    * dictionary entry; {@link NullHandling#defaultStringValue()} is written for it instead.
    */
   @Override
   public void writeKeyToResultRow(
       final MemoryPointer keyMemory,
       final int keyOffset,
       final ResultRow resultRow,
       final int resultRowPosition
   )
   {
     final int id = keyMemory.memory().getInt(keyMemory.position() + keyOffset);

     if (id == GROUP_BY_MISSING_VALUE) {
       resultRow.set(resultRowPosition, NullHandling.defaultStringValue());
       return;
     }

     final String value = dictionary.get(id);
     resultRow.set(resultRowPosition, value);
   }

   /**
    * Forgets every value-to-id assignment made so far. Ids handed out after this call start again from zero.
    */
   @Override
   public void reset()
   {
     reverseDictionary.clear();
     dictionary.clear();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.druid.query.groupby.epinephelinae.vector;

	import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
	import org.apache.datasketches.memory.WritableMemory;
	import org.apache.druid.common.config.NullHandling;
	import org.apache.druid.query.groupby.ResultRow;
	import org.apache.druid.query.groupby.epinephelinae.DictionaryBuildingUtils;
	import org.apache.druid.query.groupby.epinephelinae.collection.MemoryPointer;
	import org.apache.druid.segment.DimensionHandlerUtils;
	import org.apache.druid.segment.vector.VectorObjectSelector;

	import java.util.ArrayList;
	import java.util.List;

	/**
	 * Vectorized group-by column selector for single-valued STRING columns that are not natively dictionary encoded
	 * (e.g. expression virtual columns). It maintains its own two-way mapping between strings and integer ids, and
	 * uses those ids as the grouping key.
	 *
	 * This is effectively the {@link VectorGroupByEngine} analog of
	 * {@link org.apache.druid.query.groupby.epinephelinae.column.DictionaryBuildingGroupByColumnSelectorStrategy} for
	 * String columns.
	 */
	public class DictionaryBuildingSingleValueStringGroupByVectorColumnSelector implements GroupByVectorColumnSelector
	{
	  // Key id that is resolved to the default string value rather than looked up in the dictionary.
	  private static final int GROUP_BY_MISSING_VALUE = -1;

	  private final VectorObjectSelector selector;

	  // Forward mapping: position in the list is the id of the string stored there.
	  private final List<String> dictionary = new ArrayList<>();
	  // Backward mapping from string to id.
	  private final Object2IntOpenHashMap<String> reverseDictionary = new Object2IntOpenHashMap<>();

	  /**
	   * @param selector supplies the object vector that {@link #writeKeys} reads its values from
	   */
	  public DictionaryBuildingSingleValueStringGroupByVectorColumnSelector(VectorObjectSelector selector)
	  {
	    this.selector = selector;
	    // Unknown strings look up as -1, which writeKeys treats as "needs a new dictionary entry".
	    this.reverseDictionary.defaultReturnValue(-1);
	  }

	  /**
	   * @return size in bytes of a key written by this selector, which is one int
	   */
	  @Override
	  public int getGroupingKeySize()
	  {
	    return Integer.BYTES;
	  }

	  /**
	   * Converts each element of the current object vector in {@code [startRow, endRow)} to a string and writes its
	   * dictionary id into {@code keySpace}, starting at {@code keyOffset} and stepping by {@code keySize} per row.
	   * Strings seen for the first time are added to the dictionary.
	   *
	   * @return the summed footprint estimate of all dictionary entries created by this call
	   */
	  @Override
	  public int writeKeys(
	      final WritableMemory keySpace,
	      final int keySize,
	      final int keyOffset,
	      final int startRow,
	      final int endRow
	  )
	  {
	    final Object[] rowValues = selector.getObjectVector();
	    int footprintDelta = 0;

	    for (int rowNum = startRow, pos = keyOffset; rowNum < endRow; rowNum++, pos += keySize) {
	      final String current = DimensionHandlerUtils.convertObjectToString(rowValues[rowNum]);
	      final int knownId = reverseDictionary.getInt(current);

	      if (knownId >= 0) {
	        // Already in the dictionary: reuse its id, nothing new to account for.
	        keySpace.putInt(pos, knownId);
	        continue;
	      }

	      final int assignedId = dictionary.size();
	      dictionary.add(current);
	      reverseDictionary.put(current, assignedId);
	      keySpace.putInt(pos, assignedId);

	      // Use same ROUGH_OVERHEAD_PER_DICTIONARY_ENTRY as the nonvectorized version; dictionary structure is the
	      // same. Null strings are counted as having zero characters.
	      int numChars = 0;
	      if (current != null) {
	        numChars = current.length();
	      }
	      footprintDelta += DictionaryBuildingUtils.estimateEntryFootprint(numChars * Character.BYTES);
	    }

	    return footprintDelta;
	  }

	  /**
	   * Resolves the int id found at {@code keyOffset} past the position of {@code keyMemory} back into a string and
	   * stores it in {@code resultRow} at {@code resultRowPosition}.
	   */
	  @Override
	  public void writeKeyToResultRow(
	      final MemoryPointer keyMemory,
	      final int keyOffset,
	      final ResultRow resultRow,
	      final int resultRowPosition
	  )
	  {
	    final long idAddress = keyMemory.position() + keyOffset;
	    final int id = keyMemory.memory().getInt(idAddress);

	    if (id == GROUP_BY_MISSING_VALUE) {
	      // No dictionary entry exists for this id; fall back to the default string value.
	      resultRow.set(resultRowPosition, NullHandling.defaultStringValue());
	    } else {
	      resultRow.set(resultRowPosition, dictionary.get(id));
	    }
	  }

	  /**
	   * Empties both directions of the dictionary so that id assignment starts over.
	   */
	  @Override
	  public void reset()
	  {
	    dictionary.clear();
	    reverseDictionary.clear();
	  }
	}