src/java/org/apache/cassandra/io/sstable/Downsampling.java - cassandra - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.cassandra.io.sstable;

 import java.util.*;

 public class Downsampling
 {
     /**
      * The base (down)sampling level determines the granularity at which we can down/upsample.
      *
      * A higher number allows us to approximate more closely the ideal sampling.  (It could also mean we do a lot of
      * expensive almost-no-op resamplings from N to N-1, but the thresholds in IndexSummaryManager prevent that.)
      *
      * BSL must be a power of two in order to have good sampling patterns. This cannot be changed without rebuilding
      * all index summaries at full sampling; for now we treat it as a constant.
      */
     public static final int BASE_SAMPLING_LEVEL = 128;

     private static final Map<Integer, List<Integer>> samplePatternCache = new HashMap<>();

     private static final Map<Integer, List<Integer>> originalIndexCache = new HashMap<>();

     /**
      * Gets a list L of starting indices for downsampling rounds: the first round should start with the offset
      * given by L[0], the second by the offset in L[1], etc.
      *
      * @param samplingLevel the base sampling level
      *
      * @return A list of `samplingLevel` unique indices between 0 and `samplingLevel`
      */
     public static List<Integer> getSamplingPattern(int samplingLevel)
     {
         List<Integer> pattern = samplePatternCache.get(samplingLevel);
         if (pattern != null)
             return pattern;

         if (samplingLevel <= 1)
             return Arrays.asList(0);

         int[] odds = new int[samplingLevel / 2];
         int[] evens = new int[samplingLevel / 2];
         for (int i = 1; i < samplingLevel; i += 2)
             odds[i/2] = i;
         for (int i = 0; i < samplingLevel; i += 2)
             evens[i/2] = i;

         // especially for latter rounds, it's important that we spread out the start points, so we'll
         // make a recursive call to get an ordering for this list of start points
         List<Integer> ordering = getSamplingPattern(samplingLevel/2);
         List<Integer> startIndices = new ArrayList<>(samplingLevel);

         for (Integer index : ordering)
             startIndices.add(odds[index]);
         for (Integer index : ordering)
             startIndices.add(evens[index]);

         samplePatternCache.put(samplingLevel, startIndices);
         return startIndices;
     }

     /**
      * Returns a list that can be used to translate current index summary indexes to their original index before
      * downsampling.  (This repeats every `samplingLevel`, so that's how many entries we return.)
      *
      * For example, if [0, 64] is returned, the current index summary entry at index 0 was originally
      * at index 0, and the current index 1 was originally at index 64.
      *
      * @param samplingLevel the current sampling level for the index summary
      *
      * @return a list of original indexes for current summary entries
      */
     public static List<Integer> getOriginalIndexes(int samplingLevel)
     {
         List<Integer> originalIndexes = originalIndexCache.get(samplingLevel);
         if (originalIndexes != null)
             return originalIndexes;

         List<Integer> pattern = getSamplingPattern(BASE_SAMPLING_LEVEL).subList(0, BASE_SAMPLING_LEVEL - samplingLevel);
         originalIndexes = new ArrayList<>(samplingLevel);
         for (int j = 0; j < BASE_SAMPLING_LEVEL; j++)
         {
             if (!pattern.contains(j))
                 originalIndexes.add(j);
         }

         originalIndexCache.put(samplingLevel, originalIndexes);
         return originalIndexes;
     }

     /**
      * Calculates the effective index interval after the entry at `index` in an IndexSummary.  In other words, this
      * returns the number of partitions in the primary on-disk index before the next partition that has an entry in
      * the index summary.  If samplingLevel == BASE_SAMPLING_LEVEL, this will be equal to the index interval.
      * @param index an index into an IndexSummary
      * @param samplingLevel the current sampling level for that IndexSummary
      * @param minIndexInterval the min index interval (effective index interval at full sampling)
      * @return the number of partitions before the next index summary entry, inclusive on one end
      */
     public static int getEffectiveIndexIntervalAfterIndex(int index, int samplingLevel, int minIndexInterval)
     {
         assert index >= 0;
         index %= samplingLevel;
         List<Integer> originalIndexes = getOriginalIndexes(samplingLevel);
         int nextEntryOriginalIndex = (index == originalIndexes.size() - 1) ? BASE_SAMPLING_LEVEL : originalIndexes.get(index + 1);
         return (nextEntryOriginalIndex - originalIndexes.get(index)) * minIndexInterval;
     }

     public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel)
     {
         List<Integer> allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL);

         // calculate starting indexes for sampling rounds
         int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel;
         int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel);
         int[] startPoints = new int[numRounds];
         for (int i = 0; i < numRounds; ++i)
         {
             int start = allStartPoints.get(initialRound + i);

             // our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all
             // earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point
             int adjustment = 0;
             for (int j = 0; j < initialRound; ++j)
             {
                 if (allStartPoints.get(j) < start)
                     adjustment++;
             }
             startPoints[i] = start - adjustment;
         }
         return startPoints;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.apache.cassandra.io.sstable;

	import java.util.*;

	public class Downsampling
	{
	/**
	* The base (down)sampling level determines the granularity at which we can down/upsample.
	*
	* A higher number allows us to approximate more closely the ideal sampling. (It could also mean we do a lot of
	* expensive almost-no-op resamplings from N to N-1, but the thresholds in IndexSummaryManager prevent that.)
	*
	* BSL must be a power of two in order to have good sampling patterns. This cannot be changed without rebuilding
	* all index summaries at full sampling; for now we treat it as a constant.
	*/
	public static final int BASE_SAMPLING_LEVEL = 128;

	private static final Map<Integer, List<Integer>> samplePatternCache = new HashMap<>();

	private static final Map<Integer, List<Integer>> originalIndexCache = new HashMap<>();

	/**
	* Gets a list L of starting indices for downsampling rounds: the first round should start with the offset
	* given by L[0], the second by the offset in L[1], etc.
	*
	* @param samplingLevel the base sampling level
	*
	* @return A list of `samplingLevel` unique indices between 0 and `samplingLevel`
	*/
	public static List<Integer> getSamplingPattern(int samplingLevel)
	{
	List<Integer> pattern = samplePatternCache.get(samplingLevel);
	if (pattern != null)
	return pattern;

	if (samplingLevel <= 1)
	return Arrays.asList(0);

	int[] odds = new int[samplingLevel / 2];
	int[] evens = new int[samplingLevel / 2];
	for (int i = 1; i < samplingLevel; i += 2)
	odds[i/2] = i;
	for (int i = 0; i < samplingLevel; i += 2)
	evens[i/2] = i;

	// especially for latter rounds, it's important that we spread out the start points, so we'll
	// make a recursive call to get an ordering for this list of start points
	List<Integer> ordering = getSamplingPattern(samplingLevel/2);
	List<Integer> startIndices = new ArrayList<>(samplingLevel);

	for (Integer index : ordering)
	startIndices.add(odds[index]);
	for (Integer index : ordering)
	startIndices.add(evens[index]);

	samplePatternCache.put(samplingLevel, startIndices);
	return startIndices;
	}

	/**
	* Returns a list that can be used to translate current index summary indexes to their original index before
	* downsampling. (This repeats every `samplingLevel`, so that's how many entries we return.)
	*
	* For example, if [0, 64] is returned, the current index summary entry at index 0 was originally
	* at index 0, and the current index 1 was originally at index 64.
	*
	* @param samplingLevel the current sampling level for the index summary
	*
	* @return a list of original indexes for current summary entries
	*/
	public static List<Integer> getOriginalIndexes(int samplingLevel)
	{
	List<Integer> originalIndexes = originalIndexCache.get(samplingLevel);
	if (originalIndexes != null)
	return originalIndexes;

	List<Integer> pattern = getSamplingPattern(BASE_SAMPLING_LEVEL).subList(0, BASE_SAMPLING_LEVEL - samplingLevel);
	originalIndexes = new ArrayList<>(samplingLevel);
	for (int j = 0; j < BASE_SAMPLING_LEVEL; j++)
	{
	if (!pattern.contains(j))
	originalIndexes.add(j);
	}

	originalIndexCache.put(samplingLevel, originalIndexes);
	return originalIndexes;
	}

	/**
	* Calculates the effective index interval after the entry at `index` in an IndexSummary. In other words, this
	* returns the number of partitions in the primary on-disk index before the next partition that has an entry in
	* the index summary. If samplingLevel == BASE_SAMPLING_LEVEL, this will be equal to the index interval.
	* @param index an index into an IndexSummary
	* @param samplingLevel the current sampling level for that IndexSummary
	* @param minIndexInterval the min index interval (effective index interval at full sampling)
	* @return the number of partitions before the next index summary entry, inclusive on one end
	*/
	public static int getEffectiveIndexIntervalAfterIndex(int index, int samplingLevel, int minIndexInterval)
	{
	assert index >= 0;
	index %= samplingLevel;
	List<Integer> originalIndexes = getOriginalIndexes(samplingLevel);
	int nextEntryOriginalIndex = (index == originalIndexes.size() - 1) ? BASE_SAMPLING_LEVEL : originalIndexes.get(index + 1);
	return (nextEntryOriginalIndex - originalIndexes.get(index)) * minIndexInterval;
	}

	public static int[] getStartPoints(int currentSamplingLevel, int newSamplingLevel)
	{
	List<Integer> allStartPoints = getSamplingPattern(BASE_SAMPLING_LEVEL);

	// calculate starting indexes for sampling rounds
	int initialRound = BASE_SAMPLING_LEVEL - currentSamplingLevel;
	int numRounds = Math.abs(currentSamplingLevel - newSamplingLevel);
	int[] startPoints = new int[numRounds];
	for (int i = 0; i < numRounds; ++i)
	{
	int start = allStartPoints.get(initialRound + i);

	// our "ideal" start points will be affected by the removal of items in earlier rounds, so go through all
	// earlier rounds, and if we see an index that comes before our ideal start point, decrement the start point
	int adjustment = 0;
	for (int j = 0; j < initialRound; ++j)
	{
	if (allStartPoints.get(j) < start)
	adjustment++;
	}
	startPoints[i] = start - adjustment;
	}
	return startPoints;
	}
	}