// blob: ea2a312fe911ab486ce777a1a56fe8527c1a81ce [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.crunch.lib;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.impl.mem.MemPipeline;
import org.apache.crunch.lib.Quantiles.Result;
import org.junit.Test;
import java.util.Map;
import static org.apache.crunch.types.avro.Avros.*;
import static org.junit.Assert.assertEquals;
/**
 * Tests for {@link Quantiles} that run against small in-memory tables built with
 * {@link MemPipeline}.
 *
 * <p>Each test asks for the same quantiles through both {@code Quantiles.distributed} and
 * {@code Quantiles.inMemory}, materializes both outputs to maps, and compares each of them
 * against one shared expected map.
 */
public class QuantilesTest {

  /**
   * Builds the expected {@link Result} for a single key.
   *
   * @param count the element count the result is expected to report
   * @param quantiles (quantile, value) pairs, in the order they should be handed to the result
   * @return a new result wrapping the given count and pairs
   */
  private static <T> Result<T> expectedResult(long count, Pair<Double, T>... quantiles) {
    return new Result<T>(count, Lists.newArrayList(quantiles));
  }

  /**
   * Asserts that both computed maps equal the expected one, checking the distributed output
   * first and the in-memory output second.
   */
  private static void assertBothEqual(
      Map<String, Result<Integer>> expected,
      Map<String, Result<Integer>> fromDistributed,
      Map<String, Result<Integer>> fromInMemory) {
    assertEquals(expected, fromDistributed);
    assertEquals(expected, fromInMemory);
  }

  /** Five values 1..5 under one key: quantiles 0, 0.5 and 1.0 should be 1, 3 and 5. */
  @Test
  public void testQuantilesExact() {
    Map<String, Result<Integer>> expected = ImmutableMap.of(
        "a", expectedResult(5, Pair.of(0.0, 1), Pair.of(0.5, 3), Pair.of(1.0, 5)));

    PTable<String, Integer> input = MemPipeline.typedTableOf(
        tableOf(strings(), ints()),
        "a", 5,
        "a", 2,
        "a", 3,
        "a", 4,
        "a", 1);

    Map<String, Result<Integer>> fromDistributed =
        Quantiles.distributed(input, 0, 0.5, 1.0).materializeToMap();
    Map<String, Result<Integer>> fromInMemory =
        Quantiles.inMemory(input, 0, 0.5, 1.0).materializeToMap();

    assertBothEqual(expected, fromDistributed, fromInMemory);
  }

  /** Four values with no exact middle element: the 0.5 quantile should be 2. */
  @Test
  public void testQuantilesBetween() {
    Map<String, Result<Integer>> expected = ImmutableMap.of(
        "a", expectedResult(4, Pair.of(0.5, 2)));

    PTable<String, Integer> input = MemPipeline.typedTableOf(
        tableOf(strings(), ints()),
        "a", 5,
        "a", 2, // Under the "nearest rank" percentile definition, 0.5 is expected to land on this element.
        "a", 4,
        "a", 1);

    Map<String, Result<Integer>> fromDistributed =
        Quantiles.distributed(input, 0.5).materializeToMap();
    Map<String, Result<Integer>> fromInMemory =
        Quantiles.inMemory(input, 0.5).materializeToMap();

    assertBothEqual(expected, fromDistributed, fromInMemory);
  }

  /** Ten values 10..100: the 0.9 quantile should be 90 and the 0.99 quantile should be 100. */
  @Test
  public void testQuantilesNines() {
    Map<String, Result<Integer>> expected = ImmutableMap.of(
        "a", expectedResult(10, Pair.of(0.9, 90), Pair.of(0.99, 100)));

    PTable<String, Integer> input = MemPipeline.typedTableOf(
        tableOf(strings(), ints()),
        "a", 10,
        "a", 20,
        "a", 30,
        "a", 40,
        "a", 50,
        "a", 60,
        "a", 70,
        "a", 80,
        "a", 90,
        "a", 100);

    Map<String, Result<Integer>> fromDistributed =
        Quantiles.distributed(input, 0.9, 0.99).materializeToMap();
    Map<String, Result<Integer>> fromInMemory =
        Quantiles.inMemory(input, 0.9, 0.99).materializeToMap();

    assertBothEqual(expected, fromDistributed, fromInMemory);
  }

  /** Ten values 10..100: the 0.5 quantile should be 50. */
  @Test
  public void testQuantilesLessThanOrEqual() {
    Map<String, Result<Integer>> expected = ImmutableMap.of(
        "a", expectedResult(10, Pair.of(0.5, 50)));

    PTable<String, Integer> input = MemPipeline.typedTableOf(
        tableOf(strings(), ints()),
        "a", 10,
        "a", 20,
        "a", 30,
        "a", 40,
        "a", 50,
        "a", 60,
        "a", 70,
        "a", 80,
        "a", 90,
        "a", 100);

    Map<String, Result<Integer>> fromDistributed =
        Quantiles.distributed(input, 0.5).materializeToMap();
    Map<String, Result<Integer>> fromInMemory =
        Quantiles.inMemory(input, 0.5).materializeToMap();

    assertBothEqual(expected, fromDistributed, fromInMemory);
  }
}