/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.rumen;

import java.util.ArrayList;

import org.junit.Test;
import static org.junit.Assert.*;

public class TestPiecewiseLinearInterpolation {

  static private double maximumRelativeError = 0.002D;

  static private LoggedSingleRelativeRanking makeRR(double ranking, long datum) {
    LoggedSingleRelativeRanking result = new LoggedSingleRelativeRanking();

    result.setDatum(datum);
    result.setRelativeRanking(ranking);

    return result;
  }
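
  /**
   * Feeds a small {@link LoggedDiscreteCDF} to a
   * {@link CDFPiecewiseLinearRandomGenerator}, draws a million random values,
   * and checks that the observed percentiles track the piecewise-linear
   * interpolation to within {@code maximumRelativeError}.
   */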
  @Test
  public void testOneRun() {
    LoggedDiscreteCDF input = new LoggedDiscreteCDF();

    input.setMinimum(100000L);
    input.setMaximum(1100000L);
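
    // The resulting CDF passes through (0%, 100,000), (10%, 200,000),
    // (50%, 800,000), (90%, 1,000,000) and (100%, 1,100,000); the generator
    // interpolates linearly between consecutive anchor points.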
    ArrayList<LoggedSingleRelativeRanking> rankings =
        new ArrayList<LoggedSingleRelativeRanking>();

    rankings.add(makeRR(0.1, 200000L));
    rankings.add(makeRR(0.5, 800000L));
    rankings.add(makeRR(0.9, 1000000L));

    input.setRankings(rankings);
    input.setNumberValues(3);

    CDFRandomGenerator gen = new CDFPiecewiseLinearRandomGenerator(input);
    Histogram values = new Histogram();
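
    // Sample the generator a million times and accumulate the values in a
    // histogram, so observed percentiles can be compared against the model.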
    for (int i = 0; i < 1000000; ++i) {
      long value = gen.randomValue();
      values.enter(value);
    }

    /*
     * Now we build a percentiles CDF, and compute the sum of the squares of
     * the actual percentiles vs. the predicted percentiles.
     */
    int[] percentiles = new int[99];

    for (int i = 0; i < 99; ++i) {
      percentiles[i] = i + 1;
    }

    long[] result = values.getCDF(100, percentiles);
    long sumErrorSquares = 0L;
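
    // The loops below read result[0] through result[100], treating index i as
    // the i-th observed percentile (0 = minimum, 100 = maximum). Each loop
    // checks one linear segment of the model CDF.
    //
    // 0%..10%: the model rises from the minimum (100,000) to 200,000, or
    // 10,000 per percentile, so the expected value is 10,000 * i + 100,000.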
    for (int i = 0; i < 10; ++i) {
      long error = result[i] - (10000L * i + 100000L);
      System.out.println("element " + i + ", got " + result[i] + ", expected "
          + (10000L * i + 100000L) + ", error = " + error);
      sumErrorSquares += error * error;
    }
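
    // 10%..50%: 200,000 up to 800,000, or 15,000 per percentile, so the
    // expected value is 15,000 * (i - 10) + 200,000 = 15,000 * i + 50,000.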
    for (int i = 10; i < 50; ++i) {
      long error = result[i] - (15000L * i + 50000L);
      System.out.println("element " + i + ", got " + result[i] + ", expected "
          + (15000L * i + 50000L) + ", error = " + error);
      sumErrorSquares += error * error;
    }
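
    // 50%..90%: 800,000 up to 1,000,000, or 5,000 per percentile, so the
    // expected value is 5,000 * (i - 50) + 800,000 = 5,000 * i + 550,000.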
    for (int i = 50; i < 90; ++i) {
      long error = result[i] - (5000L * i + 550000L);
      System.out.println("element " + i + ", got " + result[i] + ", expected "
          + (5000L * i + 550000L) + ", error = " + error);
      sumErrorSquares += error * error;
    }
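
    // 90%..100%: 1,000,000 up to the maximum (1,100,000), or 10,000 per
    // percentile, so the expected value is again 10,000 * i + 100,000.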
    for (int i = 90; i <= 100; ++i) {
      long error = result[i] - (10000L * i + 100000L);
      System.out.println("element " + i + ", got " + result[i] + ", expected "
          + (10000L * i + 100000L) + ", error = " + error);
      sumErrorSquares += error * error;
    }

    // Normalize: average the squared errors over the 100 percentile steps,
    // express them relative to the median datum (800,000), and take the
    // square root to get an RMS relative error.
    double realSumErrorSquares = (double) sumErrorSquares;

    double normalizedError = realSumErrorSquares / 100
        / rankings.get(1).getDatum() / rankings.get(1).getDatum();
    double RMSNormalizedError = Math.sqrt(normalizedError);

    System.out.println("sumErrorSquares = " + sumErrorSquares);
    System.out.println("normalizedError: " + normalizedError
        + ", RMSNormalizedError: " + RMSNormalizedError);
    System.out.println("Cumulative error is " + RMSNormalizedError);

    assertTrue("The RMS relative error per bucket, " + RMSNormalizedError
        + ", exceeds our tolerance of " + maximumRelativeError,
        RMSNormalizedError <= maximumRelativeError);
  }
}