blob: ac6b8542e0614fa3f05ef0d6d28ff6d71bee6cfa [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.math3.distribution.IntegerDistribution;
import org.apache.commons.math3.distribution.AbstractRealDistribution;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.client.solrj.io.Tuple;
public class OutliersEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker {
protected static final long serialVersionUID = 1L;
public OutliersEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{
super(expression, factory);
}
@Override
@SuppressWarnings({"unchecked", "rawtypes"})
public Object doWork(Object... values) throws IOException{
if(values.length < 4) {
throw new IOException("The outliers function requires 4 parameters");
}
Object dist = values[0];
List<Number> vec = null;
if(values[1] instanceof List) {
vec = (List<Number>)values[1];
} else {
throw new IOException("The second parameter of the outliers function is the numeric array to be tested for outliers.");
}
double low = 0.0;
if(values[2] instanceof Number) {
low = ((Number)values[2]).doubleValue();
} else {
throw new IOException("The third parameter of the outliers function is a number for the low outlier threshold.");
}
double hi = 0.0;
if(values[3] instanceof Number) {
hi = ((Number)values[3]).doubleValue();
} else {
throw new IOException("The fourth parameter of the outliers function is a number for the high outlier threshold");
}
List<Tuple> tuples = null;
if(values.length ==5) {
if(values[4] instanceof List) {
tuples = (List<Tuple>) values[4];
} else {
throw new IOException("The optional fifth parameter of the outliers function is an array of Tuples that are paired with the numeric array of values to be tested.");
}
} else {
tuples = new ArrayList<>();
for(int i=0; i<vec.size(); i++) {
tuples.add(new Tuple());
}
}
List<Tuple> outliers = new ArrayList<>();
if(dist instanceof IntegerDistribution) {
IntegerDistribution d = (IntegerDistribution) dist;
for(int i=0; i<vec.size(); i++) {
Number n = vec.get(i);
Tuple t = tuples.get(i);
double cumProb = d.cumulativeProbability(n.intValue());
if(low >= 0 && cumProb <= low) {
t.put("lowOutlierValue_d", n);
t.put("cumulativeProbablity_d", cumProb);
outliers.add(t);
}
if(hi >= 0 && cumProb >= hi) {
t.put("highOutlierValue_d", n);
t.put("cumulativeProbablity_d", cumProb);
outliers.add(t);
}
}
return outliers;
} else if(dist instanceof AbstractRealDistribution) {
AbstractRealDistribution d = (AbstractRealDistribution)dist;
for(int i=0; i<vec.size(); i++) {
Number n = vec.get(i);
Tuple t = tuples.get(i);
double cumProb = d.cumulativeProbability(n.doubleValue());
if(low >= 0 && cumProb <= low) {
t.put("lowOutlierValue_d", n);
t.put("cumulativeProbablity_d", cumProb);
outliers.add(t);
}
if(hi >= 0 && cumProb >= hi) {
t.put("highOutlierValue_d", n);
t.put("cumulativeProbablity_d", cumProb);
outliers.add(t);
}
}
return outliers;
} else {
throw new IOException("The first parameter of the outliers function must be a real or integer probability distribution");
}
}
}