// blob: 8fb7dc62303c4340c06fa1eefe6c651556dd1d36 [file] [log] [blame]
package edu.uci.ics.asterix.runtime.evaluators.functions;
import java.io.IOException;
import edu.uci.ics.asterix.builders.IAOrderedListBuilder;
import edu.uci.ics.asterix.builders.OrderedListBuilder;
import edu.uci.ics.asterix.common.functions.FunctionConstants;
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.AFloatSerializerDeserializer;
import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
import edu.uci.ics.asterix.om.base.ABoolean;
import edu.uci.ics.asterix.om.types.AOrderedListType;
import edu.uci.ics.asterix.om.types.ATypeTag;
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
import edu.uci.ics.asterix.runtime.evaluators.common.SimilarityJaccardEvaluator;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.runtime.base.IEvaluator;
import edu.uci.ics.hyracks.algebricks.runtime.base.IEvaluatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ArrayBackedValueStorage;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IDataOutputProvider;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
//assumes that both arguments are sorted by the same ordering
public class SimilarityJaccardCheckDescriptor extends AbstractScalarFunctionDynamicDescriptor {
private static final long serialVersionUID = 1L;
private final static FunctionIdentifier FID = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
"similarity-jaccard-check", 3, true);
@Override
public IEvaluatorFactory createEvaluatorFactory(final IEvaluatorFactory[] args) throws AlgebricksException {
return new IEvaluatorFactory() {
private static final long serialVersionUID = 1L;
@Override
public IEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
return new SimilarityJaccardCheckEvaluator(args, output);
}
};
}
@Override
public FunctionIdentifier getIdentifier() {
return FID;
}
private static class SimilarityJaccardCheckEvaluator extends SimilarityJaccardEvaluator {
private final IEvaluator jaccThreshEval;
private float jaccThresh = -1f;
private IAOrderedListBuilder listBuilder;
private ArrayBackedValueStorage inputVal;
@SuppressWarnings("unchecked")
private final ISerializerDeserializer<ABoolean> booleanSerde = AqlSerializerDeserializerProvider.INSTANCE
.getSerializerDeserializer(BuiltinType.ABOOLEAN);
private final AOrderedListType listType = new AOrderedListType(BuiltinType.ANY, "list");
public SimilarityJaccardCheckEvaluator(IEvaluatorFactory[] args, IDataOutputProvider output)
throws AlgebricksException {
super(args, output);
jaccThreshEval = args[2].createEvaluator(argOut);
listBuilder = new OrderedListBuilder();
inputVal = new ArrayBackedValueStorage();
}
@Override
protected void runArgEvals(IFrameTupleReference tuple) throws AlgebricksException {
super.runArgEvals(tuple);
int jaccThreshStart = argOut.getLength();
jaccThreshEval.evaluate(tuple);
jaccThresh = (float) AFloatSerializerDeserializer.getFloat(argOut.getBytes(), jaccThreshStart
+ typeIndicatorSize);
}
@Override
protected float computeResult(byte[] bytes, int firstStart, int secondStart, ATypeTag argType)
throws AlgebricksException {
firstListIter.reset(bytes, firstStart);
secondListIter.reset(bytes, secondStart);
// Check for special case where one of the lists is empty, since
// list types won't match.
if (firstListIter.size() == 0 || secondListIter.size() == 0) {
return (jaccThresh == 0.0f) ? 0.0f : -1.0f;
}
if (firstTypeTag == ATypeTag.ANY || secondTypeTag == ATypeTag.ANY)
throw new AlgebricksException("\n Jaccard can only be called on homogenous lists");
return jaccard.getSimilarity(firstListIter, secondListIter, jaccThresh);
}
@Override
protected void writeResult(float jacc) throws IOException {
listBuilder.reset(listType);
boolean matches = (jacc < 0) ? false : true;
inputVal.reset();
booleanSerde.serialize(matches ? ABoolean.TRUE : ABoolean.FALSE, inputVal.getDataOutput());
listBuilder.addItem(inputVal);
inputVal.reset();
aFloat.setValue((matches) ? jacc : 0.0f);
floatSerde.serialize(aFloat, inputVal.getDataOutput());
listBuilder.addItem(inputVal);
listBuilder.write(out, true);
}
}
}