blob: 0f8be91ee217d4d6384bbc189909587405863e6c [file] [log] [blame]
package edu.uci.ics.asterix.runtime.evaluators.common;
import java.io.DataOutput;
import java.io.IOException;
import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
import edu.uci.ics.asterix.om.base.AInt32;
import edu.uci.ics.asterix.om.base.AMutableInt32;
import edu.uci.ics.asterix.om.types.ATypeTag;
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.EnumDeserializer;
import edu.uci.ics.fuzzyjoin.similarity.SimilarityMetricEditDistance;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.runtime.base.IEvaluator;
import edu.uci.ics.hyracks.algebricks.runtime.base.IEvaluatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ArrayBackedValueStorage;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IDataOutputProvider;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
/**
 * Evaluator that computes the edit distance between its two arguments and
 * writes the result to the output as a serialized {@link AInt32}.
 * <p>
 * Both arguments are evaluated into the shared {@link #argOut} buffer, one
 * after the other. Each serialized value is expected to start with a
 * one-byte type tag (see {@link #typeIndicatorSize}). Supported argument
 * types are {@code STRING} and {@code ORDEREDLIST}; both arguments must have
 * the same type. If either argument is {@code NULL}, 0 is written.
 */
public class EditDistanceEvaluator implements IEvaluator {

    // assuming type indicator in serde format
    protected final int typeIndicatorSize = 1;

    protected final DataOutput out;
    // Shared buffer both argument evaluators write into; reset for every tuple.
    protected final ArrayBackedValueStorage argOut = new ArrayBackedValueStorage();
    protected final IEvaluator firstStringEval;
    protected final IEvaluator secondStringEval;
    protected final SimilarityMetricEditDistance ed = new SimilarityMetricEditDistance();
    protected final AsterixOrderedListIterator firstOrdListIter = new AsterixOrderedListIterator();
    protected final AsterixOrderedListIterator secondOrdListIter = new AsterixOrderedListIterator();
    protected int editDistance = 0;
    // Reusable mutable holder so that no object is allocated per result.
    protected final AMutableInt32 aInt32 = new AMutableInt32(-1);
    @SuppressWarnings("unchecked")
    protected final ISerializerDeserializer<AInt32> int32Serde = AqlSerializerDeserializerProvider.INSTANCE
            .getSerializerDeserializer(BuiltinType.AINT32);
    // Item type tag of the most recently inspected list argument; only
    // updated when the arguments are ordered lists.
    protected ATypeTag itemTypeTag;
    // Offsets into argOut at which the first / second argument begins.
    protected int firstStart = -1;
    protected int secondStart = -1;
    protected ATypeTag firstTypeTag;
    protected ATypeTag secondTypeTag;

    /**
     * Creates the evaluator.
     *
     * @param args
     *            evaluator factories; {@code args[0]} and {@code args[1]}
     *            produce the two values to compare
     * @param output
     *            provider of the output the result is written to
     * @throws AlgebricksException
     *             if an argument evaluator cannot be created
     */
    public EditDistanceEvaluator(IEvaluatorFactory[] args, IDataOutputProvider output) throws AlgebricksException {
        out = output.getDataOutput();
        firstStringEval = args[0].createEvaluator(argOut);
        secondStringEval = args[1].createEvaluator(argOut);
    }

    /**
     * Evaluates both arguments for the given tuple, validates their types,
     * computes the edit distance and writes it to the output.
     *
     * @param tuple
     *            the tuple the arguments are evaluated against
     * @throws AlgebricksException
     *             if the argument types are incompatible or unsupported, if a
     *             list argument has item type {@code ANY}, or if writing the
     *             result fails
     */
    @Override
    public void evaluate(IFrameTupleReference tuple) throws AlgebricksException {
        runArgEvals(tuple);
        if (!checkArgTypes(firstTypeTag, secondTypeTag)) {
            // A NULL argument was handled (0 already written).
            return;
        }
        // checkArgTypes guarantees both tags are equal here, so testing the
        // first one is sufficient. The item-type check only makes sense for
        // lists: for strings the byte after the type tag is string payload,
        // and interpreting it as an item type tag could reject valid input.
        if (firstTypeTag == ATypeTag.ORDEREDLIST) {
            checkListItemType(firstStart);
            checkListItemType(secondStart);
        }
        editDistance = computeResult(argOut.getBytes(), firstStart, secondStart, firstTypeTag);
        try {
            writeResult(editDistance);
        } catch (IOException e) {
            throw new AlgebricksException(e);
        }
    }

    /**
     * Reads the item type tag of the list starting at {@code listStart} in
     * {@link #argOut} (the byte directly after the list's own type tag),
     * stores it in {@link #itemTypeTag}, and rejects lists whose item type is
     * {@code ANY}.
     */
    private void checkListItemType(int listStart) throws AlgebricksException {
        itemTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argOut.getBytes()[listStart + 1]);
        if (itemTypeTag == ATypeTag.ANY) {
            throw new AlgebricksException("\n Edit Distance can only be called on homogenous lists");
        }
    }

    /**
     * Evaluates both arguments into {@link #argOut}, recording where each one
     * starts and deserializing the type tag found at each start offset.
     *
     * @param tuple
     *            the tuple the arguments are evaluated against
     * @throws AlgebricksException
     *             if an argument evaluator fails
     */
    protected void runArgEvals(IFrameTupleReference tuple) throws AlgebricksException {
        argOut.reset();
        firstStart = argOut.getLength();
        firstStringEval.evaluate(tuple);
        secondStart = argOut.getLength();
        secondStringEval.evaluate(tuple);
        // Fetch the buffer only after both evaluations have written to it.
        byte[] bytes = argOut.getBytes();
        firstTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[firstStart]);
        secondTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[secondStart]);
    }

    /**
     * Computes the edit distance of the two serialized values.
     *
     * @param bytes
     *            buffer holding both serialized arguments
     * @param firstStart
     *            offset of the first argument (at its type tag)
     * @param secondStart
     *            offset of the second argument (at its type tag)
     * @param argType
     *            common type of both arguments
     * @return the edit distance; for lists, the metric's result cast to
     *         {@code int}
     * @throws AlgebricksException
     *             if {@code argType} is neither {@code STRING} nor
     *             {@code ORDEREDLIST}
     */
    protected int computeResult(byte[] bytes, int firstStart, int secondStart, ATypeTag argType)
            throws AlgebricksException {
        switch (argType) {
            case STRING: {
                // Skip the type tag; the metric works on the raw string data.
                return ed.UTF8StringEditDistance(bytes, firstStart + typeIndicatorSize, secondStart
                        + typeIndicatorSize);
            }
            case ORDEREDLIST: {
                firstOrdListIter.reset(bytes, firstStart);
                secondOrdListIter.reset(bytes, secondStart);
                return (int) ed.getSimilarity(firstOrdListIter, secondOrdListIter);
            }
            default: {
                throw new AlgebricksException("Invalid type " + argType
                        + " passed as argument to edit distance function.");
            }
        }
    }

    /**
     * Validates the argument type tags.
     *
     * @param typeTag1
     *            type tag of the first argument
     * @param typeTag2
     *            type tag of the second argument
     * @return {@code true} if the distance should be computed; {@code false}
     *         if either argument is {@code NULL}, in which case 0 has already
     *         been written as the result
     * @throws AlgebricksException
     *             if the two tags differ, or if writing the NULL-case result
     *             fails
     */
    protected boolean checkArgTypes(ATypeTag typeTag1, ATypeTag typeTag2) throws AlgebricksException {
        // edit distance between null and anything else is 0
        if (typeTag1 == ATypeTag.NULL || typeTag2 == ATypeTag.NULL) {
            try {
                writeResult(0);
            } catch (IOException e) {
                throw new AlgebricksException(e);
            }
            return false;
        }
        if (typeTag1 != typeTag2) {
            throw new AlgebricksException("Incompatible argument types given in edit distance: " + typeTag1 + " "
                    + typeTag2);
        }
        return true;
    }

    /**
     * Serializes {@code ed} as an {@link AInt32} to the output.
     *
     * @param ed
     *            the value to write
     * @throws IOException
     *             if serialization fails
     */
    protected void writeResult(int ed) throws IOException {
        aInt32.setValue(ed);
        int32Serde.serialize(aInt32, out);
    }
}