// blob: 261039af12ab024aa06db9babd4d6aaab7ec318e [file] [log] [blame]
package edu.uci.ics.asterix.runtime.evaluators.common;
import java.io.DataOutput;
import java.io.IOException;
import edu.uci.ics.asterix.builders.OrderedListBuilder;
import edu.uci.ics.asterix.om.types.AOrderedListType;
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.BooleanSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
/**
 * Copy evaluator that runs an n-gram tokenizer over its first argument and writes the resulting
 * tokens to the output as an ordered list.
 *
 * <p>Three argument evaluators are used, in this order:
 * <ol>
 * <li>the value to tokenize (handed to the tokenizer as a raw byte span),</li>
 * <li>the gram length, read as an int,</li>
 * <li>the pre/post flag, read as a boolean and forwarded to the tokenizer's {@code setPrePost}.</li>
 * </ol>
 * All three arguments are serialized back-to-back into one shared buffer; the boundaries between
 * them are recovered from the buffer length observed after each evaluation.
 */
public class GramTokensEvaluator implements ICopyEvaluator {

    /**
     * Number of bytes skipped in front of the int and boolean payloads.
     * Assumes each serialized argument starts with a one-byte type indicator (serde format).
     */
    private static final int TYPE_INDICATOR_SIZE = 1;

    /** Destination for the serialized token list. */
    private final DataOutput out;

    /** Shared scratch buffer that receives the serialized form of all three arguments. */
    private final ArrayBackedValueStorage argOut = new ArrayBackedValueStorage();

    private final ICopyEvaluator stringEval;
    private final ICopyEvaluator gramLengthEval;
    private final ICopyEvaluator prePostEval;

    private final NGramUTF8StringBinaryTokenizer tokenizer;
    private final OrderedListBuilder listBuilder = new OrderedListBuilder();
    private final AOrderedListType listType;

    /**
     * Creates the evaluator.
     *
     * @param args
     *            factories for the three argument evaluators (indexes 0, 1 and 2 are used)
     * @param output
     *            provider of the {@link DataOutput} that receives the token list
     * @param tokenizer
     *            tokenizer to use; it is cast to {@link NGramUTF8StringBinaryTokenizer}
     * @param itemType
     *            item type of the ordered list that is produced
     * @throws AlgebricksException
     *             if creating one of the argument evaluators fails
     */
    public GramTokensEvaluator(ICopyEvaluatorFactory[] args, IDataOutputProvider output, IBinaryTokenizer tokenizer,
            BuiltinType itemType) throws AlgebricksException {
        this.out = output.getDataOutput();
        // Every argument evaluator writes into the same scratch buffer.
        this.stringEval = args[0].createEvaluator(this.argOut);
        this.gramLengthEval = args[1].createEvaluator(this.argOut);
        this.prePostEval = args[2].createEvaluator(this.argOut);
        this.tokenizer = (NGramUTF8StringBinaryTokenizer) tokenizer;
        this.listType = new AOrderedListType(itemType, null);
    }

    /**
     * Evaluates the three arguments for {@code tuple}, configures the tokenizer from the gram-length
     * and pre/post arguments, tokenizes the first argument and writes the tokens as an ordered list.
     *
     * @param tuple
     *            the tuple the argument evaluators are run against
     * @throws AlgebricksException
     *             if an argument evaluator fails, or wrapping any {@link IOException} raised while
     *             building or writing the list
     */
    @Override
    public void evaluate(IFrameTupleReference tuple) throws AlgebricksException {
        argOut.reset();

        // Arguments are appended one after another; the buffer length after each evaluation
        // is the start offset of the next argument.
        stringEval.evaluate(tuple);
        final int gramLengthStart = argOut.getLength();
        gramLengthEval.evaluate(tuple);
        final int prePostStart = argOut.getLength();
        prePostEval.evaluate(tuple);

        final byte[] argBytes = argOut.getByteArray();

        // Payloads sit right behind the type indicator of their argument.
        final int gramLength = IntegerSerializerDeserializer.getInt(argBytes, gramLengthStart + TYPE_INDICATOR_SIZE);
        tokenizer.setGramlength(gramLength);
        final boolean prePost = BooleanSerializerDeserializer.getBoolean(argBytes, prePostStart + TYPE_INDICATOR_SIZE);
        tokenizer.setPrePost(prePost);

        // The first argument occupies [0, gramLengthStart) and is passed to the tokenizer as-is.
        tokenizer.reset(argBytes, 0, gramLengthStart);

        writeTokenList();
    }

    /**
     * Drains the tokenizer into the list builder and writes the finished list to {@link #out}.
     *
     * @throws AlgebricksException
     *             wrapping any {@link IOException} raised while building or writing the list
     */
    private void writeTokenList() throws AlgebricksException {
        try {
            listBuilder.reset(listType);
            while (tokenizer.hasNext()) {
                tokenizer.next();
                listBuilder.addItem(tokenizer.getToken());
            }
            listBuilder.write(out, true);
        } catch (IOException e) {
            throw new AlgebricksException(e);
        }
    }
}