blob: 7dc8ec8f39ee2c99f237b39ccac0cdfc36b53baa [file] [log] [blame]
package edu.uci.ics.asterix.runtime.evaluators.functions;
import edu.uci.ics.asterix.common.functions.FunctionConstants;
import edu.uci.ics.asterix.om.types.ATypeTag;
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
import edu.uci.ics.asterix.runtime.evaluators.common.WordTokensEvaluator;
import edu.uci.ics.fuzzyjoin.tokenizer.DelimitedUTF8StringBinaryTokenizer;
import edu.uci.ics.fuzzyjoin.tokenizer.HashedUTF8WordTokenFactory;
import edu.uci.ics.fuzzyjoin.tokenizer.IBinaryTokenizer;
import edu.uci.ics.fuzzyjoin.tokenizer.ITokenFactory;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import edu.uci.ics.hyracks.algebricks.runtime.base.IEvaluator;
import edu.uci.ics.hyracks.algebricks.runtime.base.IEvaluatorFactory;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IDataOutputProvider;
public class HashedWordTokensDescriptor extends AbstractScalarFunctionDynamicDescriptor {
private static final long serialVersionUID = 1L;
private final static FunctionIdentifier FID = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
"hashed-word-tokens", 1, true);
@Override
public FunctionIdentifier getIdentifier() {
return FID;
}
@Override
public IEvaluatorFactory createEvaluatorFactory(final IEvaluatorFactory[] args) throws AlgebricksException {
return new IEvaluatorFactory() {
private static final long serialVersionUID = 1L;
@Override
public IEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory(ATypeTag.INT32.serialize(),
ATypeTag.INT32.serialize());
IBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, true, tokenFactory);
return new WordTokensEvaluator(args, output, tokenizer, BuiltinType.AINT32);
}
};
}
}