| package edu.uci.ics.asterix.runtime.evaluators.functions; |
| |
| |
| import edu.uci.ics.asterix.common.functions.FunctionConstants; |
| import edu.uci.ics.asterix.om.types.ATypeTag; |
| import edu.uci.ics.asterix.om.types.BuiltinType; |
| import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor; |
| import edu.uci.ics.asterix.runtime.evaluators.common.WordTokensEvaluator; |
| import edu.uci.ics.fuzzyjoin.tokenizer.DelimitedUTF8StringBinaryTokenizer; |
| import edu.uci.ics.fuzzyjoin.tokenizer.HashedUTF8WordTokenFactory; |
| import edu.uci.ics.fuzzyjoin.tokenizer.IBinaryTokenizer; |
| import edu.uci.ics.fuzzyjoin.tokenizer.ITokenFactory; |
| import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException; |
| import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier; |
| import edu.uci.ics.hyracks.algebricks.runtime.base.IEvaluator; |
| import edu.uci.ics.hyracks.algebricks.runtime.base.IEvaluatorFactory; |
| import edu.uci.ics.hyracks.dataflow.common.data.accessors.IDataOutputProvider; |
| |
| public class CountHashedWordTokensDescriptor extends AbstractScalarFunctionDynamicDescriptor { |
| |
| private static final long serialVersionUID = 1L; |
| private final static FunctionIdentifier FID = new FunctionIdentifier(FunctionConstants.ASTERIX_NS, |
| "counthashed-word-tokens", 1, true); |
| |
| @Override |
| public FunctionIdentifier getIdentifier() { |
| return FID; |
| } |
| |
| @Override |
| public IEvaluatorFactory createEvaluatorFactory(final IEvaluatorFactory[] args) throws AlgebricksException { |
| return new IEvaluatorFactory() { |
| private static final long serialVersionUID = 1L; |
| |
| @Override |
| public IEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException { |
| ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory(ATypeTag.INT32.serialize(), |
| ATypeTag.INT32.serialize()); |
| IBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, true, tokenFactory); |
| return new WordTokensEvaluator(args, output, tokenizer, BuiltinType.AINT32); |
| } |
| }; |
| } |
| |
| } |