Merged asterix_stabilization r945:r952.

git-svn-id: https://asterixdb.googlecode.com/svn/branches/asterix_tokenizer_opt@953 eaa15691-b419-025a-1212-ee371bd00084
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/builders/OrderedListBuilder.java b/asterix-om/src/main/java/edu/uci/ics/asterix/builders/OrderedListBuilder.java
index 9fb422d..58761b4 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/builders/OrderedListBuilder.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/builders/OrderedListBuilder.java
@@ -1,7 +1,7 @@
 package edu.uci.ics.asterix.builders;
 
-import java.io.ByteArrayOutputStream;
 import java.io.DataOutput;
+import java.io.DataOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 
@@ -11,10 +11,13 @@
 import edu.uci.ics.asterix.om.util.NonTaggedFormatUtil;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.data.std.api.IValueReference;
+import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IToken;
 
 public class OrderedListBuilder implements IAOrderedListBuilder {
 
-    private ByteArrayOutputStream outputStream;
+    private ArrayBackedValueStorage outputValStorage; // storage the list items are serialized into; its bytes are emitted by write()
+    private DataOutputStream outputStream; // NOTE(review): assigned in the constructor by downcasting outputValStorage.getDataOutput() — confirm that cast always holds
     private ArrayList<Short> offsets;
     private int metadataInfoSize;
     private byte[] offsetArray;
@@ -28,7 +31,8 @@
     private int numberOfItems;
 
     public OrderedListBuilder() {
-        this.outputStream = new ByteArrayOutputStream();
+        this.outputValStorage = new ArrayBackedValueStorage();
+        this.outputStream = (DataOutputStream) outputValStorage.getDataOutput();
         this.offsets = new ArrayList<Short>();
         this.metadataInfoSize = 0;
         this.offsetArray = null;
@@ -37,7 +41,7 @@
 
     @Override
     public void reset(AOrderedListType orderedlistType) throws HyracksDataException {
-        this.outputStream.reset();
+        this.outputValStorage.reset();
         this.offsetArray = null;
         this.offsets.clear();
         this.offsetPosition = 0;
@@ -56,17 +60,30 @@
 
     @Override
     public void addItem(IValueReference item) throws HyracksDataException {
-        if (!fixedSize)
-            this.offsets.add((short) outputStream.size());
-        if (itemTypeTag == ATypeTag.ANY || (itemTypeTag == ATypeTag.NULL && item.getByteArray()[0] == serNullTypeTag)) {
-            this.numberOfItems++;
-            this.outputStream.write(item.getByteArray(), item.getStartOffset(), item.getLength());
-        } else if (item.getByteArray()[0] != serNullTypeTag) {
-            this.numberOfItems++;
-            this.outputStream.write(item.getByteArray(), item.getStartOffset() + 1, item.getLength() - 1);
+        try {
+            if (!fixedSize)
+                this.offsets.add((short) outputValStorage.getLength()); // current storage length recorded as this item's offset; NOTE(review): narrowed to short
+            if (itemTypeTag == ATypeTag.ANY
+                    || (itemTypeTag == ATypeTag.NULL && item.getByteArray()[0] == serNullTypeTag)) { // NOTE(review): tag is read at array index 0, not item.getStartOffset() — confirm offset is always 0
+                this.numberOfItems++;
+                this.outputStream.write(item.getByteArray(), item.getStartOffset(), item.getLength()); // copy the whole item, leading byte included
+            } else if (item.getByteArray()[0] != serNullTypeTag) { // items whose first array byte equals serNullTypeTag are neither written nor counted
+                this.numberOfItems++;
+                this.outputStream.write(item.getByteArray(), item.getStartOffset() + 1, item.getLength() - 1); // skip the item's leading byte (presumably its type tag)
+            }
+        } catch (IOException e) {
+            throw new HyracksDataException(e); // rewrap so the method still only declares HyracksDataException
         }
     }
 
+    public void addItem(IToken token) throws IOException { // overload taking a token directly; declares IOException, unlike the IValueReference overload
+        if (!fixedSize) {
+            offsets.add((short) outputValStorage.getLength()); // current storage length recorded as this item's offset; NOTE(review): narrowed to short
+        }
+        numberOfItems++; // counted unconditionally — no serNullTypeTag check on this path
+        token.serializeToken(outputValStorage); // the token serializes itself into the builder's storage
+    }
+
     @Override
     public void write(DataOutput out, boolean writeTypeTag) throws HyracksDataException {
         try {
@@ -75,8 +92,8 @@
             if (offsetArray == null || offsetArray.length < metadataInfoSize)
                 offsetArray = new byte[metadataInfoSize];
 
-            SerializerDeserializerUtil.writeIntToByteArray(offsetArray,
-                    headerSize + metadataInfoSize + outputStream.size(), offsetPosition);
+            SerializerDeserializerUtil.writeIntToByteArray(offsetArray, headerSize + metadataInfoSize
+                    + outputValStorage.getLength(), offsetPosition);
             SerializerDeserializerUtil.writeIntToByteArray(offsetArray, this.numberOfItems, offsetPosition + 4);
 
             if (!fixedSize) {
@@ -92,7 +109,7 @@
             }
             out.writeByte(itemTypeTag.serialize());
             out.write(offsetArray, 0, metadataInfoSize);
-            out.write(outputStream.toByteArray(), 0, outputStream.size());
+            out.write(outputValStorage.getByteArray(), 0, outputValStorage.getLength());
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/common/AListElementToken.java b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/common/AListElementToken.java
index cb6838e..ec4450a 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/common/AListElementToken.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/common/AListElementToken.java
@@ -1,8 +1,8 @@
 package edu.uci.ics.asterix.dataflow.data.common;
 
-import java.io.DataOutput;
 import java.io.IOException;
 
+import edu.uci.ics.hyracks.data.std.api.IMutableValueStorage;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IToken;
 
 public class AListElementToken implements IToken {
@@ -44,14 +44,13 @@
     }
 
     @Override
-    public void serializeToken(DataOutput dos) throws IOException {
-        dos.writeByte(typeTag);
-        dos.write(data, start, length);
+    public void serializeToken(IMutableValueStorage outVal) throws IOException {
+        outVal.getDataOutput().writeByte(typeTag); // the typeTag byte is written first
+        outVal.getDataOutput().write(data, start, length); // followed by `length` bytes of `data` beginning at `start`
     }
 
     @Override
-    public void serializeTokenCount(DataOutput dos) throws IOException {
+    public void serializeTokenCount(IMutableValueStorage outVal) throws IOException { // not supported by this token class: always throws, outVal is never touched
         throw new UnsupportedOperationException("Token count not implemented.");
     }
-
 }
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/GramTokensEvaluator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/GramTokensEvaluator.java
index 188135c..261039a 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/GramTokensEvaluator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/GramTokensEvaluator.java
@@ -3,7 +3,6 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
-import edu.uci.ics.asterix.builders.IAOrderedListBuilder;
 import edu.uci.ics.asterix.builders.OrderedListBuilder;
 import edu.uci.ics.asterix.om.types.AOrderedListType;
 import edu.uci.ics.asterix.om.types.BuiltinType;
@@ -16,8 +15,6 @@
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.BooleanSerializerDeserializer;
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
-import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IToken;
-import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IntArray;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
 
 public class GramTokensEvaluator implements ICopyEvaluator {
@@ -25,20 +22,15 @@
     // assuming type indicator in serde format
     private final int typeIndicatorSize = 1;
 
-    protected final DataOutput out;
-    protected final ArrayBackedValueStorage argOut = new ArrayBackedValueStorage();
-    protected final ICopyEvaluator stringEval;
-    protected final ICopyEvaluator gramLengthEval;
-    protected final ICopyEvaluator prePostEval;
+    private final DataOutput out;
+    private final ArrayBackedValueStorage argOut = new ArrayBackedValueStorage();
+    private final ICopyEvaluator stringEval;
+    private final ICopyEvaluator gramLengthEval;
+    private final ICopyEvaluator prePostEval;
 
     private final NGramUTF8StringBinaryTokenizer tokenizer;
-
-    protected final IntArray itemOffsets = new IntArray();
-    protected final ArrayBackedValueStorage tokenBuffer = new ArrayBackedValueStorage();
-
-    private IAOrderedListBuilder listBuilder = new OrderedListBuilder();
-    private ArrayBackedValueStorage inputVal = new ArrayBackedValueStorage();
-    private BuiltinType itemType;
+    private final OrderedListBuilder listBuilder = new OrderedListBuilder();
+    private final AOrderedListType listType;
 
     public GramTokensEvaluator(ICopyEvaluatorFactory[] args, IDataOutputProvider output, IBinaryTokenizer tokenizer,
             BuiltinType itemType) throws AlgebricksException {
@@ -47,7 +39,7 @@
         gramLengthEval = args[1].createEvaluator(argOut);
         prePostEval = args[2].createEvaluator(argOut);
         this.tokenizer = (NGramUTF8StringBinaryTokenizer) tokenizer;
-        this.itemType = itemType;
+        this.listType = new AOrderedListType(itemType, null);
     }
 
     @Override
@@ -65,16 +57,12 @@
         boolean prePost = BooleanSerializerDeserializer.getBoolean(bytes, prePostOff + typeIndicatorSize);
         tokenizer.setPrePost(prePost);
         tokenizer.reset(bytes, 0, gramLengthOff);
-        tokenBuffer.reset();
 
         try {
-            listBuilder.reset(new AOrderedListType(itemType, null));
+            listBuilder.reset(listType);
             while (tokenizer.hasNext()) {
-                inputVal.reset();
                 tokenizer.next();
-                IToken token = tokenizer.getToken();
-                token.serializeToken(inputVal.getDataOutput());
-                listBuilder.addItem(inputVal);
+                listBuilder.addItem(tokenizer.getToken());
             }
             listBuilder.write(out, true);
         } catch (IOException e) {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/WordTokensEvaluator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/WordTokensEvaluator.java
index f0f36f5..c316fb5 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/WordTokensEvaluator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/WordTokensEvaluator.java
@@ -3,7 +3,6 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
-import edu.uci.ics.asterix.builders.IAOrderedListBuilder;
 import edu.uci.ics.asterix.builders.OrderedListBuilder;
 import edu.uci.ics.asterix.om.types.AOrderedListType;
 import edu.uci.ics.asterix.om.types.BuiltinType;
@@ -14,29 +13,22 @@
 import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
-import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IToken;
-import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IntArray;
 
 public class WordTokensEvaluator implements ICopyEvaluator {
-    protected final DataOutput out;
-    protected final ArrayBackedValueStorage argOut = new ArrayBackedValueStorage();
-    protected final ICopyEvaluator stringEval;
+    private final DataOutput out;
+    private final ArrayBackedValueStorage argOut = new ArrayBackedValueStorage();
+    private final ICopyEvaluator stringEval;
 
-    protected final IBinaryTokenizer tokenizer;
-
-    protected final IntArray itemOffsets = new IntArray();
-    protected final ArrayBackedValueStorage tokenBuffer = new ArrayBackedValueStorage();
-
-    private IAOrderedListBuilder listBuilder = new OrderedListBuilder();
-    private ArrayBackedValueStorage inputVal = new ArrayBackedValueStorage();
-    private BuiltinType itemType;
+    private final IBinaryTokenizer tokenizer;
+    private final OrderedListBuilder listBuilder = new OrderedListBuilder();
+    private final AOrderedListType listType;
 
     public WordTokensEvaluator(ICopyEvaluatorFactory[] args, IDataOutputProvider output, IBinaryTokenizer tokenizer,
             BuiltinType itemType) throws AlgebricksException {
         out = output.getDataOutput();
         stringEval = args[0].createEvaluator(argOut);
         this.tokenizer = tokenizer;
-        this.itemType = itemType;
+        this.listType = new AOrderedListType(itemType, null); // built once here and reused by each listBuilder.reset(listType) call instead of allocating a new type per call
     }
 
     @Override
@@ -45,16 +37,11 @@
         stringEval.evaluate(tuple);
         byte[] bytes = argOut.getByteArray();
         tokenizer.reset(bytes, 0, argOut.getLength());
-        tokenBuffer.reset();
-
         try {
-            listBuilder.reset(new AOrderedListType(itemType, null));
+            listBuilder.reset(listType);
             while (tokenizer.hasNext()) {
-                inputVal.reset();
                 tokenizer.next();
-                IToken token = tokenizer.getToken();
-                token.serializeToken(inputVal.getDataOutput());
-                listBuilder.addItem(inputVal);
+                listBuilder.addItem(tokenizer.getToken());
             }
             listBuilder.write(out, true);
         } catch (IOException e) {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
index a54515d..c4e8387 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/CountHashedGramTokensDescriptor.java
@@ -3,7 +3,6 @@
 import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptor;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptorFactory;
-import edu.uci.ics.asterix.om.types.ATypeTag;
 import edu.uci.ics.asterix.om.types.BuiltinType;
 import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.common.GramTokensEvaluator;
@@ -37,8 +36,7 @@
 
             @Override
             public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
-                ITokenFactory tokenFactory = new HashedUTF8NGramTokenFactory(ATypeTag.INT32.serialize(),
-                        ATypeTag.INT32.serialize());
+                ITokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
                 NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(3, true, false, true,
                         tokenFactory);
                 return new GramTokensEvaluator(args, output, tokenizer, BuiltinType.AINT32);
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
index 4ddc57a..90a4293 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/CountHashedWordTokensDescriptor.java
@@ -3,7 +3,6 @@
 import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptor;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptorFactory;
-import edu.uci.ics.asterix.om.types.ATypeTag;
 import edu.uci.ics.asterix.om.types.BuiltinType;
 import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.common.WordTokensEvaluator;
@@ -38,8 +37,7 @@
 
             @Override
             public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
-                ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory(ATypeTag.INT32.serialize(),
-                        ATypeTag.INT32.serialize());
+                ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory(); // no-arg factory: the serialized INT32 type tags are no longer passed in
                 IBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, true, tokenFactory);
                 return new WordTokensEvaluator(args, output, tokenizer, BuiltinType.AINT32);
             }
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/GramTokensDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/GramTokensDescriptor.java
index 1e3ea24..20a2977 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/GramTokensDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/GramTokensDescriptor.java
@@ -3,7 +3,6 @@
 import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptor;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptorFactory;
-import edu.uci.ics.asterix.om.types.ATypeTag;
 import edu.uci.ics.asterix.om.types.BuiltinType;
 import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.common.GramTokensEvaluator;
@@ -37,8 +36,7 @@
 
             @Override
             public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
-                ITokenFactory tokenFactory = new UTF8NGramTokenFactory(ATypeTag.STRING.serialize(),
-                        ATypeTag.INT32.serialize());
+                ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
                 NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(3, true, true, true,
                         tokenFactory);
                 return new GramTokensEvaluator(args, output, tokenizer, BuiltinType.ASTRING);
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/HashedGramTokensDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/HashedGramTokensDescriptor.java
index 66e2f5f..e86f86a6 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/HashedGramTokensDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/HashedGramTokensDescriptor.java
@@ -3,7 +3,6 @@
 import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptor;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptorFactory;
-import edu.uci.ics.asterix.om.types.ATypeTag;
 import edu.uci.ics.asterix.om.types.BuiltinType;
 import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.common.GramTokensEvaluator;
@@ -37,8 +36,7 @@
 
             @Override
             public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
-                ITokenFactory tokenFactory = new HashedUTF8NGramTokenFactory(ATypeTag.INT32.serialize(),
-                        ATypeTag.INT32.serialize());
+                ITokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
                 NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(3, true, true, true,
                         tokenFactory);
                 return new GramTokensEvaluator(args, output, tokenizer, BuiltinType.AINT32);
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/HashedWordTokensDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/HashedWordTokensDescriptor.java
index 848fb68..d6da522 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/HashedWordTokensDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/HashedWordTokensDescriptor.java
@@ -3,7 +3,6 @@
 import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptor;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptorFactory;
-import edu.uci.ics.asterix.om.types.ATypeTag;
 import edu.uci.ics.asterix.om.types.BuiltinType;
 import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.common.WordTokensEvaluator;
@@ -38,8 +37,7 @@
 
             @Override
             public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
-                ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory(ATypeTag.INT32.serialize(),
-                        ATypeTag.INT32.serialize());
+                ITokenFactory tokenFactory = new HashedUTF8WordTokenFactory(); // no-arg factory: the serialized INT32 type tags are no longer passed in
                 IBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, true, tokenFactory);
                 return new WordTokensEvaluator(args, output, tokenizer, BuiltinType.AINT32);
             }
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/WordTokensDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/WordTokensDescriptor.java
index b0ece87..791ee6b 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/WordTokensDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/WordTokensDescriptor.java
@@ -3,7 +3,6 @@
 import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptor;
 import edu.uci.ics.asterix.om.functions.IFunctionDescriptorFactory;
-import edu.uci.ics.asterix.om.types.ATypeTag;
 import edu.uci.ics.asterix.om.types.BuiltinType;
 import edu.uci.ics.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
 import edu.uci.ics.asterix.runtime.evaluators.common.WordTokensEvaluator;
@@ -38,8 +37,7 @@
 
             @Override
             public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
-                ITokenFactory tokenFactory = new UTF8WordTokenFactory(ATypeTag.STRING.serialize(),
-                        ATypeTag.INT32.serialize());
+                ITokenFactory tokenFactory = new UTF8WordTokenFactory(); // no-arg factory: the serialized STRING/INT32 type tags are no longer passed in
                 IBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, true, tokenFactory);
                 return new WordTokensEvaluator(args, output, tokenizer, BuiltinType.ASTRING);
             }