blob: b46bb3ac6b00297109594a53efdb752804124e87 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.iogen;
import org.apache.sysds.common.Types;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.io.IOUtilFunctions;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.data.Pair;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
public abstract class ReaderMapping {
// mapRow[r][c] holds the index of the raw line in which sample cell (r, c) was found by findMapping();
// mapCol[r][c] holds the key returned by RawRow.findValue for that cell (presumably the position inside
// the line - TODO confirm). Both are -1 for cells that were not mapped.
protected int[][] mapRow;
protected int[][] mapCol;
// Results of the symmetry / skew-symmetry check performed in runMapping().
protected boolean symmetric;
protected boolean skewSymmetric;
// Which triangle of the sample was kept by the last transferSampleTriangular() call in runMapping().
protected boolean isUpperTriangular;
// Sign (1 or -1) applied to numeric values while probing skew-symmetric layouts in runMapping().
protected int skewCoefficient;
// One RawRow per line of the raw sample text, filled by the constructor.
protected final ArrayList<RawRow> sampleRawRows;
// True once every set sample cell could be located in the raw text.
protected boolean mapped;
// Dimensions of the sample matrix/frame, assigned by the subclass constructors.
// NOTE(review): these are static, so all ReaderMapping instances share one pair of dimensions - confirm
// that two mappings are never alive at the same time.
protected static int nrows;
protected static int ncols;
// Number of lines read from the raw sample text.
protected final int nlines;
// Index offsets (0 or 1) selected while verifying row-irregular mappings.
protected int firstRowIndex;
protected int firstColIndex;
// Sample values wrapped as ValueTrimFormat; VTFClone is the backup taken by cloneSample() and restored
// by retrieveSample().
protected ValueTrimFormat[][] VTF;
protected ValueTrimFormat[][] VTFClone = null;
public ReaderMapping(String raw) throws Exception {
InputStream is = IOUtilFunctions.toInputStream(raw);
BufferedReader br = new BufferedReader(new InputStreamReader(is));
String value;
int nlines = 0;
sampleRawRows = new ArrayList<>();
while((value = br.readLine()) != null) {
sampleRawRows.add(new RawRow(value));
nlines++;
}
this.nlines = nlines;
firstColIndex = 0;
firstRowIndex = 0;
}
/**
 * Tells whether the sample consists of numeric columns only; runMapping() requires this before it
 * considers the sample for the skew-symmetric check.
 *
 * @return true if all columns of the sample are numeric
 */
protected abstract boolean isSchemaNumeric();
/**
 * Takes a one-time backup of {@code VTF} into {@code VTFClone} (cell-wise copies).
 * Later calls are no-ops once a backup exists.
 */
protected void cloneSample() {
    if(VTFClone != null)
        return;
    VTFClone = new ValueTrimFormat[nrows][ncols];
    for(int row = 0; row < nrows; row++) {
        for(int col = 0; col < ncols; col++) {
            VTFClone[row][col] = VTF[row][col].getACopy();
        }
    }
}
/**
 * Restores every cell of {@code VTF} from the backup held in {@code VTFClone},
 * using fresh copies so the backup itself stays untouched.
 */
protected void retrieveSample() {
    int row = 0;
    while(row < nrows) {
        int col = 0;
        while(col < ncols) {
            VTF[row][col] = VTFClone[row][col].getACopy();
            col++;
        }
        row++;
    }
}
/**
 * Reduces the square sample to one triangle by marking the cells of the other triangle as not set.
 * The diagonal is always kept.
 *
 * @param isUpper true to keep the upper triangle (cells with c &lt; r are cleared),
 *                false to keep the lower triangle (cells with c &gt; r are cleared)
 * @throws Exception if the sample is not square
 */
protected void transferSampleTriangular(boolean isUpper) throws Exception {
    if(nrows != ncols)
        throw new Exception("For upper triangular both Row and Col should be same!");
    for(int r = 0; r < nrows; r++) {
        // column range [from, to) of the triangle that is dropped in row r
        int from = isUpper ? 0 : r + 1;
        int to = isUpper ? r : ncols;
        for(int c = from; c < to; c++) {
            VTF[r][c].setNoSet();
        }
    }
}
/**
 * Replaces every set, numeric cell of {@code VTF} by a new cell whose value is multiplied
 * by the given coefficient.
 *
 * @param coefficient sign factor, must be 1 or -1
 * @throws Exception if the coefficient is neither 1 nor -1
 */
protected void transferSampleSkew(int coefficient) throws Exception {
    boolean validCoefficient = coefficient == 1 || coefficient == -1;
    if(!validCoefficient)
        throw new Exception("The value of Coefficient have to be 1 or -1!");
    for(int r = 0; r < nrows; r++) {
        for(int c = 0; c < ncols; c++) {
            ValueTrimFormat cell = VTF[r][c];
            if(cell.isNotSet() || !cell.getValueType().isNumeric())
                continue;
            VTF[r][c] = new ValueTrimFormat(cell.getColIndex(), cell.getValueType(),
                cell.getDoubleActualValue() * coefficient);
        }
    }
}
/**
 * Wraps every cell of the sample (matrix or frame) into a ValueTrimFormat.
 *
 * @return an nrows x ncols array holding one ValueTrimFormat per sample cell
 */
protected abstract ValueTrimFormat[][] convertSampleTOValueTrimFormat();
/**
 * Reader mapping for a sample given as a {@link MatrixBlock}; all cells are treated as FP64.
 */
public static class MatrixReaderMapping extends ReaderMapping {
    private MatrixBlock sampleMatrix;

    /**
     * Reads the raw text, converts the sample matrix and immediately runs the mapping.
     *
     * @param raw    raw sample text
     * @param matrix sample matrix that corresponds to the raw text
     * @throws Exception if reading or mapping fails
     */
    public MatrixReaderMapping(String raw, MatrixBlock matrix) throws Exception {
        super(raw);
        sampleMatrix = matrix;
        nrows = sampleMatrix.getNumRows();
        ncols = sampleMatrix.getNumColumns();
        VTF = convertSampleTOValueTrimFormat();
        runMapping();
    }

    /** Converts each value of the sample matrix into an FP64 ValueTrimFormat. */
    @Override
    protected ValueTrimFormat[][] convertSampleTOValueTrimFormat() {
        ValueTrimFormat[][] cells = new ValueTrimFormat[nrows][ncols];
        for(int r = 0; r < nrows; r++) {
            ValueTrimFormat[] cellRow = cells[r];
            for(int c = 0; c < ncols; c++)
                cellRow[c] = new ValueTrimFormat(c, Types.ValueType.FP64, sampleMatrix.getValue(r, c));
        }
        return cells;
    }

    /** A matrix sample is always numeric. */
    @Override
    protected boolean isSchemaNumeric() {
        return true;
    }
}
/**
 * Reader mapping for a sample given as a {@link FrameBlock}; each column keeps the value type
 * declared in the frame schema.
 */
public static class FrameReaderMapping extends ReaderMapping {
    private FrameBlock sampleFrame;
    private Types.ValueType[] schema;

    /**
     * Reads the raw text, converts the sample frame and immediately runs the mapping.
     *
     * @param raw   raw sample text
     * @param frame sample frame that corresponds to the raw text
     * @throws Exception if reading or mapping fails
     */
    public FrameReaderMapping(String raw, FrameBlock frame) throws Exception {
        super(raw);
        sampleFrame = frame;
        nrows = sampleFrame.getNumRows();
        ncols = sampleFrame.getNumColumns();
        schema = sampleFrame.getSchema();
        VTF = convertSampleTOValueTrimFormat();
        //TODO: set NNZ for Frame !!??
        runMapping();
    }

    /** Converts each value of the sample frame into a ValueTrimFormat typed by its column schema. */
    @Override
    protected ValueTrimFormat[][] convertSampleTOValueTrimFormat() {
        ValueTrimFormat[][] cells = new ValueTrimFormat[nrows][ncols];
        for(int r = 0; r < nrows; r++) {
            ValueTrimFormat[] cellRow = cells[r];
            for(int c = 0; c < ncols; c++)
                cellRow[c] = new ValueTrimFormat(c, schema[c], sampleFrame.get(r, c));
        }
        return cells;
    }

    /** The schema is numeric only if every column type is numeric. */
    @Override
    protected boolean isSchemaNumeric() {
        boolean allNumeric = true;
        for(int i = 0; i < schema.length; i++)
            allNumeric = schema[i].isNumeric() & allNumeric;
        return allNumeric;
    }
}
/**
 * Tries to map the sample onto the raw text. If the plain mapping fails, the sample is backed up
 * and, when it is symmetric or skew-symmetric, the mapping is retried on the lower and then the
 * upper triangle (for skew-symmetric samples additionally with negated values).
 *
 * @throws Exception if one of the sample transformations fails
 */
public void runMapping() throws Exception {
    mapped = findMapping();
    boolean schemaNumeric = isSchemaNumeric();
    if(mapped)
        return;

    // keep an untouched copy of the sample before it gets transformed
    cloneSample();

    // symmetric / skew-symmetric check
    symmetric = nrows == ncols;
    skewSymmetric = nrows == ncols && schemaNumeric;
    for(int r = 0; r < nrows; r++) {
        for(int c = 0; c < ncols; c++) {
            if(symmetric)
                symmetric = VTF[r][c].isEqual(VTF[c][r]);
            if(skewSymmetric) {
                skewSymmetric = (r != c)
                    ? VTF[r][c].getDoubleActualValue() == VTF[c][r].getDoubleActualValue() * -1
                    : VTF[r][c].isNotSet();
            }
        }
    }

    boolean isRR = isRowRegular();
    if(symmetric) {
        // lower triangle first
        isUpperTriangular = false;
        transferSampleTriangular(isUpperTriangular);
        mapped = findTriangularMapping(isRR);
        // then upper triangle, starting again from the backup
        if(!mapped) {
            isUpperTriangular = true;
            retrieveSample();
            transferSampleTriangular(isUpperTriangular);
            mapped = findTriangularMapping(isRR);
        }
    }
    else if(skewSymmetric) {
        // lower triangle
        isUpperTriangular = false;
        transferSampleTriangular(isUpperTriangular);
        mapped = findTriangularMapping(isRR);
        // lower triangle, negated values
        if(!mapped) {
            skewCoefficient = -1;
            transferSampleSkew(skewCoefficient);
            mapped = findTriangularMapping(isRR);
        }
        // upper triangle, starting again from the backup
        if(!mapped) {
            isUpperTriangular = true;
            skewCoefficient = 1;
            retrieveSample();
            transferSampleTriangular(isUpperTriangular);
            mapped = findTriangularMapping(isRR);
        }
        // upper triangle, negated values
        if(!mapped) {
            skewCoefficient = -1;
            transferSampleSkew(skewCoefficient);
            mapped = findTriangularMapping(isRR);
        }
    }
}

/**
 * Runs findMapping() on the current (triangular) sample; for row-irregular layouts the result is
 * additionally verified against the currently selected triangle.
 */
private boolean findTriangularMapping(boolean rowRegular) {
    boolean found = findMapping();
    if(rowRegular)
        return found;
    return found && verifyRISymmetricMapping(isUpperTriangular);
}
/**
 * Searches every set sample cell in the raw lines and records where it was found in
 * {@code mapRow} / {@code mapCol} (both reset to -1 first). The search for a value starts at the
 * line where the previous value was found and wraps around, visiting each line at most once.
 *
 * @return true if every set cell of the sample could be located
 */
protected boolean findMapping() {
    mapRow = new int[nrows][ncols];
    mapCol = new int[nrows][ncols];
    // "-1" marks an unmapped cell
    for(int r = 0; r < nrows; r++) {
        for(int c = 0; c < ncols; c++) {
            mapRow[r][c] = -1;
            mapCol[r][c] = -1;
        }
    }
    for(int i = 0; i < nlines; i++)
        sampleRawRows.get(i).resetReserved();

    int line = 0;
    for(int r = 0; r < nrows; r++) {
        ArrayList<ValueTrimFormat> pending = new ArrayList<>();
        for(int i = 0; i < ncols; i++) {
            ValueTrimFormat cell = VTF[r][i];
            if(!cell.isNotSet())
                pending.add(cell);
        }
        Collections.sort(pending);

        for(ValueTrimFormat vtf : pending) {
            int c = vtf.getColIndex();
            // every miss advances to the next line (cyclically); give up after nlines misses
            for(int misses = 0; misses < nlines; misses++) {
                Pair<Integer, Integer> hit = sampleRawRows.get(line).findValue(vtf, false);
                if(hit.getKey() != -1) {
                    mapRow[r][c] = line;
                    mapCol[r][c] = hit.getKey();
                    break;
                }
                line++;
                if(line == nlines)
                    line = 0;
            }
        }
    }

    // the mapping is complete only if no set cell is left unmapped
    for(int r = 0; r < nrows; r++) {
        for(int c = 0; c < ncols; c++) {
            if(mapRow[r][c] == -1 && !VTF[r][c].isNotSet())
                return false;
        }
    }
    return true;
}
/**
 * Verifies a row-irregular triangular mapping for the index offsets (0,0), (1,1), (0,1) and (1,0),
 * in that order. The first offset pair that verifies is stored in
 * {@code firstRowIndex} / {@code firstColIndex}.
 *
 * @param upperTriangular whether the upper triangle of the sample is in use
 * @return true if one of the offset pairs verifies
 */
private boolean verifyRISymmetricMapping(boolean upperTriangular) {
    final int[][] offsets = {{0, 0}, {1, 1}, {0, 1}, {1, 0}};
    for(int[] offset : offsets) {
        if(verifyRISymmetricMapping(upperTriangular, offset[0], offset[1])) {
            firstRowIndex = offset[0];
            firstColIndex = offset[1];
            return true;
        }
    }
    return false;
}
/**
 * Checks, for each sample row from {@code nrows - 2} down to 0, that the cell next to the diagonal
 * (column r + 1 for the upper triangle, r - 1 for the lower one) appears in some raw line as a
 * (row index, column index, value) triple with the given index offsets. A raw line is used for at
 * most one sample row.
 *
 * @param upperTriangular whether the upper triangle of the sample is in use
 * @param firstRowIndex   offset added to the row index before searching
 * @param firstColIndex   offset added to the column index before searching
 * @return false as soon as one inspected row cannot be matched, true otherwise
 */
private boolean verifyRISymmetricMapping(boolean upperTriangular, int firstRowIndex, int firstColIndex) {
    HashSet<Integer> usedLines = new HashSet<>();
    boolean found = true;
    for(int r = nrows - 2; r >= 0 && found; r--) {
        int neighbour = upperTriangular ? Math.min(r + 1, nrows - 1) : Math.max(r - 1, 0);
        // no off-diagonal neighbour left for this row
        if(neighbour == r)
            break;
        found = false;
        int line = 0;
        do {
            boolean candidate = !usedLines.contains(line) && !VTF[r][neighbour].isNotSet();
            if(candidate) {
                RawRow probe = sampleRawRows.get(line).getResetClone();
                if(isMapRowColValue(probe, r + firstRowIndex, neighbour + firstColIndex, VTF[r][neighbour])) {
                    usedLines.add(line);
                    found = true;
                }
            }
            line++;
        }
        while(line < nlines && !found);
    }
    return found;
}
/**
 * Derives the format properties from the mapping: row-regular layouts are probed first as
 * delimiter-separated values and then as index/value pairs; row-irregular layouts are probed as
 * (row, column, value) triples.
 *
 * @return the recognized properties, or null if no format was recognized
 * @throws Exception declared for callers; see the individual probes
 */
public final CustomProperties getFormatProperties() throws Exception {
    if(!isRowRegular())
        return getFileFormatPropertiesOfRIMapping();
    CustomProperties props = getFileFormatPropertiesOfRRCRMapping();
    return props != null ? props : getFileFormatPropertiesOfRRCIMapping();
}
/**
 * Tells whether the mapping is row regular, i.e. every mapped cell of sample row r lies on raw
 * line {@code base + r}, where {@code base} is the line of the first mapped cell of sample row 0
 * (or -1 if row 0 has no mapped cell).
 *
 * @return true if all mapped cells follow that pattern
 */
public final boolean isRowRegular() {
    final int rowCount = mapRow.length;
    final int colCount = mapRow[0].length;

    int base = -1;
    for(int c = 0; c < colCount; c++) {
        if(mapRow[0][c] != -1) {
            base = mapRow[0][c];
            break;
        }
    }

    for(int r = 0; r < rowCount; r++) {
        for(int c = 0; c < colCount; c++) {
            int line = mapRow[r][c];
            if(line != -1 && line != base + r)
                return false;
        }
    }
    return true;
}
/**
 * Looks for one delimiter that separates the values of every sample row.
 * <pre>
 * Row String:    1,2,3,4,5
 * Sample Matrix: [1 2 3 4 5 ]
 * Map Col:       [0 2 4 6 8 ]
 * result:        ["," "," "," "," ","]
 * </pre>
 * Growing prefixes of the shortest non-empty delimiter string are tried as delimiter; a prefix
 * is accepted when it occurs exactly {@code ncols - 1} times in every row. The longest accepted
 * prefix wins. Non-numeric tokens seen while checking it are reported as NA strings.
 *
 * @return the recognized properties, or null if no common delimiter exists
 */
public final CustomProperties getFileFormatPropertiesOfRRCRMapping() {
    ArrayList<String> rowDelims = new ArrayList<>();
    String shortest = null;
    // collect the delimiter string of every row and remember the shortest non-empty key
    for(int r = 0; r < nrows; r++) {
        Pair<String, String> delims = sampleRawRows.get(r).getDelims();
        rowDelims.add(delims.getValue());
        String candidate = delims.getKey();
        if(shortest == null || (candidate.length() > 0 && shortest.length() > candidate.length()))
            shortest = candidate;
    }
    if(shortest.length() == 0)
        shortest = rowDelims.get(0);

    String uniqueDelimiter = null;
    HashSet<String> naString = new HashSet<>();
    StringBuilder prefix = new StringBuilder();
    for(char ch : shortest.toCharArray()) {
        prefix.append(ch);
        String candidateDelim = prefix.toString();
        FastStringTokenizer tokenizer = new FastStringTokenizer(candidateDelim);
        HashSet<String> nonNumeric = new HashSet<>();
        boolean fitsAllRows = true;
        for(int r = 0; r < nrows && fitsAllRows; r++) {
            tokenizer.reset(rowDelims.get(r));
            // numeric tokens are dropped, everything else is a candidate NA string
            // (this case can appear for the Frame data type)
            for(String s : tokenizer.getTokens()) {
                try {
                    Double.parseDouble(s);
                }
                catch(Exception ex) {
                    nonNumeric.add(s);
                }
            }
            fitsAllRows = tokenizer._count == ncols - 1;
        }
        if(fitsAllRows) {
            uniqueDelimiter = candidateDelim;
            naString = nonNumeric;
        }
    }

    if(uniqueDelimiter == null)
        return null;
    CustomProperties ffpgr = new CustomProperties(CustomProperties.GRPattern.Regular, uniqueDelimiter,
        naString);
    ffpgr.setDescription("CSV Format Recognized");
    return ffpgr;
}
/**
 * Minimal tokenizer that splits a string on a fixed delimiter and counts how many delimiter
 * occurrences were skipped while doing so.
 */
private static class FastStringTokenizer implements Serializable {
    private static final long serialVersionUID = -4698672725609750097L;
    private String _string = null;
    private String _del = "";
    private int _pos = -1;
    private int _count = 0;

    /**
     * @param delimiter the delimiter to split on
     */
    public FastStringTokenizer(String delimiter) {
        _del = delimiter;
        reset(null);
    }

    /**
     * Starts tokenizing a new string and clears the delimiter counter.
     *
     * @param string the string to tokenize next
     */
    public void reset(String string) {
        _string = string;
        _pos = 0;
        _count = 0;
    }

    /** Returns the next token, or null when the string is exhausted. */
    private String nextToken() {
        final int len = _string.length();
        int start = _pos;
        // skip over (and count) the delimiters in front of the token
        while(start != -1 && start < len && _string.startsWith(_del, start)) {
            start += _del.length();
            _count++;
        }
        if(start == -1 || start >= len)
            return null;
        // the token ends at the next delimiter, or at the end of the string
        _pos = _string.indexOf(_del, start);
        boolean delimiterAhead = start < _pos && _pos < len;
        return delimiterAhead ? _string.substring(start, _pos) : _string.substring(start);
    }

    /**
     * Tokenizes the whole string.
     *
     * @return the tokens, always preceded by one empty string
     */
    public ArrayList<String> getTokens() {
        ArrayList<String> tokens = new ArrayList<>();
        tokens.add("");
        for(String token = nextToken(); token != null; token = nextToken())
            tokens.add(token);
        return tokens;
    }
}
/**
 * Probes the row-irregular (row, column, value) layout for the index offsets (0,0), (1,1), (0,1)
 * and (1,0), in that order, and keeps the first offset pair for which a delimiter is found.
 *
 * @return the recognized properties with offsets and description set, or null
 */
private CustomProperties getFileFormatPropertiesOfRIMapping() {
    final int[][] offsets = {{0, 0}, {1, 1}, {0, 1}, {1, 0}};
    CustomProperties ffp = null;
    for(int i = 0; i < offsets.length && ffp == null; i++) {
        ffp = getDelimsOfMapping(offsets[i][0], offsets[i][1]);
        if(ffp != null) {
            firstRowIndex = offsets[i][0];
            firstColIndex = offsets[i][1];
        }
    }
    if(ffp == null)
        return null;
    ffp.setFirstColIndex(firstColIndex);
    ffp.setFirstRowIndex(firstRowIndex);
    ffp.setDescription(
        "Market Matrix Format Recognized: FirstRowIndex: " + firstRowIndex + " and FirstColIndex: " + firstColIndex);
    return ffp;
}
/**
 * Derives the delimiter of a (row, column, value) layout for the given index offsets. One sample
 * cell is located in the raw lines as such a triple; the delimiters of the first matching line are
 * then reduced to a single repeating unit.
 *
 * @param firstRowIndex offset added to the row index before searching
 * @param firstColIndex offset added to the column index before searching
 * @return the recognized properties, or null if the cell or a unique delimiter was not found
 */
private CustomProperties getDelimsOfMapping(int firstRowIndex, int firstColIndex) {
    // nothing to probe without sample cells or raw lines
    if(nrows < 1 || ncols < 1 || nlines < 1)
        return null;

    // Probe the second-to-last sample row; a single-row sample falls back to row 0
    // (an unguarded "nrows - 2" would index row -1 in that case).
    int selectedRowIndex = Math.max(nrows - 2, 0);
    int selectedColIndex = ncols - 1;
    // select the right-most set column of that row
    for(int c = ncols - 1; c >= 0; c--) {
        if(!VTF[selectedRowIndex][c].isNotSet()) {
            selectedColIndex = c;
            break;
        }
    }
    // NOTE(review): if the selected row has no set cell at all, isMapRowColValue() throws a
    // RuntimeException for the not-set value - confirm whether another row should be probed.

    HashSet<String> delims = new HashSet<>();
    int minDelimLength = -1;
    boolean rcvMapped = false;
    // the search stops at the first raw line that contains the triple
    for(int lineIndex = 0; lineIndex < nlines && !rcvMapped; lineIndex++) {
        RawRow row = sampleRawRows.get(lineIndex).getResetClone();
        if(isMapRowColValue(row, selectedRowIndex + firstRowIndex, selectedColIndex + firstColIndex,
            VTF[selectedRowIndex][selectedColIndex])) {
            rcvMapped = true;
            Pair<HashSet<String>, Integer> pair = row.getDelimsSet();
            delims.addAll(pair.getKey());
            minDelimLength = pair.getValue();
        }
    }
    if(!rcvMapped)
        return null;

    // find the shortest unit length l such that every delimiter is a repetition of one unit
    String uniqueDelim = null;
    for(int l = 1; l < minDelimLength + 1; l++) {
        boolean flagToken = true;
        HashSet<String> token = new HashSet<>();
        for(String delim : delims) {
            if(delim.length() % l != 0) {
                flagToken = false;
                break;
            }
            for(int i = 0; i <= delim.length() - l; i += l)
                token.add(delim.substring(i, i + l));
            if(token.size() > 1) {
                flagToken = false;
                break;
            }
        }
        if(flagToken) {
            if(token.size() > 0)
                uniqueDelim = token.iterator().next();
            break;
        }
    }
    if(uniqueDelim == null)
        return null;

    CustomProperties.GRSymmetry symmetry;
    if(symmetric)
        symmetry = CustomProperties.GRSymmetry.SYMMETRIC;
    else if(skewSymmetric)
        symmetry = CustomProperties.GRSymmetry.SKEW_SYMMETRIC;
    else
        symmetry = CustomProperties.GRSymmetry.GENERAL;
    return new CustomProperties(symmetry, uniqueDelim, firstRowIndex, firstColIndex);
}
/**
 * Probes the row-regular index/value layout, first with column indexes starting at 0 and then
 * starting at 1.
 *
 * @return the recognized properties with description and first column index set, or null
 */
public CustomProperties getFileFormatPropertiesOfRRCIMapping() {
    for(int firstColIndex = 0; firstColIndex <= 1; firstColIndex++) {
        CustomProperties ffplibsvm = getDelimsOfRRCIMapping(firstColIndex);
        if(ffplibsvm != null) {
            ffplibsvm.setDescription("LibSVM Format Recognized: First Index Started From " + firstColIndex);
            ffplibsvm.setFirstColIndex(firstColIndex);
            return ffplibsvm;
        }
    }
    return null;
}
/**
 * Tries to derive the index delimiter and the item separator of a row-regular
 * "index, index delimiter, value" layout for the given first column index.
 * The sample row with the most set cells is analysed together with the raw line
 * at the same index.
 *
 * @param firstColIndex offset added to the sample column index before searching (0 or 1 at the call sites)
 * @return the recognized properties, or null if no consistent delimiters were found
 */
private CustomProperties getDelimsOfRRCIMapping(int firstColIndex) {
// candidate index-delimiter tokens per "index,value" key, and the union of all of them
HashMap<String, HashSet<String>> tokens = new HashMap<>();
HashSet<String> allTokens = new HashSet<>();
int maxNNZCount = 0;
int selectedRowIndex = 0;
// select the first sample row with the maximum number of set cells
for(int r = 0; r < nrows; r++) {
int rnnz = 0;
for(int c = 0; c < ncols; c++)
if(!VTF[r][c].isNotSet())
rnnz++;
if(maxNNZCount < rnnz) {
maxNNZCount = rnnz;
selectedRowIndex = r;
}
}
// NOTE(review): the raw line is picked by the sample row index, i.e. this assumes sample row r
// lives on raw line r - confirm for inputs with leading header lines.
RawRow row = sampleRawRows.get(selectedRowIndex);
// To find the index delimiter, collect all possible "Index Delim Value" tokens of every set cell
for(int c = ncols - 1; c >= 0; c--) {
ValueTrimFormat v = VTF[selectedRowIndex][c];
if(v.isNotSet())
continue;
String key = (c + firstColIndex) + "," + v.getStringOfActualValue();
HashSet<String> token = tokens.computeIfAbsent(key, k -> new HashSet<>());
token.addAll(getColIndexValueMappedTokens(row, c + firstColIndex, v));
allTokens.addAll(token);
}
// After collecting all tokens, a token shared by the cells is a good candidate for the index delimiter.
// This part of the code looks for such a shared token.
// In some cases, like the LibSVM label, a value has no "Index Delim" token,
// so exactly one cell is allowed to miss the token.
ArrayList<String> missedKeys = new ArrayList<>();
HashSet<Integer> labelIndex = new HashSet<>();
ArrayList<String> selectedTokens = new ArrayList<>();
for(String key : tokens.keySet()) {
if(tokens.get(key).size() == 0)
missedKeys.add(key);
}
// more than one cell without any token: not an index/value layout
if(missedKeys.size() > 1)
return null;
else {
for(String t : allTokens) {
missedKeys.clear();
for(String key : tokens.keySet()) {
if(!tokens.get(key).contains(t)) {
missedKeys.add(key);
}
}
// a token missed by exactly one cell is selected; that cell is treated as the label
if(missedKeys.size() == 1) {
int li = Integer.parseInt(missedKeys.iterator().next().split(",")[0]);
labelIndex.add(li);
selectedTokens.add(t);
}
}
}
/* After the index delimiter token is found, the next step is to find the item separator.
The algorithm marks all indexes, values and index delimiters on the raw string;
the remainder of the text is the separator. In some cases (e.g., duplicated values)
a value can match at more than one position, which causes a wrong match and finally a wrong
separator. To avoid this kind of problem, the values are sorted first (Collections.sort) so that,
according to the original author, values with the largest character-based size are matched first
(for example a = 123.45 and b = 1000000: "a" is matched first because, based on the
ValueTrimFormat algorithm, "a" has 5 chars while "b" has 1 char).
*/
String separator = null;
String indexSeparator = null;
boolean isVerify = false;
// Just one row of the raw sample is enough for finding the item separator; "selectedRowIndex"
// is the row selected above.
// NOTE(review): isVerify is set to true at the top of the body and the failure path breaks, so this
// loop runs at most one iteration and only the first selected token is ever tried - confirm intent.
for(int i = 0; i < selectedTokens.size() && !isVerify; i++) {
isVerify = true;
indexSeparator = selectedTokens.get(i);
row = sampleRawRows.get(selectedRowIndex).getResetClone();
// find all values
ArrayList<ValueTrimFormat> vtfValueList = new ArrayList<>();
ValueTrimFormat vtfIndexDelim = new ValueTrimFormat(indexSeparator);
for(int c = 0; c < ncols; c++) {
if(!VTF[selectedRowIndex][c].isNotSet() && !labelIndex.contains(c + firstColIndex)) {
vtfValueList.add(VTF[selectedRowIndex][c].getACopy());
}
}
Collections.sort(vtfValueList);
// mark every "index, index delimiter, value" sequence on the raw row
for(ValueTrimFormat vtf : vtfValueList) {
ArrayList<ValueTrimFormat> indexDelimValue = new ArrayList<>();
ValueTrimFormat vtfColIndex = new ValueTrimFormat(vtf.getColIndex() + firstColIndex);
indexDelimValue.add(vtfColIndex);
indexDelimValue.add(vtfIndexDelim);
indexDelimValue.add(vtf);
row.findSequenceValues(indexDelimValue, 0, true);
}
// label cells carry no index, so only their value is marked
for(Integer li : labelIndex) {
row.findValue(VTF[selectedRowIndex][li - firstColIndex], false);
}
//row.print();
// whatever is left unmarked on the row is taken as the item separator
separator = row.getDelims().getKey();
if(separator == null) {
isVerify = false;
break;
}
}
if(isVerify) {
return new CustomProperties(CustomProperties.GRPattern.Regular, separator, indexSeparator, firstColIndex);
}
else
return null;
}
/**
 * Checks whether the raw row contains the row index, the column index and the value of a cell.
 * The three items are searched one after the other with {@code findValue}; the search order
 * depends on which of the two indexes are "not set":
 * <pre>
 *   row, col, value   (mask 000)  ->  row, col, value
 *   row, 0  , value   (mask 010)  ->  row, value, col
 *   0  , col, value   (mask 100)  ->  col, value, row
 *   0  , 0  , value   (mask 110)  ->  value, row, col
 * </pre>
 *
 * @param rawrow the raw row to search; its search state is modified
 * @param row    row index to look for
 * @param col    column index to look for
 * @param value  cell value to look for (a copy is searched)
 * @return true if all three items were found
 * @throws RuntimeException if the value itself is not set
 */
private static boolean isMapRowColValue(RawRow rawrow, int row, int col, ValueTrimFormat value) {
    ValueTrimFormat rowKey = new ValueTrimFormat(row);
    ValueTrimFormat colKey = new ValueTrimFormat(col);
    ValueTrimFormat valueKey = value.getACopy();

    // bit 2: row not set, bit 1: col not set, bit 0: value not set
    int unsetMask = 0;
    if(rowKey.isNotSet())
        unsetMask |= 0b100;
    if(colKey.isNotSet())
        unsetMask |= 0b010;
    if(valueKey.isNotSet())
        unsetMask |= 0b001;

    final ValueTrimFormat[] order;
    if(unsetMask == 0)
        order = new ValueTrimFormat[] {rowKey, colKey, valueKey};
    else if(unsetMask == 2)
        order = new ValueTrimFormat[] {rowKey, valueKey, colKey};
    else if(unsetMask == 4)
        order = new ValueTrimFormat[] {colKey, valueKey, rowKey};
    else if(unsetMask == 6)
        order = new ValueTrimFormat[] {valueKey, rowKey, colKey};
    else
        throw new RuntimeException("Not set values can't be find on a string");

    for(ValueTrimFormat item : order) {
        if(rawrow.findValue(item, false).getKey() == -1)
            return false;
    }
    return true;
}
/**
 * Collects the raw text pieces that lie between an occurrence of the column index and the
 * following occurrence of the value in the given row; these pieces are the candidates for the
 * index delimiter.
 *
 * @param rawrow the raw row to scan (a reset clone is used, the argument itself is not searched)
 * @param col    column index to look for
 * @param value  cell value to look for
 * @return the set of candidate tokens, possibly empty
 */
private static HashSet<String> getColIndexValueMappedTokens(RawRow rawrow, int col, ValueTrimFormat value) {
ValueTrimFormat vtfColIndex = new ValueTrimFormat(col);
ValueTrimFormat vtfColValue = value.getACopy();
// results of findValue; presumably (start position, length) of the match - TODO confirm in RawRow
Pair<Integer, Integer> pairCol;
Pair<Integer, Integer> pairValue;
HashSet<String> tokens = new HashSet<>();
RawRow row = rawrow.getResetClone();
int lastIndex = 0;
// bounds of the most recent candidate token and the value position it belongs to
int lastTokenStart = -1;
int lastTokenEnd = -1;
int lastTokenID = -1;
do {
// restart the search from the position reached after the previous index match
row.resetReserved();
row.setLastIndex(lastIndex);
pairCol = row.findValue(vtfColIndex, true);
if(pairCol.getKey() == -1)
break;
lastIndex = row.getNumericLastIndex();
pairValue = row.findValue(vtfColValue, true);
if(pairValue.getKey() == -1)
break;
// NOTE(review): as written this is (valueKey - colKey) + colValue; if the length of the gap between
// index and value was intended, it would be valueKey - (colKey + colValue) - confirm.
int tl = pairValue.getKey() - pairCol.getKey() + pairCol.getValue();
if(tl > 0) {
if(lastTokenID == -1)
lastTokenID = pairValue.getKey();
// the value matched at a different position than remembered: emit the previously recorded token
if(lastTokenID != pairValue.getKey()) {
String token = row.getRaw().substring(lastTokenStart, lastTokenEnd);
tokens.add(token);
}
lastTokenStart = pairCol.getKey() + pairCol.getValue();
lastTokenEnd = pairValue.getKey();
}
}
while(true);
// emit the last recorded token if it is not empty
if(lastTokenEnd - lastTokenStart > 0) {
String token = row.getRaw().substring(lastTokenStart, lastTokenEnd);
tokens.add(token);
}
return tokens;
}
/**
 * @return the result of the symmetry check done in runMapping(); false if that check never ran
 */
public boolean isSymmetric() {
return symmetric;
}
/**
 * @return true if runMapping() was able to locate every set sample cell in the raw text
 */
public boolean isMapped() {
return mapped;
}
}