blob: 635d730b24f91f158477933d502b994088a74e3d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.runtime.transform;
import java.io.Serializable;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.sysds.lops.Lop;
/**
 * Constants, enums, and simple input-format accessors for the transform
 * runtime, plus a helper for matching tokens against a list of NA strings.
 */
public class TfUtils implements Serializable
{
	private static final long serialVersionUID = 526252850872633125L;

	/** Column types and the byte ID each one maps to. */
	protected enum ColumnTypes {
		SCALE,
		NOMINAL,
		ORDINAL,
		DUMMYCODED;

		/**
		 * Maps this column type to its byte ID.
		 *
		 * @return 1 for SCALE and DUMMYCODED, 2 for NOMINAL, 3 for ORDINAL
		 */
		protected byte toID() {
			switch(this) {
				case SCALE: return 1;
				case NOMINAL: return 2;
				case ORDINAL: return 3;
				// Ideally, dummycoded columns should be of a different type. Treating them as SCALE is incorrect, semantically.
				case DUMMYCODED: return 1;
				default:
					throw new RuntimeException("Invalid Column Type: " + this);
			}
		}
	}

	//transform methods
	public enum TfMethod {
		IMPUTE, RECODE, HASH, BIN, DUMMYCODE, SCALE, OMIT;

		/**
		 * Returns the constant name in lower case, e.g. {@code "impute"}.
		 *
		 * @return lower-case name of this transform method
		 */
		@Override
		public String toString() {
			// Locale.ROOT keeps the result independent of the JVM default locale;
			// with a Turkish default locale a plain toLowerCase() would turn 'I'
			// into the dotless 'ı' (IMPUTE, BIN, OMIT).
			return name().toLowerCase(Locale.ROOT);
		}
	}

	//transform meta data constants (frame-based transform)
	public static final String TXMTD_MVPREFIX = "#Meta"+Lop.DATATYPE_PREFIX+"MV";
	public static final String TXMTD_NDPREFIX = "#Meta"+Lop.DATATYPE_PREFIX+"ND";

	//transform meta data constants (old file-based transform)
	public static final String TXMTD_SEP = ",";
	public static final String TXMTD_COLNAMES = "column.names";
	public static final String TXMTD_RCD_MAP_SUFFIX = ".map";
	public static final String TXMTD_RCD_DISTINCT_SUFFIX = ".ndistinct";
	public static final String TXMTD_BIN_FILE_SUFFIX = ".bin";
	public static final String TXMTD_MV_FILE_SUFFIX = ".impute";

	public static final String JSON_ATTRS = "attributes";
	public static final String JSON_MTHD = "methods";
	public static final String JSON_CONSTS = "constants";
	public static final String JSON_NBINS = "numbins";

	// Input-format state exposed through the getters below. No code visible in
	// this class assigns these fields, so they keep the defaults declared here.
	private String _headerLine = null;
	private boolean _hasHeader;
	private Pattern _delim = null;
	private String _delimString = null;
	private String[] _NAstrings = null;
	private int _numInputCols = -1;

	/** @return the stored header line, or null if none is set */
	public String getHeader() { return _headerLine; }

	/** @return the stored has-header flag */
	public boolean hasHeader() { return _hasHeader; }

	/** @return the stored delimiter string, or null if none is set */
	public String getDelimString() { return _delimString; }

	/** @return the stored delimiter pattern, or null if none is set */
	public Pattern getDelim() { return _delim; }

	/** @return the stored NA-strings array itself (not a copy), or null if none is set */
	public String[] getNAStrings() { return _NAstrings; }

	/** @return the stored number of input columns, widened to long; -1 by default */
	public long getNumCols() { return _numInputCols; }

	/**
	 * Function that checks if the given string is one of NA strings.
	 * The comparison is an exact, case-sensitive {@link String#equals} match.
	 *
	 * @param NAstrings array of NA strings; may be null, in which case nothing matches
	 * @param w string to check; a null value is never reported as an NA string
	 * @return true if w is a NAstring
	 */
	public static boolean isNA(String[] NAstrings, String w) {
		// A null token cannot equal any NA string; returning false here avoids
		// the NullPointerException that w.equals(...) would otherwise raise.
		if(NAstrings == null || w == null)
			return false;
		for(String na : NAstrings) {
			if(w.equals(na))
				return true;
		}
		return false;
	}
}