blob: d021597d7ff9bc8a71915feac4a1e32b12a73c3e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysds.common;
import java.util.EnumSet;
import java.util.HashMap;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.ReturnType;
/**
* Enum to represent all builtin functions in the default name space.
* Each function is either native or implemented by a DML script. In
* case of DML script, these functions are loaded during parsing. As
* always, user-defined DML-bodied functions take precedence over all
* builtin functions.
*
* To add a new builtin script function, simply add the definition here
* as well as a dml file in scripts/builtin with a matching name. On
* building SystemDS, these scripts are packaged into the jar as well.
*
* Each constant is declared with the function name, an optional alias,
* the script flag, an optional parameterized flag (default false), and
* an optional return type (default ReturnType.SINGLE_RETURN); see the
* constructors for the accepted argument combinations.
*/
public enum Builtins {
// Builtin functions registered as non-parameterized
// (parameterized flag omitted or explicitly false)
ABSTAIN("abstain", true),
ABS("abs", false),
ACOS("acos", false),
ALS("als", true),
ALS_CG("alsCG", true),
ALS_DS("alsDS", true),
ALS_PREDICT("alsPredict", true),
ALS_TOPK_PREDICT("alsTopkPredict", true),
APPLY_PIPELINE("apply_pipeline", true),
APPLY_SCHEMA("applySchema", false),
ARIMA("arima", true),
ASIN("asin", false),
ATAN("atan", false),
AUC("auc", true),
AUTOENCODER2LAYER("autoencoder_2layer", true),
AVG_POOL("avg_pool", false),
AVG_POOL_BACKWARD("avg_pool_backward", false),
BATCH_NORM2D("batch_norm2d", false, ReturnType.MULTI_RETURN),
BATCH_NORM2D_BACKWARD("batch_norm2d_backward", false, ReturnType.MULTI_RETURN),
BIASADD("bias_add", false),
BIASMULT("bias_multiply", false),
BANDIT("bandit", true),
BITWAND("bitwAnd", false),
BITWOR("bitwOr", false),
BITWXOR("bitwXor", false),
BITWSHIFTL("bitwShiftL", false),
BITWSHIFTR("bitwShiftR", false),
BIVAR("bivar", true),
CAST_AS_BOOLEAN("as.logical", "as.boolean", false),
CAST_AS_DOUBLE("as.double", false),
CAST_AS_FRAME("as.frame", false),
CAST_AS_INT("as.integer", false),
CAST_AS_LIST("as.list", false),
CAST_AS_MATRIX("as.matrix", false),
CAST_AS_SCALAR("as.scalar", "castAsScalar", false),
CBIND("cbind", "append", false),
CEIL("ceil", "ceiling", false),
CHOLESKY("cholesky", false),
COL_COUNT_DISTINCT("colCountDistinct",false),
COLMAX("colMaxs", false),
COLMEAN("colMeans", false),
COLMIN("colMins", false),
COLNAMES("colnames", false),
COLPROD("colProds", false),
COLSD("colSds", false),
COLSUM("colSums", false),
COLVAR("colVars", false),
COMPONENTS("components", true),
COMPRESS("compress", false, ReturnType.MULTI_RETURN),
CONFUSIONMATRIX("confusionMatrix", true),
CONV2D("conv2d", false),
CONV2D_BACKWARD_FILTER("conv2d_backward_filter", false),
CONV2D_BACKWARD_DATA("conv2d_backward_data", false),
COR("cor", true),
CORRECTTYPOS("correctTypos", true),
CORRECTTYPOSAPPLY("correctTyposApply", true),
COS("cos", false),
COSH("cosh", false),
COV("cov", false),
COX("cox", true),
CSPLINE("cspline", true),
CSPLINE_CG("csplineCG", true),
CSPLINE_DS("csplineDS", true),
CUMMAX("cummax", false),
CUMMIN("cummin", false),
CUMPROD("cumprod", false),
CUMSUM("cumsum", false),
CUMSUMPROD("cumsumprod", false),
DBSCAN("dbscan", true),
DBSCANAPPLY("dbscanApply", true),
DECISIONTREE("decisionTree", true),
DECISIONTREEPREDICT("decisionTreePredict", true),
DECOMPRESS("decompress", false),
DEEPWALK("deepWalk", true),
DETECTSCHEMA("detectSchema", false),
DENIALCONSTRAINTS("denialConstraints", true),
DIFFERENCESTATISTICS("differenceStatistics", true),
DIAG("diag", false),
DISCOVER_FD("discoverFD", true),
DISCOVER_MD("mdedup", true),
SETDIFF("setdiff", true),
DIST("dist", true),
DMV("dmv", true),
DROP_INVALID_TYPE("dropInvalidType", false),
DROP_INVALID_LENGTH("dropInvalidLength", false),
EIGEN("eigen", false, ReturnType.MULTI_RETURN),
EMA("ema", true),
EXISTS("exists", false),
EXECUTE_PIPELINE("executePipeline", true),
EXP("exp", false),
EVAL("eval", false),
EVALLIST("evalList", false),
F1SCORE("f1Score", true),
FDR("fdr", "FDR", true),
FIT_PIPELINE("fit_pipeline", true),
FIX_INVALID_LENGTHS("fixInvalidLengths", true),
FIX_INVALID_LENGTHS_APPLY("fixInvalidLengthsApply", true),
FFT("fft", false, ReturnType.MULTI_RETURN),
FFT_LINEARIZED("fft_linearized", false, ReturnType.MULTI_RETURN),
FF_TRAIN("ffTrain", true),
FF_PREDICT("ffPredict", true),
FLOOR("floor", false),
FLATTENQUANTILE("flattenQuantile", true),
FRAME_SORT("frameSort", true),
FRAME_ROW_REPLICATE("freplicate", false),
FREQUENCYENCODE("frequencyEncode", true),
FREQUENCYENCODEAPPLY("frequencyEncodeApply", true),
GARCH("garch", true),
GAUSSIAN_CLASSIFIER("gaussianClassifier", true),
GET_ACCURACY("getAccuracy", true),
GLM("glm", true),
GLM_PREDICT("glmPredict", true),
GMM("gmm", true),
GMM_PREDICT("gmmPredict", true),
GNMF("gnmf", true),
GRID_SEARCH("gridSearch", true),
TOPK_CLEANING("topk_cleaning", true),
HOSPITAL_RESIDENCY_MATCH("hospitalResidencyMatch", true),
HYPERBAND("hyperband", true),
IFELSE("ifelse", false),
IFFT("ifft", false, ReturnType.MULTI_RETURN),
IFFT_LINEARIZED("ifft_linearized", false, ReturnType.MULTI_RETURN),
IMG_MIRROR("img_mirror", true),
IMG_MIRROR_LINEARIZED("img_mirror_linearized", true),
IMG_BRIGHTNESS("img_brightness", true),
IMG_BRIGHTNESS_LINEARIZED("img_brightness_linearized", true),
IMG_CROP("img_crop", true),
IMG_CROP_LINEARIZED("img_crop_linearized", true),
IMG_TRANSFORM("img_transform", true),
IMG_TRANSFORM_LINEARIZED("img_transform_linearized", true),
IMG_TRANSLATE("img_translate", true),
IMG_TRANSLATE_LINEARIZED("img_translate_linearized", true),
IMG_ROTATE("img_rotate", true),
IMG_ROTATE_LINEARIZED("img_rotate_linearized", true),
IMG_SHEAR("img_shear", true),
IMG_SHEAR_LINEARIZED("img_shear_linearized", true),
IMG_CUTOUT("img_cutout", true),
IMG_CUTOUT_LINEARIZED("img_cutout_linearized", true),
IMG_SAMPLE_PAIRING("img_sample_pairing", true),
IMG_SAMPLE_PAIRING_LINEARIZED("img_sample_pairing_linearized", true),
IMG_INVERT("img_invert", true),
IMG_INVERT_LINEARIZED("img_invert_linearized", true),
IMG_POSTERIZE("img_posterize", true),
IMG_POSTERIZE_LINEARIZED("img_posterize_linearized", true),
IMPURITY_MEASURES("impurityMeasures", true),
IMPUTE_BY_KNN("imputeByKNN", true),
IMPUTE_BY_MEAN("imputeByMean", true),
IMPUTE_BY_MEAN_APPLY("imputeByMeanApply", true),
IMPUTE_BY_MEDIAN("imputeByMedian", true),
IMPUTE_BY_MEDIAN_APPLY("imputeByMedianApply", true),
IMPUTE_BY_MODE("imputeByMode", true),
IMPUTE_BY_MODE_APPLY("imputeByModeApply", true),
IMPUTE_FD("imputeByFD", true),
IMPUTE_FD_APPLY("imputeByFDApply", true),
INCSLICELINE("incSliceLine", true),
INTERQUANTILE("interQuantile", false),
INTERSECT("intersect", true),
INVERSE("inv", "inverse", false),
IQM("interQuartileMean", false),
ISNA("is.na", "isNA", false),
ISNAN("is.nan", "isNaN", false),
ISINF("is.infinite", "isInf", false),
KM("km", true),
KMEANS("kmeans", true),
KMEANSPREDICT("kmeansPredict", true),
KNNBF("knnbf", true),
KNNGRAPH("knnGraph", true),
KNN("knn", true),
L2SVM("l2svm", true),
L2SVMPREDICT("l2svmPredict", true),
LASSO("lasso", true),
LENET_TRAIN("lenetTrain", true),
LENET_PREDICT("lenetPredict", true),
LENGTH("length", false),
LINEAGE("lineage", false),
LIST("list", false), //note: builtin and parbuiltin (the parameterized variant is LISTNV)
LM("lm", true),
LMCG("lmCG", true),
LMDS("lmDS", true),
LMPREDICT("lmPredict", true),
LMPREDICT_STATS("lmPredictStats", true),
LOCAL("local", false),
LOG("log", false),
LOGSUMEXP("logSumExp", true),
LSTM("lstm", false, ReturnType.MULTI_RETURN),
LSTM_BACKWARD("lstm_backward", false, ReturnType.MULTI_RETURN),
LU("lu", false, ReturnType.MULTI_RETURN),
MAP("map", false),
MATRIXPROFILE("matrixProfile", true),
MAX("max", "pmax", false),
MAX_POOL("max_pool", false),
MAX_POOL_BACKWARD("max_pool_backward", false),
MCC("mcc", true),
MAE("mae", true),
MAPE("mape", true),
MEAN("mean", "avg", false),
MEDIAN("median", false),
MICE("mice", true),
MICE_APPLY("miceApply", true),
MIN("min", "pmin", false),
MOMENT("moment", "centralMoment", false),
MSE("mse", true),
MSMAPE("msmape", true),
MSVM("msvm", true),
MSVMPREDICT("msvmPredict", true),
MULTILOGREG("multiLogReg", true),
MULTILOGREGPREDICT("multiLogRegPredict", true),
NA_LOCF("na_locf", true),
NAIVEBAYES("naiveBayes", true, false),
NAIVEBAYESPREDICT("naiveBayesPredict", true, false),
NCOL("ncol", false),
NORMALIZE("normalize", true),
NORMALIZEAPPLY("normalizeApply", true),
NROW("nrow", false),
NRMSE("nrmse", true),
OUTER("outer", false),
OUTLIER("outlier", true, false), //TODO parameterize opposite
OUTLIER_ARIMA("outlierByArima",true),
OUTLIER_IQR("outlierByIQR", true),
OUTLIER_IQR_APPLY("outlierByIQRApply", true),
OUTLIER_SD("outlierBySd", true),
OUTLIER_SD_APPLY("outlierBySdApply", true),
PAGERANK("pageRank", true),
PCA("pca", true),
PCAINVERSE("pcaInverse", true),
PCATRANSFORM("pcaTransform", true),
PNMF("pnmf", true),
PPCA("ppca", true),
PPRED("ppred", false),
PROD("prod", false),
PSNR("psnr", true),
QR("qr", false, ReturnType.MULTI_RETURN),
QUANTILE("quantile", false),
QUANTIZEBYCLUSTER("quantizeByCluster", true),
RANDOM_FOREST("randomForest", true),
RANDOM_FOREST_PREDICT("randomForestPredict", true),
RANGE("range", false),
RAGROUPBY("raGroupby", true),
RAJOIN("raJoin", true),
RASELECTION("raSelection", true),
RBIND("rbind", false),
RCM("rowClassMeet", "rcm", false, false, ReturnType.MULTI_RETURN),
REMOVE("remove", false, ReturnType.MULTI_RETURN),
REV("rev", false),
ROUND("round", false),
ROW_COUNT_DISTINCT("rowCountDistinct",false),
ROWINDEXMAX("rowIndexMax", false),
ROWINDEXMIN("rowIndexMin", false),
ROWMAX("rowMaxs", false),
ROWMEAN("rowMeans", false),
ROWMIN("rowMins", false),
ROWPROD("rowProds", false),
ROWSD("rowSds", false),
ROWSUM("rowSums", false),
ROWVAR("rowVars", false),
RMSE("rmse", true),
SAMPLE("sample", false),
SD("sd", false),
SELVARTHRESH("selectByVarThresh", true),
SEQ("seq", false),
SYMMETRICDIFFERENCE("symmetricDifference", true),
SHERLOCK("sherlock", true),
SHERLOCKPREDICT("sherlockPredict", true),
SHORTESTPATH("shortestPath", true),
SIGMOID("sigmoid", true), // 1 / (1 + exp(-X))
SIGN("sign", false),
SIN("sin", false),
SINH("sinh", false),
SLICEFINDER("slicefinder", true), //TODO remove
SLICELINE("sliceLine", true),
SLICELINE_DEBUG("sliceLineDebug", true),
SKEWNESS("skewness", true),
SMAPE("smape", true),
SMOTE("smote", true),
SOFTMAX("softmax", true),
SOLVE("solve", false),
SPLIT("split", true),
SPLIT_BALANCED("splitBalanced", true),
STABLE_MARRIAGE("stableMarriage", true),
STATSNA("statsNA", true),
STRATSTATS("stratstats", true),
STEPLM("steplm",true, ReturnType.MULTI_RETURN),
STFT("stft", false, ReturnType.MULTI_RETURN),
SQRT("sqrt", false),
SUM("sum", false),
SVD("svd", false, ReturnType.MULTI_RETURN),
TABLE("table", "ctable", false),
TAN("tan", false),
TANH("tanh", false),
TO_ONE_HOT("toOneHot", true),
TOMEKLINK("tomeklink", true),
TRACE("trace", false),
TRANS("t", false),
TSNE("tSNE", true),
TYPEOF("typeof", false),
UNIVAR("univar", true),
UNION("union", true),
VAR("var", false),
VALUE_SWAP("valueSwap", false),
VECTOR_TO_CSV("vectorToCsv", true),
WER("wer", true, false),
WINSORIZE("winsorize", true, false), //TODO parameterize w/ prob, min/max val
WINSORIZEAPPLY("winsorizeApply", true, false), //TODO parameterize w/ prob, min/max val
WOE("WoE", true ),
WOEAPPLY("WoEApply", true ),
XGBOOST("xgboost", true),
XGBOOSTPREDICT("xgboostPredict", true),
XGBOOSTPREDICT_REG("xgboostPredictRegression", true),
XGBOOSTPREDICT_CLASS("xgboostPredictClassification", true),
XOR("xor", false),
// Parameterized builtin functions
// (note: this section also contains some entries registered as
// non-parameterized, e.g., cvlm, scale, scaleApply, scaleMinMax, time,
// underSampling, xdummy1, and xdummy2)
AUTODIFF("autoDiff", false, true),
CDF("cdf", false, true),
CONTAINS("contains", false, true),
COUNT_DISTINCT("countDistinct",false, true),
COUNT_DISTINCT_APPROX("countDistinctApprox", false, true),
COUNT_DISTINCT_APPROX_ROW("rowCountDistinctApprox", false, true),
COUNT_DISTINCT_APPROX_COL("colCountDistinctApprox", false, true),
CVLM("cvlm", true, false),
GROUPEDAGG("aggregate", "groupedAggregate", false, true),
INVCDF("icdf", false, true),
LISTNV("list", false, true), //note: builtin and parbuiltin (the non-parameterized variant is LIST)
LOWER_TRI("lower.tri", false, true),
ORDER("order", false, true),
PARAMSERV("paramserv", false, true),
PCHISQ("pchisq", false, true),
PEXP("pexp", false, true),
PF("pf", false, true),
PNORM("pnorm", false, true),
PT("pt", false, true),
QCHISQ("qchisq", false, true),
QEXP("qexp", false, true),
QF("qf", false, true),
QNORM("qnorm", false, true),
QT("qt", false, true),
REPLACE("replace", false, true),
RMEMPTY("removeEmpty", false, true),
SCALE("scale", true, false),
SCALEAPPLY("scaleApply", true, false),
SCALE_MINMAX("scaleMinMax", true, false),
TIME("time", false),
TOKENIZE("tokenize", false, true),
TOSTRING("toString", false, true),
TRANSFORMAPPLY("transformapply", false, true),
TRANSFORMCOLMAP("transformcolmap", false, true),
TRANSFORMDECODE("transformdecode", false, true),
TRANSFORMENCODE("transformencode", false, true),
TRANSFORMMETA("transformmeta", false, true),
UNDER_SAMPLING("underSampling", true),
UNIQUE("unique", false, true),
UPPER_TRI("upper.tri", false, true),
XDUMMY1("xdummy1", true), //error handling test
XDUMMY2("xdummy2", true); //error handling test
/**
* Creates a non-parameterized, single-return builtin without alias.
*
* @param name function name used for lookup
* @param script value reported by isScript()
*/
Builtins(String name, boolean script) {
this(name, null, script, false, ReturnType.SINGLE_RETURN);
}
/**
* Creates a non-parameterized builtin without alias and with an
* explicit return type.
*
* @param name function name used for lookup
* @param script value reported by isScript()
* @param retType return type, e.g., ReturnType.MULTI_RETURN
*/
Builtins(String name, boolean script, ReturnType retType) {
this(name, null, script, false, retType);
}
/**
* Creates a single-return builtin without alias.
*
* @param name function name used for lookup
* @param script value reported by isScript()
* @param parameterized value reported by isParameterized()
*/
Builtins(String name, boolean script, boolean parameterized) {
this(name, null, script, parameterized, ReturnType.SINGLE_RETURN);
}
/**
* Creates a non-parameterized, single-return builtin with an alias.
*
* @param name function name used for lookup
* @param alias alternative name used for lookup
* @param script value reported by isScript()
*/
Builtins(String name, String alias, boolean script) {
this(name, alias, script, false, ReturnType.SINGLE_RETURN);
}
/**
* Creates a single-return builtin with an alias.
*
* @param name function name used for lookup
* @param alias alternative name used for lookup
* @param script value reported by isScript()
* @param parameterized value reported by isParameterized()
*/
Builtins(String name, String alias, boolean script, boolean parameterized) {
this(name, alias, script, parameterized, ReturnType.SINGLE_RETURN);
}
/**
* Canonical constructor all other constructors delegate to; it only
* stores the given properties.
*
* @param name function name used for lookup
* @param alias alternative name used for lookup, null if there is none
* @param script value reported by isScript()
* @param parameterized value reported by isParameterized()
* @param retType return type; isMultiReturn() checks it against ReturnType.MULTI_RETURN
*/
Builtins(String name, String alias, boolean script, boolean parameterized, ReturnType retType) {
_name = name;
_alias = alias;
_script = script;
_parameterized = parameterized;
_retType = retType;
}
// directory prefix used by getFilePath to locate builtin DML scripts
private static final String BUILTIN_DIR = "scripts/builtin/";
// lookup table from function name or alias to the builtin constant
private static final HashMap<String, Builtins> _map = new HashMap<>();
static {
	// Register every builtin under its name and, if present, its alias.
	// Iteration follows declaration order, so for names shared by several
	// constants (e.g., "list" of LIST and LISTNV) the later one is kept.
	final EnumSet<Builtins> allBuiltins = EnumSet.allOf(Builtins.class);
	for( Builtins builtin : allBuiltins ) {
		_map.put(builtin.getName(), builtin);
		final String alias = builtin.getAlias();
		if( alias != null )
			_map.put(alias, builtin);
	}
}
// immutable properties of a builtin, set once by the constructor
private final String _name;
private final String _alias;
private final boolean _script;
private final boolean _parameterized;
private final ReturnType _retType;
/**
* @return the function name this builtin was declared with
*/
public String getName() {
return _name;
}
/**
* @return the alias this builtin was declared with, or null if none
*/
public String getAlias() {
return _alias;
}
/**
* @return the script flag this builtin was declared with
*/
public boolean isScript() {
return _script;
}
/**
* @return the parameterized flag this builtin was declared with
*/
public boolean isParameterized() {
return _parameterized;
}
/**
* @return true if the return type is ReturnType.MULTI_RETURN, false otherwise
*/
public boolean isMultiReturn() {
return _retType == ReturnType.MULTI_RETURN;
}
/**
* Checks if a builtin with the given name or alias exists and has
* exactly the given script and parameterized flags.
*
* Note: the lookup uses get(String), so the name "list" always
* resolves to the non-parameterized LIST.
*
* @param name function name or alias
* @param script expected script flag
* @param parameterized expected parameterized flag
* @return true if a matching builtin exists, false otherwise
*/
public static boolean contains(String name, boolean script, boolean parameterized) {
	final Builtins match = get(name);
	if( match == null )
		return false;
	return match.isScript() == script
		&& match.isParameterized() == parameterized;
}
/**
* Looks up a builtin by its name or alias. The name "list" is shared
* by LIST and LISTNV and always resolves to the non-parameterized
* LIST here.
*
* @param name function name or alias, may be null
* @return the builtin, or null if the name is null or unknown
*/
public static Builtins get(String name) {
	// constant-first comparison: a null name falls through to the map
	// lookup (which yields null) instead of a NullPointerException
	if( "list".equals(name) )
		return LIST; //unparameterized
	return _map.get(name);
}
/**
* Looks up a builtin by its name or alias, restricted to builtins
* whose parameterized flag equals the given one. For the name "list",
* LISTNV is returned if params is true and LIST otherwise.
*
* @param name function name or alias, may be null
* @param params required value of the parameterized flag
* @return the builtin, or null if the name is null or unknown, or if
*         the parameterized flag does not match
*/
public static Builtins get(String name, boolean params) {
	// treat a null name like any other unknown name
	if( name == null )
		return null;
	if( name.equals("list") )
		return params ? LISTNV : LIST;
	Builtins tmp = get(name);
	// explicit parentheses: the whole conjunction is the ternary condition
	return (tmp != null && params == tmp.isParameterized()) ? tmp : null;
}
/**
* Builds the path of the DML script for the given function name, i.e.,
* the builtin script directory followed by the name and the ".dml"
* extension.
*
* @param name function name
* @return script path, e.g., "scripts/builtin/lm.dml" for "lm"
*/
public static String getFilePath(String name) {
	return BUILTIN_DIR + name + ".dml";
}
/**
* Derives the internal function name for the given name and data type.
* Names that are not non-parameterized script builtins are returned
* unchanged; all others get a data-type-specific prefix: "m_" for
* matrix, "f_" for frame, and "s_" otherwise.
*
* @param name function name
* @param dt data type; only accessed if the name is a non-parameterized script builtin
* @return internal function name
*/
public static String getInternalFName(String name, DataType dt) {
	// private builtin: keep the name as is
	if( !contains(name, true, false) )
		return name;
	// public builtin: prepend the data type prefix
	final String prefix;
	if( dt.isMatrix() )
		prefix = "m_";
	else if( dt.isFrame() )
		prefix = "f_";
	else
		prefix = "s_";
	return prefix + name;
}
}