-- blob: 2658e5e46052867b3f4c085f7b72e5694d32de00 [file] [log] [blame]
-----------------------------------------------------------------------------
-- Hivemall: Hive scalable Machine Learning Library
-----------------------------------------------------------------------------
-- Registers hivemall_version; any definition already present under that
-- name is dropped first.
drop temporary function if exists hivemall_version;
create temporary function hivemall_version
    as 'hivemall.HivemallVersionUDF';
---------------------------
-- binary classification --
---------------------------
-- Functions backed by classes in hivemall.classifier. Every name is dropped
-- (if present) before it is created. Variant names (train_pa1, train_pa2,
-- train_arowh, train_scw, train_scw2) are bound to nested classes
-- ($PA1, $PA2, $AROWh, $SCW1, $SCW2) of their base class.
drop temporary function if exists train_classifier;
create temporary function train_classifier
    as 'hivemall.classifier.GeneralClassifierUDTF';

drop temporary function if exists train_perceptron;
create temporary function train_perceptron
    as 'hivemall.classifier.PerceptronUDTF';

drop temporary function if exists train_pa;
create temporary function train_pa
    as 'hivemall.classifier.PassiveAggressiveUDTF';

drop temporary function if exists train_pa1;
create temporary function train_pa1
    as 'hivemall.classifier.PassiveAggressiveUDTF$PA1';

drop temporary function if exists train_pa2;
create temporary function train_pa2
    as 'hivemall.classifier.PassiveAggressiveUDTF$PA2';

drop temporary function if exists train_cw;
create temporary function train_cw
    as 'hivemall.classifier.ConfidenceWeightedUDTF';

drop temporary function if exists train_arow;
create temporary function train_arow
    as 'hivemall.classifier.AROWClassifierUDTF';

drop temporary function if exists train_arowh;
create temporary function train_arowh
    as 'hivemall.classifier.AROWClassifierUDTF$AROWh';

-- NOTE(review): "SoftConfideceWeightedUDTF" is spelled without the second
-- "n". Presumably this matches the Java class name; confirm against the jar
-- before correcting it here.
drop temporary function if exists train_scw;
create temporary function train_scw
    as 'hivemall.classifier.SoftConfideceWeightedUDTF$SCW1';

drop temporary function if exists train_scw2;
create temporary function train_scw2
    as 'hivemall.classifier.SoftConfideceWeightedUDTF$SCW2';

drop temporary function if exists train_adagrad_rda;
create temporary function train_adagrad_rda
    as 'hivemall.classifier.AdaGradRDAUDTF';

drop temporary function if exists train_kpa;
create temporary function train_kpa
    as 'hivemall.classifier.KernelExpansionPassiveAggressiveUDTF';

drop temporary function if exists kpa_predict;
create temporary function kpa_predict
    as 'hivemall.classifier.KPAPredictUDAF';
--------------------------------
-- Multiclass classification --
--------------------------------
-- Functions backed by classes in hivemall.classifier.multiclass. Each name is
-- dropped (if present) and then created. The pa1/pa2, arowh and scw/scw2
-- names are bound to nested classes of their base trainer class.
drop temporary function if exists train_multiclass_perceptron;
create temporary function train_multiclass_perceptron
    as 'hivemall.classifier.multiclass.MulticlassPerceptronUDTF';

drop temporary function if exists train_multiclass_pa;
create temporary function train_multiclass_pa
    as 'hivemall.classifier.multiclass.MulticlassPassiveAggressiveUDTF';

drop temporary function if exists train_multiclass_pa1;
create temporary function train_multiclass_pa1
    as 'hivemall.classifier.multiclass.MulticlassPassiveAggressiveUDTF$PA1';

drop temporary function if exists train_multiclass_pa2;
create temporary function train_multiclass_pa2
    as 'hivemall.classifier.multiclass.MulticlassPassiveAggressiveUDTF$PA2';

drop temporary function if exists train_multiclass_cw;
create temporary function train_multiclass_cw
    as 'hivemall.classifier.multiclass.MulticlassConfidenceWeightedUDTF';

drop temporary function if exists train_multiclass_arow;
create temporary function train_multiclass_arow
    as 'hivemall.classifier.multiclass.MulticlassAROWClassifierUDTF';

drop temporary function if exists train_multiclass_arowh;
create temporary function train_multiclass_arowh
    as 'hivemall.classifier.multiclass.MulticlassAROWClassifierUDTF$AROWh';

drop temporary function if exists train_multiclass_scw;
create temporary function train_multiclass_scw
    as 'hivemall.classifier.multiclass.MulticlassSoftConfidenceWeightedUDTF$SCW1';

drop temporary function if exists train_multiclass_scw2;
create temporary function train_multiclass_scw2
    as 'hivemall.classifier.multiclass.MulticlassSoftConfidenceWeightedUDTF$SCW2';
--------------------------
-- similarity functions --
--------------------------
-- Functions backed by classes in hivemall.knn.similarity; each is dropped
-- (if present) before being created.
drop temporary function if exists cosine_similarity;
create temporary function cosine_similarity
    as 'hivemall.knn.similarity.CosineSimilarityUDF';

drop temporary function if exists jaccard_similarity;
create temporary function jaccard_similarity
    as 'hivemall.knn.similarity.JaccardIndexUDF';

drop temporary function if exists angular_similarity;
create temporary function angular_similarity
    as 'hivemall.knn.similarity.AngularSimilarityUDF';

drop temporary function if exists euclid_similarity;
create temporary function euclid_similarity
    as 'hivemall.knn.similarity.EuclidSimilarity';

drop temporary function if exists distance2similarity;
create temporary function distance2similarity
    as 'hivemall.knn.similarity.Distance2SimilarityUDF';

drop temporary function if exists dimsum_mapper;
create temporary function dimsum_mapper
    as 'hivemall.knn.similarity.DIMSUMMapperUDTF';
------------------------
-- distance functions --
------------------------
-- Functions backed by classes in hivemall.knn.distance; each is dropped
-- (if present) before being created.
drop temporary function if exists popcnt;
create temporary function popcnt
    as 'hivemall.knn.distance.PopcountUDF';

drop temporary function if exists kld;
create temporary function kld
    as 'hivemall.knn.distance.KLDivergenceUDF';

drop temporary function if exists hamming_distance;
create temporary function hamming_distance
    as 'hivemall.knn.distance.HammingDistanceUDF';

drop temporary function if exists euclid_distance;
create temporary function euclid_distance
    as 'hivemall.knn.distance.EuclidDistanceUDF';

drop temporary function if exists cosine_distance;
create temporary function cosine_distance
    as 'hivemall.knn.distance.CosineDistanceUDF';

drop temporary function if exists angular_distance;
create temporary function angular_distance
    as 'hivemall.knn.distance.AngularDistanceUDF';

drop temporary function if exists jaccard_distance;
create temporary function jaccard_distance
    as 'hivemall.knn.distance.JaccardDistanceUDF';

drop temporary function if exists manhattan_distance;
create temporary function manhattan_distance
    as 'hivemall.knn.distance.ManhattanDistanceUDF';

drop temporary function if exists minkowski_distance;
create temporary function minkowski_distance
    as 'hivemall.knn.distance.MinkowskiDistanceUDF';
-------------------
-- LSH functions --
-------------------
-- Functions backed by classes in hivemall.knn.lsh; each is dropped
-- (if present) before being created.
drop temporary function if exists minhashes;
create temporary function minhashes
    as 'hivemall.knn.lsh.MinHashesUDF';

drop temporary function if exists minhash;
create temporary function minhash
    as 'hivemall.knn.lsh.MinHashUDTF';

drop temporary function if exists bbit_minhash;
create temporary function bbit_minhash
    as 'hivemall.knn.lsh.bBitMinHashUDF';
----------------------
-- voting functions --
----------------------
-- Functions backed by classes in hivemall.ensemble.bagging; each is dropped
-- (if present) before being created.
drop temporary function if exists voted_avg;
create temporary function voted_avg
    as 'hivemall.ensemble.bagging.VotedAvgUDAF';

drop temporary function if exists weight_voted_avg;
create temporary function weight_voted_avg
    as 'hivemall.ensemble.bagging.WeightVotedAvgUDAF';
------------------------
-- ensemble functions --
------------------------
-- Functions backed by classes in hivemall.ensemble; each is dropped
-- (if present) before being created.
drop temporary function if exists max_label;
create temporary function max_label
    as 'hivemall.ensemble.MaxValueLabelUDAF';

drop temporary function if exists maxrow;
create temporary function maxrow
    as 'hivemall.ensemble.MaxRowUDAF';

drop temporary function if exists argmin_kld;
create temporary function argmin_kld
    as 'hivemall.ensemble.ArgminKLDistanceUDAF';
-----------------------
-- hashing functions --
-----------------------
-- Functions backed by classes in hivemall.ftvec.hashing; each is dropped
-- (if present) before being created.
drop temporary function if exists mhash;
create temporary function mhash
    as 'hivemall.ftvec.hashing.MurmurHash3UDF';

drop temporary function if exists array_hash_values;
create temporary function array_hash_values
    as 'hivemall.ftvec.hashing.ArrayHashValuesUDF';

drop temporary function if exists prefixed_hash_values;
create temporary function prefixed_hash_values
    as 'hivemall.ftvec.hashing.ArrayPrefixedHashValuesUDF';

drop temporary function if exists feature_hashing;
create temporary function feature_hashing
    as 'hivemall.ftvec.hashing.FeatureHashingUDF';
-----------------------
-- pairing functions --
-----------------------
-- Functions backed by classes in hivemall.ftvec.pairing; each is dropped
-- (if present) before being created.
drop temporary function if exists polynomial_features;
create temporary function polynomial_features
    as 'hivemall.ftvec.pairing.PolynomialFeaturesUDF';

drop temporary function if exists powered_features;
create temporary function powered_features
    as 'hivemall.ftvec.pairing.PoweredFeaturesUDF';

drop temporary function if exists feature_pairs;
create temporary function feature_pairs
    as 'hivemall.ftvec.pairing.FeaturePairsUDTF';
-----------------------
-- scaling functions --
-----------------------
-- Functions backed by classes in hivemall.ftvec.scaling; each is dropped
-- (if present) before being created.
drop temporary function if exists rescale;
create temporary function rescale
    as 'hivemall.ftvec.scaling.RescaleUDF';

drop temporary function if exists zscore;
create temporary function zscore
    as 'hivemall.ftvec.scaling.ZScoreUDF';

drop temporary function if exists l1_normalize;
create temporary function l1_normalize
    as 'hivemall.ftvec.scaling.L1NormalizationUDF';

drop temporary function if exists l2_normalize;
create temporary function l2_normalize
    as 'hivemall.ftvec.scaling.L2NormalizationUDF';
---------------------------------
-- Feature Selection functions --
---------------------------------
-- Functions backed by classes in hivemall.ftvec.selection; each is dropped
-- (if present) before being created.
drop temporary function if exists chi2;
create temporary function chi2
    as 'hivemall.ftvec.selection.ChiSquareUDF';

drop temporary function if exists snr;
create temporary function snr
    as 'hivemall.ftvec.selection.SignalNoiseRatioUDAF';
-----------------------------------
-- Feature engineering functions --
-----------------------------------
-- Functions backed by classes in hivemall.ftvec and hivemall.ftvec.amplify;
-- each is dropped (if present) before being created.
drop temporary function if exists amplify;
create temporary function amplify
    as 'hivemall.ftvec.amplify.AmplifierUDTF';

drop temporary function if exists rand_amplify;
create temporary function rand_amplify
    as 'hivemall.ftvec.amplify.RandomAmplifierUDTF';

drop temporary function if exists add_bias;
create temporary function add_bias
    as 'hivemall.ftvec.AddBiasUDF';

drop temporary function if exists sort_by_feature;
create temporary function sort_by_feature
    as 'hivemall.ftvec.SortByFeatureUDF';

drop temporary function if exists extract_feature;
create temporary function extract_feature
    as 'hivemall.ftvec.ExtractFeatureUDF';

drop temporary function if exists extract_weight;
create temporary function extract_weight
    as 'hivemall.ftvec.ExtractWeightUDF';

drop temporary function if exists add_feature_index;
create temporary function add_feature_index
    as 'hivemall.ftvec.AddFeatureIndexUDF';

drop temporary function if exists feature;
create temporary function feature
    as 'hivemall.ftvec.FeatureUDF';

drop temporary function if exists feature_index;
create temporary function feature_index
    as 'hivemall.ftvec.FeatureIndexUDF';
----------------------------------
-- feature converting functions --
----------------------------------
-- Functions backed by classes in hivemall.ftvec.conv and
-- hivemall.ftvec.binning; each is dropped (if present) before being created.
drop temporary function if exists conv2dense;
create temporary function conv2dense
    as 'hivemall.ftvec.conv.ConvertToDenseModelUDAF';

drop temporary function if exists to_dense_features;
create temporary function to_dense_features
    as 'hivemall.ftvec.conv.ToDenseFeaturesUDF';

-- to_dense: alias of to_dense_features (same class)
drop temporary function if exists to_dense;
create temporary function to_dense
    as 'hivemall.ftvec.conv.ToDenseFeaturesUDF';

drop temporary function if exists to_sparse_features;
create temporary function to_sparse_features
    as 'hivemall.ftvec.conv.ToSparseFeaturesUDF';

-- to_sparse: alias of to_sparse_features (same class)
drop temporary function if exists to_sparse;
create temporary function to_sparse
    as 'hivemall.ftvec.conv.ToSparseFeaturesUDF';

drop temporary function if exists quantify;
create temporary function quantify
    as 'hivemall.ftvec.conv.QuantifyColumnsUDTF';

drop temporary function if exists build_bins;
create temporary function build_bins
    as 'hivemall.ftvec.binning.BuildBinsUDAF';

drop temporary function if exists feature_binning;
create temporary function feature_binning
    as 'hivemall.ftvec.binning.FeatureBinningUDF';

drop temporary function if exists to_libsvm_format;
create temporary function to_libsvm_format
    as 'hivemall.ftvec.conv.ToLibSVMFormatUDF';
--------------------------
-- feature transformers --
--------------------------
-- Functions backed by classes in hivemall.ftvec.trans; each is dropped
-- (if present) before being created.
drop temporary function if exists vectorize_features;
create temporary function vectorize_features
    as 'hivemall.ftvec.trans.VectorizeFeaturesUDF';

drop temporary function if exists categorical_features;
create temporary function categorical_features
    as 'hivemall.ftvec.trans.CategoricalFeaturesUDF';

drop temporary function if exists ffm_features;
create temporary function ffm_features
    as 'hivemall.ftvec.trans.FFMFeaturesUDF';

drop temporary function if exists indexed_features;
create temporary function indexed_features
    as 'hivemall.ftvec.trans.IndexedFeatures';

drop temporary function if exists quantified_features;
create temporary function quantified_features
    as 'hivemall.ftvec.trans.QuantifiedFeaturesUDTF';

drop temporary function if exists quantitative_features;
create temporary function quantitative_features
    as 'hivemall.ftvec.trans.QuantitativeFeaturesUDF';

drop temporary function if exists binarize_label;
create temporary function binarize_label
    as 'hivemall.ftvec.trans.BinarizeLabelUDTF';

drop temporary function if exists onehot_encoding;
create temporary function onehot_encoding
    as 'hivemall.ftvec.trans.OnehotEncodingUDAF';

drop temporary function if exists add_field_indices;
create temporary function add_field_indices
    as 'hivemall.ftvec.trans.AddFieldIndicesUDF';

-- add_field_indicies: alternate spelling of add_field_indices, bound to the
-- same class and kept for backward compatibility
drop temporary function if exists add_field_indicies;
create temporary function add_field_indicies
    as 'hivemall.ftvec.trans.AddFieldIndicesUDF';
------------------------------
-- ranking helper functions --
------------------------------
-- Functions backed by classes in hivemall.ftvec.ranking; each is dropped
-- (if present) before being created.
drop temporary function if exists bpr_sampling;
create temporary function bpr_sampling
    as 'hivemall.ftvec.ranking.BprSamplingUDTF';

drop temporary function if exists item_pairs_sampling;
create temporary function item_pairs_sampling
    as 'hivemall.ftvec.ranking.ItemPairsSamplingUDTF';

drop temporary function if exists populate_not_in;
create temporary function populate_not_in
    as 'hivemall.ftvec.ranking.PopulateNotInUDTF';
--------------------------
-- ftvec/text functions --
--------------------------
-- Functions backed by classes in hivemall.ftvec.text; each is dropped
-- (if present) before being created.
drop temporary function if exists tf;
create temporary function tf
    as 'hivemall.ftvec.text.TermFrequencyUDAF';

drop temporary function if exists bm25;
create temporary function bm25
    as 'hivemall.ftvec.text.OkapiBM25UDF';
--------------------------
-- Regression functions --
--------------------------
-- Functions backed by classes in hivemall.regression; each is dropped
-- (if present) before being created.
drop temporary function if exists train_regressor;
create temporary function train_regressor
    as 'hivemall.regression.GeneralRegressorUDTF';

-- logress and train_logistic_regr are two names for the same class.
drop temporary function if exists logress;
create temporary function logress
    as 'hivemall.regression.LogressUDTF';

drop temporary function if exists train_logistic_regr;
create temporary function train_logistic_regr
    as 'hivemall.regression.LogressUDTF';

-- train_pa1_regr is bound to the outer PassiveAggressiveRegressionUDTF class;
-- the pa1a/pa2/pa2a variants are bound to its nested classes.
drop temporary function if exists train_pa1_regr;
create temporary function train_pa1_regr
    as 'hivemall.regression.PassiveAggressiveRegressionUDTF';

drop temporary function if exists train_pa1a_regr;
create temporary function train_pa1a_regr
    as 'hivemall.regression.PassiveAggressiveRegressionUDTF$PA1a';

drop temporary function if exists train_pa2_regr;
create temporary function train_pa2_regr
    as 'hivemall.regression.PassiveAggressiveRegressionUDTF$PA2';

drop temporary function if exists train_pa2a_regr;
create temporary function train_pa2a_regr
    as 'hivemall.regression.PassiveAggressiveRegressionUDTF$PA2a';

drop temporary function if exists train_arow_regr;
create temporary function train_arow_regr
    as 'hivemall.regression.AROWRegressionUDTF';

drop temporary function if exists train_arowe_regr;
create temporary function train_arowe_regr
    as 'hivemall.regression.AROWRegressionUDTF$AROWe';

drop temporary function if exists train_arowe2_regr;
create temporary function train_arowe2_regr
    as 'hivemall.regression.AROWRegressionUDTF$AROWe2';

drop temporary function if exists train_adagrad_regr;
create temporary function train_adagrad_regr
    as 'hivemall.regression.AdaGradUDTF';

drop temporary function if exists train_adadelta_regr;
create temporary function train_adadelta_regr
    as 'hivemall.regression.AdaDeltaUDTF';
---------------------
-- array functions --
---------------------
-- Functions backed by classes in hivemall.tools.array; each is dropped
-- (if present) before being created.
drop temporary function if exists float_array;
create temporary function float_array
    as 'hivemall.tools.array.AllocFloatArrayUDF';

drop temporary function if exists array_remove;
create temporary function array_remove
    as 'hivemall.tools.array.ArrayRemoveUDF';

drop temporary function if exists sort_and_uniq_array;
create temporary function sort_and_uniq_array
    as 'hivemall.tools.array.SortAndUniqArrayUDF';

drop temporary function if exists subarray_endwith;
create temporary function subarray_endwith
    as 'hivemall.tools.array.SubarrayEndWithUDF';

drop temporary function if exists subarray_startwith;
create temporary function subarray_startwith
    as 'hivemall.tools.array.SubarrayStartWithUDF';

drop temporary function if exists array_concat;
create temporary function array_concat
    as 'hivemall.tools.array.ArrayConcatUDF';

-- concat_array: older name for array_concat (same class), kept for backward
-- compatibility
drop temporary function if exists concat_array;
create temporary function concat_array
    as 'hivemall.tools.array.ArrayConcatUDF';

drop temporary function if exists subarray;
create temporary function subarray
    as 'hivemall.tools.array.SubarrayUDF';

drop temporary function if exists array_slice;
create temporary function array_slice
    as 'hivemall.tools.array.ArraySliceUDF';

drop temporary function if exists array_avg;
create temporary function array_avg
    as 'hivemall.tools.array.ArrayAvgGenericUDAF';

drop temporary function if exists array_sum;
create temporary function array_sum
    as 'hivemall.tools.array.ArraySumUDAF';

drop temporary function if exists to_string_array;
create temporary function to_string_array
    as 'hivemall.tools.array.ToStringArrayUDF';

drop temporary function if exists array_intersect;
create temporary function array_intersect
    as 'hivemall.tools.array.ArrayIntersectUDF';

drop temporary function if exists select_k_best;
create temporary function select_k_best
    as 'hivemall.tools.array.SelectKBestUDF';

drop temporary function if exists array_append;
create temporary function array_append
    as 'hivemall.tools.array.ArrayAppendUDF';

drop temporary function if exists element_at;
create temporary function element_at
    as 'hivemall.tools.array.ArrayElementAtUDF';

drop temporary function if exists array_union;
create temporary function array_union
    as 'hivemall.tools.array.ArrayUnionUDF';

drop temporary function if exists first_element;
create temporary function first_element
    as 'hivemall.tools.array.FirstElementUDF';

drop temporary function if exists last_element;
create temporary function last_element
    as 'hivemall.tools.array.LastElementUDF';

drop temporary function if exists array_flatten;
create temporary function array_flatten
    as 'hivemall.tools.array.ArrayFlattenUDF';

drop temporary function if exists array_to_str;
create temporary function array_to_str
    as 'hivemall.tools.array.ArrayToStrUDF';

drop temporary function if exists conditional_emit;
create temporary function conditional_emit
    as 'hivemall.tools.array.ConditionalEmitUDTF';

drop temporary function if exists argmin;
create temporary function argmin
    as 'hivemall.tools.array.ArgminUDF';

drop temporary function if exists argmax;
create temporary function argmax
    as 'hivemall.tools.array.ArgmaxUDF';

drop temporary function if exists arange;
create temporary function arange
    as 'hivemall.tools.array.ArangeUDF';

drop temporary function if exists argrank;
create temporary function argrank
    as 'hivemall.tools.array.ArgrankUDF';

drop temporary function if exists argsort;
create temporary function argsort
    as 'hivemall.tools.array.ArgsortUDF';
-----------------------------
-- bit operation functions --
-----------------------------
-- Functions backed by classes in hivemall.tools.bits; each is dropped
-- (if present) before being created.
drop temporary function if exists bits_collect;
create temporary function bits_collect
    as 'hivemall.tools.bits.BitsCollectUDAF';

drop temporary function if exists to_bits;
create temporary function to_bits
    as 'hivemall.tools.bits.ToBitsUDF';

drop temporary function if exists unbits;
create temporary function unbits
    as 'hivemall.tools.bits.UnBitsUDF';

drop temporary function if exists bits_or;
create temporary function bits_or
    as 'hivemall.tools.bits.BitsORUDF';
---------------------------
-- compression functions --
---------------------------
-- Functions backed by classes in hivemall.tools.compress; each is dropped
-- (if present) before being created.
drop temporary function if exists inflate;
create temporary function inflate
    as 'hivemall.tools.compress.InflateUDF';

drop temporary function if exists deflate;
create temporary function deflate
    as 'hivemall.tools.compress.DeflateUDF';
---------------------
-- map functions --
---------------------
-- Functions backed by classes in hivemall.tools.map; each is dropped
-- (if present) before being created.
drop temporary function if exists map_get_sum;
create temporary function map_get_sum
    as 'hivemall.tools.map.MapGetSumUDF';

drop temporary function if exists map_tail_n;
create temporary function map_tail_n
    as 'hivemall.tools.map.MapTailNUDF';

drop temporary function if exists to_map;
create temporary function to_map
    as 'hivemall.tools.map.UDAFToMap';

drop temporary function if exists to_ordered_map;
create temporary function to_ordered_map
    as 'hivemall.tools.map.UDAFToOrderedMap';

drop temporary function if exists map_include_keys;
create temporary function map_include_keys
    as 'hivemall.tools.map.MapIncludeKeysUDF';

drop temporary function if exists map_exclude_keys;
create temporary function map_exclude_keys
    as 'hivemall.tools.map.MapExcludeKeysUDF';

drop temporary function if exists map_get;
create temporary function map_get
    as 'hivemall.tools.map.MapGetUDF';

drop temporary function if exists map_key_values;
create temporary function map_key_values
    as 'hivemall.tools.map.MapKeyValuesUDF';

drop temporary function if exists map_roulette;
create temporary function map_roulette
    as 'hivemall.tools.map.MapRouletteUDF';
---------------------
-- list functions --
---------------------
-- Registers to_ordered_list (class in hivemall.tools.list), dropping any
-- existing definition first.
drop temporary function if exists to_ordered_list;
create temporary function to_ordered_list
    as 'hivemall.tools.list.UDAFToOrderedList';
---------------------
-- Math functions --
---------------------
-- Functions backed by classes in hivemall.tools.math; each is dropped
-- (if present) before being created.
drop temporary function if exists sigmoid;
create temporary function sigmoid
    as 'hivemall.tools.math.SigmoidGenericUDF';

drop temporary function if exists l2_norm;
create temporary function l2_norm
    as 'hivemall.tools.math.L2NormUDAF';

drop temporary function if exists infinity;
create temporary function infinity
    as 'hivemall.tools.math.InfinityUDF';

drop temporary function if exists is_infinite;
create temporary function is_infinite
    as 'hivemall.tools.math.IsInfiniteUDF';

drop temporary function if exists is_finite;
create temporary function is_finite
    as 'hivemall.tools.math.IsFiniteUDF';

drop temporary function if exists nan;
create temporary function nan
    as 'hivemall.tools.math.NanUDF';

drop temporary function if exists is_nan;
create temporary function is_nan
    as 'hivemall.tools.math.IsNanUDF';
-----------------------------
-- Matrix/Vector functions --
-----------------------------
-- Functions backed by classes in hivemall.tools.matrix and
-- hivemall.tools.vector; each is dropped (if present) before being created.
drop temporary function if exists transpose_and_dot;
create temporary function transpose_and_dot
    as 'hivemall.tools.matrix.TransposeAndDotUDAF';

drop temporary function if exists vector_add;
create temporary function vector_add
    as 'hivemall.tools.vector.VectorAddUDF';

drop temporary function if exists vector_dot;
create temporary function vector_dot
    as 'hivemall.tools.vector.VectorDotUDF';
----------------------
-- mapred functions --
----------------------
-- Functions backed by classes in hivemall.tools.mapred; each is dropped
-- (if present) before being created.
drop temporary function if exists taskid;
create temporary function taskid
    as 'hivemall.tools.mapred.TaskIdUDF';

drop temporary function if exists jobid;
create temporary function jobid
    as 'hivemall.tools.mapred.JobIdUDF';

drop temporary function if exists rowid;
create temporary function rowid
    as 'hivemall.tools.mapred.RowIdUDF';

drop temporary function if exists rownum;
create temporary function rownum
    as 'hivemall.tools.mapred.RowNumberUDF';

drop temporary function if exists distcache_gets;
create temporary function distcache_gets
    as 'hivemall.tools.mapred.DistributedCacheLookupUDF';

drop temporary function if exists jobconf_gets;
create temporary function jobconf_gets
    as 'hivemall.tools.mapred.JobConfGetsUDF';
--------------------
-- JSON functions --
--------------------
-- Functions backed by classes in hivemall.tools.json; each is dropped
-- (if present) before being created.
drop temporary function if exists to_json;
create temporary function to_json
    as 'hivemall.tools.json.ToJsonUDF';

drop temporary function if exists from_json;
create temporary function from_json
    as 'hivemall.tools.json.FromJsonUDF';
----------------------------
-- Sanity Check functions --
----------------------------
-- Functions backed by classes in hivemall.tools.sanity; each is dropped
-- (if present) before being created.
drop temporary function if exists assert;
create temporary function assert
    as 'hivemall.tools.sanity.AssertUDF';

drop temporary function if exists raise_error;
create temporary function raise_error
    as 'hivemall.tools.sanity.RaiseErrorUDF';
--------------------
-- misc functions --
--------------------
-- Functions backed by classes directly under hivemall.tools, plus one each
-- from hivemall.tools.datetime and hivemall.tools.timeseries; each is dropped
-- (if present) before being created.
drop temporary function if exists generate_series;
create temporary function generate_series
    as 'hivemall.tools.GenerateSeriesUDTF';

drop temporary function if exists convert_label;
create temporary function convert_label
    as 'hivemall.tools.ConvertLabelUDF';

drop temporary function if exists x_rank;
create temporary function x_rank
    as 'hivemall.tools.RankSequenceUDF';

drop temporary function if exists each_top_k;
create temporary function each_top_k
    as 'hivemall.tools.EachTopKUDTF';

drop temporary function if exists try_cast;
create temporary function try_cast
    as 'hivemall.tools.TryCastUDF';

drop temporary function if exists sessionize;
create temporary function sessionize
    as 'hivemall.tools.datetime.SessionizeUDF';

drop temporary function if exists moving_avg;
create temporary function moving_avg
    as 'hivemall.tools.timeseries.MovingAverageUDTF';
-------------------------------
-- Text processing functions --
-------------------------------
-- Functions backed by classes in hivemall.tools.text; each is dropped
-- (if present) before being created.
drop temporary function if exists tokenize;
create temporary function tokenize
    as 'hivemall.tools.text.TokenizeUDF';

drop temporary function if exists is_stopword;
create temporary function is_stopword
    as 'hivemall.tools.text.StopwordUDF';

drop temporary function if exists singularize;
create temporary function singularize
    as 'hivemall.tools.text.SingularizeUDF';

drop temporary function if exists split_words;
create temporary function split_words
    as 'hivemall.tools.text.SplitWordsUDF';

drop temporary function if exists normalize_unicode;
create temporary function normalize_unicode
    as 'hivemall.tools.text.NormalizeUnicodeUDF';

drop temporary function if exists base91;
create temporary function base91
    as 'hivemall.tools.text.Base91UDF';

drop temporary function if exists unbase91;
create temporary function unbase91
    as 'hivemall.tools.text.Unbase91UDF';

drop temporary function if exists word_ngrams;
create temporary function word_ngrams
    as 'hivemall.tools.text.WordNgramsUDF';
---------------------------------
-- Dataset generator functions --
---------------------------------
-- Registers lr_datagen (class in hivemall.dataset), dropping any existing
-- definition first.
drop temporary function if exists lr_datagen;
create temporary function lr_datagen
    as 'hivemall.dataset.LogisticRegressionDataGeneratorUDTF';
--------------------------
-- Evaluating functions --
--------------------------
-- Functions backed by classes in hivemall.evaluation; each is dropped
-- (if present) before being created.
drop temporary function if exists f1score;
create temporary function f1score
    as 'hivemall.evaluation.F1ScoreUDAF';

drop temporary function if exists fmeasure;
create temporary function fmeasure
    as 'hivemall.evaluation.FMeasureUDAF';

drop temporary function if exists mae;
create temporary function mae
    as 'hivemall.evaluation.MeanAbsoluteErrorUDAF';

drop temporary function if exists mse;
create temporary function mse
    as 'hivemall.evaluation.MeanSquaredErrorUDAF';

drop temporary function if exists rmse;
create temporary function rmse
    as 'hivemall.evaluation.RootMeanSquaredErrorUDAF';

drop temporary function if exists r2;
create temporary function r2
    as 'hivemall.evaluation.R2UDAF';

drop temporary function if exists ndcg;
create temporary function ndcg
    as 'hivemall.evaluation.NDCGUDAF';

drop temporary function if exists precision_at;
create temporary function precision_at
    as 'hivemall.evaluation.PrecisionUDAF';

drop temporary function if exists recall_at;
create temporary function recall_at
    as 'hivemall.evaluation.RecallUDAF';

drop temporary function if exists hitrate;
create temporary function hitrate
    as 'hivemall.evaluation.HitRateUDAF';

drop temporary function if exists mrr;
create temporary function mrr
    as 'hivemall.evaluation.MRRUDAF';

drop temporary function if exists average_precision;
create temporary function average_precision
    as 'hivemall.evaluation.MAPUDAF';

drop temporary function if exists auc;
create temporary function auc
    as 'hivemall.evaluation.AUCUDAF';

drop temporary function if exists logloss;
create temporary function logloss
    as 'hivemall.evaluation.LogarithmicLossUDAF';
--------------------------
-- Matrix Factorization --
--------------------------
-- Functions backed by classes in hivemall.mf; each is dropped (if present)
-- before being created.
drop temporary function if exists mf_predict;
create temporary function mf_predict
    as 'hivemall.mf.MFPredictionUDF';

drop temporary function if exists train_mf_sgd;
create temporary function train_mf_sgd
    as 'hivemall.mf.MatrixFactorizationSGDUDTF';

drop temporary function if exists train_mf_adagrad;
create temporary function train_mf_adagrad
    as 'hivemall.mf.MatrixFactorizationAdaGradUDTF';

drop temporary function if exists train_bprmf;
create temporary function train_bprmf
    as 'hivemall.mf.BPRMatrixFactorizationUDTF';

drop temporary function if exists bprmf_predict;
create temporary function bprmf_predict
    as 'hivemall.mf.BPRMFPredictionUDF';
---------------------------
-- Factorization Machine --
---------------------------
-- Functions backed by classes in hivemall.fm; each is dropped (if present)
-- before being created.
drop temporary function if exists fm_predict;
create temporary function fm_predict
    as 'hivemall.fm.FMPredictGenericUDAF';

drop temporary function if exists train_fm;
create temporary function train_fm
    as 'hivemall.fm.FactorizationMachineUDTF';

drop temporary function if exists train_ffm;
create temporary function train_ffm
    as 'hivemall.fm.FieldAwareFactorizationMachineUDTF';

drop temporary function if exists ffm_predict;
create temporary function ffm_predict
    as 'hivemall.fm.FFMPredictGenericUDAF';
---------------------------
-- Anomaly Detection ------
---------------------------
-- Functions backed by classes in hivemall.anomaly; each is dropped
-- (if present) before being created.
drop temporary function if exists changefinder;
create temporary function changefinder
    as 'hivemall.anomaly.ChangeFinderUDF';

drop temporary function if exists sst;
create temporary function sst
    as 'hivemall.anomaly.SingularSpectrumTransformUDF';
--------------------
-- Topic Modeling --
--------------------
-- Functions backed by classes in hivemall.topicmodel; each is dropped
-- (if present) before being created.
drop temporary function if exists train_lda;
create temporary function train_lda
    as 'hivemall.topicmodel.LDAUDTF';

drop temporary function if exists lda_predict;
create temporary function lda_predict
    as 'hivemall.topicmodel.LDAPredictUDAF';

drop temporary function if exists train_plsa;
create temporary function train_plsa
    as 'hivemall.topicmodel.PLSAUDTF';

drop temporary function if exists plsa_predict;
create temporary function plsa_predict
    as 'hivemall.topicmodel.PLSAPredictUDAF';
---------------------------
-- Geo-Spatial functions --
---------------------------
-- Functions backed by classes in hivemall.geospatial; each is dropped
-- (if present) before being created.
drop temporary function if exists tile;
create temporary function tile
    as 'hivemall.geospatial.TileUDF';

drop temporary function if exists map_url;
create temporary function map_url
    as 'hivemall.geospatial.MapURLUDF';

drop temporary function if exists lat2tiley;
create temporary function lat2tiley
    as 'hivemall.geospatial.Lat2TileYUDF';

drop temporary function if exists lon2tilex;
create temporary function lon2tilex
    as 'hivemall.geospatial.Lon2TileXUDF';

drop temporary function if exists tilex2lon;
create temporary function tilex2lon
    as 'hivemall.geospatial.TileX2LonUDF';

drop temporary function if exists tiley2lat;
create temporary function tiley2lat
    as 'hivemall.geospatial.TileY2LatUDF';

drop temporary function if exists haversine_distance;
create temporary function haversine_distance
    as 'hivemall.geospatial.HaversineDistanceUDF';
----------------------------
-- Smile related features --
----------------------------
-- Functions backed by classes in hivemall.smile.*; each is dropped
-- (if present) before being created.
drop temporary function if exists train_randomforest_classifier;
create temporary function train_randomforest_classifier
    as 'hivemall.smile.classification.RandomForestClassifierUDTF';

-- train_randomforest_regressor and train_randomforest_regr are two names for
-- the same class.
drop temporary function if exists train_randomforest_regressor;
create temporary function train_randomforest_regressor
    as 'hivemall.smile.regression.RandomForestRegressionUDTF';

drop temporary function if exists train_randomforest_regr;
create temporary function train_randomforest_regr
    as 'hivemall.smile.regression.RandomForestRegressionUDTF';

drop temporary function if exists tree_predict;
create temporary function tree_predict
    as 'hivemall.smile.tools.TreePredictUDF';

-- tree_predict_v1: bound to TreePredictUDFv1, kept for backward compatibility
drop temporary function if exists tree_predict_v1;
create temporary function tree_predict_v1
    as 'hivemall.smile.tools.TreePredictUDFv1';

drop temporary function if exists tree_export;
create temporary function tree_export
    as 'hivemall.smile.tools.TreeExportUDF';

drop temporary function if exists rf_ensemble;
create temporary function rf_ensemble
    as 'hivemall.smile.tools.RandomForestEnsembleUDAF';

drop temporary function if exists guess_attribute_types;
create temporary function guess_attribute_types
    as 'hivemall.smile.tools.GuessAttributesUDF';

drop temporary function if exists decision_path;
create temporary function decision_path
    as 'hivemall.smile.tools.DecisionPathUDF';
--------------------
-- Recommendation --
--------------------
-- Registers train_slim (class in hivemall.recommend), dropping any existing
-- definition first.
drop temporary function if exists train_slim;
create temporary function train_slim
    as 'hivemall.recommend.SlimUDTF';
-----------------
-- Data Sketch --
-----------------
-- Registers approx_count_distinct (class in hivemall.sketch.hll), dropping
-- any existing definition first.
drop temporary function if exists approx_count_distinct;
create temporary function approx_count_distinct
    as 'hivemall.sketch.hll.ApproxCountDistinctUDAF';
------------------
-- Bloom Filter --
------------------
-- Functions backed by classes in hivemall.sketch.bloom; each is dropped
-- (if present) before being created.
drop temporary function if exists bloom;
create temporary function bloom
    as 'hivemall.sketch.bloom.BloomFilterUDAF';

drop temporary function if exists bloom_and;
create temporary function bloom_and
    as 'hivemall.sketch.bloom.BloomAndUDF';

drop temporary function if exists bloom_contains;
create temporary function bloom_contains
    as 'hivemall.sketch.bloom.BloomContainsUDF';

drop temporary function if exists bloom_not;
create temporary function bloom_not
    as 'hivemall.sketch.bloom.BloomNotUDF';

drop temporary function if exists bloom_or;
create temporary function bloom_or
    as 'hivemall.sketch.bloom.BloomOrUDF';

drop temporary function if exists bloom_contains_any;
create temporary function bloom_contains_any
    as 'hivemall.sketch.bloom.BloomContainsAnyUDF';
--------------------------------------------------------------------------------------------------
-- macros available from hive 0.12.0
-- see https://issues.apache.org/jira/browse/HIVE-2655
--------------------
-- General Macros --
--------------------
-- Each macro is dropped first (when present), mirroring the drop/create
-- pairing used for the temporary functions in this script, so that
-- re-running the script redefines every macro explicitly.

-- java_min(x, y): calls java.lang.Math.min(x, y) through reflect().
drop temporary macro if exists java_min;
create temporary macro java_min(x DOUBLE, y DOUBLE)
reflect("java.lang.Math", "min", x, y);

-- max2(x, y): x when x > y, otherwise y. if() also takes the "otherwise"
-- branch when the comparison is NULL, so a NULL argument yields y.
drop temporary macro if exists max2;
create temporary macro max2(x DOUBLE, y DOUBLE)
if(x>y,x,y);

-- min2(x, y): x when x < y, otherwise y (a NULL argument likewise yields y).
drop temporary macro if exists min2;
create temporary macro min2(x DOUBLE, y DOUBLE)
if(x<y,x,y);

-- rand_gid(k): floor of rand() scaled by k.
drop temporary macro if exists rand_gid;
create temporary macro rand_gid(k INT)
floor(rand()*k);

-- rand_gid2(k, seed): same as rand_gid, but passes seed to rand().
drop temporary macro if exists rand_gid2;
create temporary macro rand_gid2(k INT, seed INT)
floor(rand(seed)*k);
--------------------------
-- Statistics functions --
--------------------------
-- idf(df_t, n_docs): log10(n_docs / max2(1, df_t)) + 1.0. The max2(1, df_t)
-- guard keeps the divisor at 1 or more.
drop temporary macro if exists idf;
create temporary macro idf(df_t DOUBLE, n_docs DOUBLE)
log(10, n_docs / max2(1,df_t)) + 1.0;

-- tfidf(tf, df_t, n_docs): tf multiplied by the same expression that idf
-- evaluates for (df_t, n_docs).
drop temporary macro if exists tfidf;
create temporary macro tfidf(tf FLOAT, df_t DOUBLE, n_docs DOUBLE)
tf * (log(10, n_docs / max2(1,df_t)) + 1.0);