blob: d8946bfe53e61701a20e9020de1e0f7aa7426b88 [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta content="text/html; charset=utf-8" http-equiv="Content-Type">
<title>Input Format · Hivemall User Manual</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="description" content="">
<meta name="generator" content="GitBook 3.2.3">
<link rel="stylesheet" href="../gitbook/style.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-splitter/splitter.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-etoc/plugin.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-callouts/plugin.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-toggle-chapters/toggle.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-codeblock-filename/block.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-expandable-chapters/expandable-chapters.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-multipart/multipart.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-katex/katex.min.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-emphasize/plugin.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-fontsettings/website.css">
<link rel="stylesheet" href="../gitbook/gitbook-plugin-theme-api/theme-api.css">
<meta name="HandheldFriendly" content="true"/>
<meta name="viewport" content="width=device-width, initial-scale=1, user-scalable=no">
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="apple-mobile-web-app-status-bar-style" content="black">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
<link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
<link rel="next" href="../misc/funcs.html" />
<link rel="prev" href="permanent-functions.html" />
</head>
<body>
<div class="book">
<div class="book-summary">
<div id="book-search-input" role="search">
<input type="text" placeholder="Type to search" />
</div>
<nav role="navigation">
<ul class="summary">
<li>
<a href="https://hivemall.incubator.apache.org/" target="_blank" class="custom-link"><i class="fa fa-home"></i> Home</a>
</li>
<li class="divider"></li>
<li class="header">TABLE OF CONTENTS</li>
<li class="chapter " data-level="1.1" data-path="../">
<a href="../">
<b>1.1.</b>
Introduction
</a>
</li>
<li class="chapter " data-level="1.2" data-path="./">
<a href="./">
<b>1.2.</b>
Getting Started
</a>
<ul class="articles">
<li class="chapter " data-level="1.2.1" data-path="installation.html">
<a href="installation.html">
<b>1.2.1.</b>
Installation
</a>
</li>
<li class="chapter " data-level="1.2.2" data-path="permanent-functions.html">
<a href="permanent-functions.html">
<b>1.2.2.</b>
Install as permanent functions
</a>
</li>
<li class="chapter active" data-level="1.2.3" data-path="input-format.html">
<a href="input-format.html">
<b>1.2.3.</b>
Input Format
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.3" data-path="../misc/funcs.html">
<a href="../misc/funcs.html">
<b>1.3.</b>
List of Functions
</a>
</li>
<li class="chapter " data-level="1.4" data-path="../tips/">
<a href="../tips/">
<b>1.4.</b>
Tips for Effective Hivemall
</a>
<ul class="articles">
<li class="chapter " data-level="1.4.1" data-path="../tips/addbias.html">
<a href="../tips/addbias.html">
<b>1.4.1.</b>
Explicit add_bias() for better prediction
</a>
</li>
<li class="chapter " data-level="1.4.2" data-path="../tips/rand_amplify.html">
<a href="../tips/rand_amplify.html">
<b>1.4.2.</b>
Use rand_amplify() to better prediction results
</a>
</li>
<li class="chapter " data-level="1.4.3" data-path="../tips/rt_prediction.html">
<a href="../tips/rt_prediction.html">
<b>1.4.3.</b>
Real-time prediction on RDBMS
</a>
</li>
<li class="chapter " data-level="1.4.4" data-path="../tips/ensemble_learning.html">
<a href="../tips/ensemble_learning.html">
<b>1.4.4.</b>
Ensemble learning for stable prediction
</a>
</li>
<li class="chapter " data-level="1.4.5" data-path="../tips/mixserver.html">
<a href="../tips/mixserver.html">
<b>1.4.5.</b>
Mixing models for a better prediction convergence (MIX server)
</a>
</li>
<li class="chapter " data-level="1.4.6" data-path="../tips/emr.html">
<a href="../tips/emr.html">
<b>1.4.6.</b>
Run Hivemall on Amazon Elastic MapReduce
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.5" data-path="../tips/general_tips.html">
<a href="../tips/general_tips.html">
<b>1.5.</b>
General Hive/Hadoop Tips
</a>
<ul class="articles">
<li class="chapter " data-level="1.5.1" data-path="../tips/rowid.html">
<a href="../tips/rowid.html">
<b>1.5.1.</b>
Adding rowid for each row
</a>
</li>
<li class="chapter " data-level="1.5.2" data-path="../tips/hadoop_tuning.html">
<a href="../tips/hadoop_tuning.html">
<b>1.5.2.</b>
Hadoop tuning for Hivemall
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="1.6" data-path="../troubleshooting/">
<a href="../troubleshooting/">
<b>1.6.</b>
Troubleshooting
</a>
<ul class="articles">
<li class="chapter " data-level="1.6.1" data-path="../troubleshooting/oom.html">
<a href="../troubleshooting/oom.html">
<b>1.6.1.</b>
OutOfMemoryError in training
</a>
</li>
<li class="chapter " data-level="1.6.2" data-path="../troubleshooting/mapjoin_task_error.html">
<a href="../troubleshooting/mapjoin_task_error.html">
<b>1.6.2.</b>
SemanticException generate map join task error: Cannot serialize object
</a>
</li>
<li class="chapter " data-level="1.6.3" data-path="../troubleshooting/asterisk.html">
<a href="../troubleshooting/asterisk.html">
<b>1.6.3.</b>
Asterisk argument for UDTF does not work
</a>
</li>
<li class="chapter " data-level="1.6.4" data-path="../troubleshooting/num_mappers.html">
<a href="../troubleshooting/num_mappers.html">
<b>1.6.4.</b>
The number of mappers is less than input splits in Hadoop 2.x
</a>
</li>
<li class="chapter " data-level="1.6.5" data-path="../troubleshooting/mapjoin_classcastex.html">
<a href="../troubleshooting/mapjoin_classcastex.html">
<b>1.6.5.</b>
Map-side join causes ClassCastException on Tez
</a>
</li>
</ul>
</li>
<li class="header">Part II - Generic Features</li>
<li class="chapter " data-level="2.1" data-path="../misc/generic_funcs.html">
<a href="../misc/generic_funcs.html">
<b>2.1.</b>
List of Generic Hivemall Functions
</a>
</li>
<li class="chapter " data-level="2.2" data-path="../misc/topk.html">
<a href="../misc/topk.html">
<b>2.2.</b>
Efficient Top-K Query Processing
</a>
</li>
<li class="chapter " data-level="2.3" data-path="../misc/tokenizer.html">
<a href="../misc/tokenizer.html">
<b>2.3.</b>
Text Tokenizer
</a>
</li>
<li class="chapter " data-level="2.4" data-path="../misc/approx.html">
<a href="../misc/approx.html">
<b>2.4.</b>
Approximate Aggregate Functions
</a>
</li>
<li class="header">Part III - Feature Engineering</li>
<li class="chapter " data-level="3.1" data-path="../ft_engineering/scaling.html">
<a href="../ft_engineering/scaling.html">
<b>3.1.</b>
Feature Scaling
</a>
</li>
<li class="chapter " data-level="3.2" data-path="../ft_engineering/hashing.html">
<a href="../ft_engineering/hashing.html">
<b>3.2.</b>
Feature Hashing
</a>
</li>
<li class="chapter " data-level="3.3" data-path="../ft_engineering/selection.html">
<a href="../ft_engineering/selection.html">
<b>3.3.</b>
Feature Selection
</a>
</li>
<li class="chapter " data-level="3.4" data-path="../ft_engineering/binning.html">
<a href="../ft_engineering/binning.html">
<b>3.4.</b>
Feature Binning
</a>
</li>
<li class="chapter " data-level="3.5" data-path="../ft_engineering/pairing.html">
<a href="../ft_engineering/pairing.html">
<b>3.5.</b>
Feature Pairing
</a>
<ul class="articles">
<li class="chapter " data-level="3.5.1" data-path="../ft_engineering/polynomial.html">
<a href="../ft_engineering/polynomial.html">
<b>3.5.1.</b>
Polynomial features
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="3.6" data-path="../ft_engineering/ft_trans.html">
<a href="../ft_engineering/ft_trans.html">
<b>3.6.</b>
Feature Transformation
</a>
<ul class="articles">
<li class="chapter " data-level="3.6.1" data-path="../ft_engineering/vectorization.html">
<a href="../ft_engineering/vectorization.html">
<b>3.6.1.</b>
Feature vectorization
</a>
</li>
<li class="chapter " data-level="3.6.2" data-path="../ft_engineering/quantify.html">
<a href="../ft_engineering/quantify.html">
<b>3.6.2.</b>
Quantify non-number features
</a>
</li>
<li class="chapter " data-level="3.6.3" data-path="../ft_engineering/binarize.html">
<a href="../ft_engineering/binarize.html">
<b>3.6.3.</b>
Binarize label
</a>
</li>
<li class="chapter " data-level="3.6.4" data-path="../ft_engineering/onehot.html">
<a href="../ft_engineering/onehot.html">
<b>3.6.4.</b>
One-hot encoding
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="3.7" data-path="../ft_engineering/term_vector.html">
<a href="../ft_engineering/term_vector.html">
<b>3.7.</b>
Term Vector Model
</a>
<ul class="articles">
<li class="chapter " data-level="3.7.1" data-path="../ft_engineering/tfidf.html">
<a href="../ft_engineering/tfidf.html">
<b>3.7.1.</b>
TF-IDF Term Weighting
</a>
</li>
<li class="chapter " data-level="3.7.2" data-path="../ft_engineering/bm25.html">
<a href="../ft_engineering/bm25.html">
<b>3.7.2.</b>
Okapi BM25 Term Weighting
</a>
</li>
</ul>
</li>
<li class="header">Part IV - Evaluation</li>
<li class="chapter " data-level="4.1" data-path="../eval/binary_classification_measures.html">
<a href="../eval/binary_classification_measures.html">
<b>4.1.</b>
Binary Classification Metrics
</a>
<ul class="articles">
<li class="chapter " data-level="4.1.1" data-path="../eval/auc.html">
<a href="../eval/auc.html">
<b>4.1.1.</b>
Area under the ROC curve
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="4.2" data-path="../eval/multilabel_classification_measures.html">
<a href="../eval/multilabel_classification_measures.html">
<b>4.2.</b>
Multi-label Classification Metrics
</a>
</li>
<li class="chapter " data-level="4.3" data-path="../eval/regression.html">
<a href="../eval/regression.html">
<b>4.3.</b>
Regression Metrics
</a>
</li>
<li class="chapter " data-level="4.4" data-path="../eval/rank.html">
<a href="../eval/rank.html">
<b>4.4.</b>
Ranking Measures
</a>
</li>
<li class="chapter " data-level="4.5" data-path="../eval/datagen.html">
<a href="../eval/datagen.html">
<b>4.5.</b>
Data Generation
</a>
<ul class="articles">
<li class="chapter " data-level="4.5.1" data-path="../eval/lr_datagen.html">
<a href="../eval/lr_datagen.html">
<b>4.5.1.</b>
Logistic Regression data generation
</a>
</li>
</ul>
</li>
<li class="header">Part V - Supervised Learning</li>
<li class="chapter " data-level="5.1" data-path="../supervised_learning/prediction.html">
<a href="../supervised_learning/prediction.html">
<b>5.1.</b>
How Prediction Works
</a>
</li>
<li class="chapter " data-level="5.2" data-path="../supervised_learning/tutorial.html">
<a href="../supervised_learning/tutorial.html">
<b>5.2.</b>
Step-by-Step Tutorial on Supervised Learning
</a>
</li>
<li class="header">Part VI - Binary Classification</li>
<li class="chapter " data-level="6.1" data-path="../binaryclass/general.html">
<a href="../binaryclass/general.html">
<b>6.1.</b>
Binary Classification
</a>
</li>
<li class="chapter " data-level="6.2" data-path="../binaryclass/a9a.html">
<a href="../binaryclass/a9a.html">
<b>6.2.</b>
a9a Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="6.2.1" data-path="../binaryclass/a9a_dataset.html">
<a href="../binaryclass/a9a_dataset.html">
<b>6.2.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="6.2.2" data-path="../binaryclass/a9a_generic.html">
<a href="../binaryclass/a9a_generic.html">
<b>6.2.2.</b>
General Binary Classifier
</a>
</li>
<li class="chapter " data-level="6.2.3" data-path="../binaryclass/a9a_lr.html">
<a href="../binaryclass/a9a_lr.html">
<b>6.2.3.</b>
Logistic Regression
</a>
</li>
<li class="chapter " data-level="6.2.4" data-path="../binaryclass/a9a_minibatch.html">
<a href="../binaryclass/a9a_minibatch.html">
<b>6.2.4.</b>
Mini-batch Gradient Descent
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="6.3" data-path="../binaryclass/news20.html">
<a href="../binaryclass/news20.html">
<b>6.3.</b>
News20 Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="6.3.1" data-path="../binaryclass/news20_dataset.html">
<a href="../binaryclass/news20_dataset.html">
<b>6.3.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="6.3.2" data-path="../binaryclass/news20_pa.html">
<a href="../binaryclass/news20_pa.html">
<b>6.3.2.</b>
Perceptron, Passive Aggressive
</a>
</li>
<li class="chapter " data-level="6.3.3" data-path="../binaryclass/news20_scw.html">
<a href="../binaryclass/news20_scw.html">
<b>6.3.3.</b>
CW, AROW, SCW
</a>
</li>
<li class="chapter " data-level="6.3.4" data-path="../binaryclass/news20_generic.html">
<a href="../binaryclass/news20_generic.html">
<b>6.3.4.</b>
General Binary Classifier
</a>
</li>
<li class="chapter " data-level="6.3.5" data-path="../binaryclass/news20_generic_bagging.html">
<a href="../binaryclass/news20_generic_bagging.html">
<b>6.3.5.</b>
Bagging classifiers
</a>
</li>
<li class="chapter " data-level="6.3.6" data-path="../binaryclass/news20_adagrad.html">
<a href="../binaryclass/news20_adagrad.html">
<b>6.3.6.</b>
AdaGradRDA, AdaGrad, AdaDelta
</a>
</li>
<li class="chapter " data-level="6.3.7" data-path="../binaryclass/news20_rf.html">
<a href="../binaryclass/news20_rf.html">
<b>6.3.7.</b>
Random Forest
</a>
</li>
<li class="chapter " data-level="6.3.8" data-path="../binaryclass/news20b_xgboost.html">
<a href="../binaryclass/news20b_xgboost.html">
<b>6.3.8.</b>
XGBoost
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="6.4" data-path="../binaryclass/kdd2010a.html">
<a href="../binaryclass/kdd2010a.html">
<b>6.4.</b>
KDD2010a Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="6.4.1" data-path="../binaryclass/kdd2010a_dataset.html">
<a href="../binaryclass/kdd2010a_dataset.html">
<b>6.4.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="6.4.2" data-path="../binaryclass/kdd2010a_scw.html">
<a href="../binaryclass/kdd2010a_scw.html">
<b>6.4.2.</b>
PA, CW, AROW, SCW
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="6.5" data-path="../binaryclass/kdd2010b.html">
<a href="../binaryclass/kdd2010b.html">
<b>6.5.</b>
KDD2010b Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="6.5.1" data-path="../binaryclass/kdd2010b_dataset.html">
<a href="../binaryclass/kdd2010b_dataset.html">
<b>6.5.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="6.5.2" data-path="../binaryclass/kdd2010b_arow.html">
<a href="../binaryclass/kdd2010b_arow.html">
<b>6.5.2.</b>
AROW
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="6.6" data-path="../binaryclass/webspam.html">
<a href="../binaryclass/webspam.html">
<b>6.6.</b>
Webspam Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="6.6.1" data-path="../binaryclass/webspam_dataset.html">
<a href="../binaryclass/webspam_dataset.html">
<b>6.6.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="6.6.2" data-path="../binaryclass/webspam_scw.html">
<a href="../binaryclass/webspam_scw.html">
<b>6.6.2.</b>
PA1, AROW, SCW
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="6.7" data-path="../binaryclass/titanic_rf.html">
<a href="../binaryclass/titanic_rf.html">
<b>6.7.</b>
Kaggle Titanic Tutorial
</a>
</li>
<li class="chapter " data-level="6.8" data-path="../binaryclass/criteo.html">
<a href="../binaryclass/criteo.html">
<b>6.8.</b>
Criteo Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="6.8.1" data-path="../binaryclass/criteo_dataset.html">
<a href="../binaryclass/criteo_dataset.html">
<b>6.8.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="6.8.2" data-path="../binaryclass/criteo_ffm.html">
<a href="../binaryclass/criteo_ffm.html">
<b>6.8.2.</b>
Field-Aware Factorization Machines
</a>
</li>
</ul>
</li>
<li class="header">Part VII - Multiclass Classification</li>
<li class="chapter " data-level="7.1" data-path="../multiclass/news20.html">
<a href="../multiclass/news20.html">
<b>7.1.</b>
News20 Multiclass Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="7.1.1" data-path="../multiclass/news20_dataset.html">
<a href="../multiclass/news20_dataset.html">
<b>7.1.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="7.1.2" data-path="../multiclass/news20_one-vs-the-rest_dataset.html">
<a href="../multiclass/news20_one-vs-the-rest_dataset.html">
<b>7.1.2.</b>
Data Preparation for one-vs-the-rest classifiers
</a>
</li>
<li class="chapter " data-level="7.1.3" data-path="../multiclass/news20_pa.html">
<a href="../multiclass/news20_pa.html">
<b>7.1.3.</b>
PA
</a>
</li>
<li class="chapter " data-level="7.1.4" data-path="../multiclass/news20_scw.html">
<a href="../multiclass/news20_scw.html">
<b>7.1.4.</b>
CW, AROW, SCW
</a>
</li>
<li class="chapter " data-level="7.1.5" data-path="../multiclass/news20_xgboost.html">
<a href="../multiclass/news20_xgboost.html">
<b>7.1.5.</b>
XGBoost
</a>
</li>
<li class="chapter " data-level="7.1.6" data-path="../multiclass/news20_ensemble.html">
<a href="../multiclass/news20_ensemble.html">
<b>7.1.6.</b>
Ensemble learning
</a>
</li>
<li class="chapter " data-level="7.1.7" data-path="../multiclass/news20_one-vs-the-rest.html">
<a href="../multiclass/news20_one-vs-the-rest.html">
<b>7.1.7.</b>
one-vs-the-rest Classifier
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="7.2" data-path="../multiclass/iris.html">
<a href="../multiclass/iris.html">
<b>7.2.</b>
Iris Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="7.2.1" data-path="../multiclass/iris_dataset.html">
<a href="../multiclass/iris_dataset.html">
<b>7.2.1.</b>
Data preparation
</a>
</li>
<li class="chapter " data-level="7.2.2" data-path="../multiclass/iris_scw.html">
<a href="../multiclass/iris_scw.html">
<b>7.2.2.</b>
SCW
</a>
</li>
<li class="chapter " data-level="7.2.3" data-path="../multiclass/iris_randomforest.html">
<a href="../multiclass/iris_randomforest.html">
<b>7.2.3.</b>
Random Forest
</a>
</li>
<li class="chapter " data-level="7.2.4" data-path="../multiclass/iris_xgboost.html">
<a href="../multiclass/iris_xgboost.html">
<b>7.2.4.</b>
XGBoost
</a>
</li>
</ul>
</li>
<li class="header">Part VIII - Regression</li>
<li class="chapter " data-level="8.1" data-path="../regression/general.html">
<a href="../regression/general.html">
<b>8.1.</b>
Regression
</a>
</li>
<li class="chapter " data-level="8.2" data-path="../regression/e2006.html">
<a href="../regression/e2006.html">
<b>8.2.</b>
E2006-tfidf Regression Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="8.2.1" data-path="../regression/e2006_dataset.html">
<a href="../regression/e2006_dataset.html">
<b>8.2.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="8.2.2" data-path="../regression/e2006_generic.html">
<a href="../regression/e2006_generic.html">
<b>8.2.2.</b>
General Regressor
</a>
</li>
<li class="chapter " data-level="8.2.3" data-path="../regression/e2006_arow.html">
<a href="../regression/e2006_arow.html">
<b>8.2.3.</b>
Passive Aggressive, AROW
</a>
</li>
<li class="chapter " data-level="8.2.4" data-path="../regression/e2006_xgboost.html">
<a href="../regression/e2006_xgboost.html">
<b>8.2.4.</b>
XGBoost
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="8.3" data-path="../regression/kddcup12tr2.html">
<a href="../regression/kddcup12tr2.html">
<b>8.3.</b>
KDDCup 2012 Track 2 CTR Prediction Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="8.3.1" data-path="../regression/kddcup12tr2_dataset.html">
<a href="../regression/kddcup12tr2_dataset.html">
<b>8.3.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="8.3.2" data-path="../regression/kddcup12tr2_lr.html">
<a href="../regression/kddcup12tr2_lr.html">
<b>8.3.2.</b>
Logistic Regression, Passive Aggressive
</a>
</li>
<li class="chapter " data-level="8.3.3" data-path="../regression/kddcup12tr2_lr_amplify.html">
<a href="../regression/kddcup12tr2_lr_amplify.html">
<b>8.3.3.</b>
Logistic Regression with amplifier
</a>
</li>
<li class="chapter " data-level="8.3.4" data-path="../regression/kddcup12tr2_adagrad.html">
<a href="../regression/kddcup12tr2_adagrad.html">
<b>8.3.4.</b>
AdaGrad, AdaDelta
</a>
</li>
</ul>
</li>
<li class="header">Part IX - Recommendation</li>
<li class="chapter " data-level="9.1" data-path="../recommend/cf.html">
<a href="../recommend/cf.html">
<b>9.1.</b>
Collaborative Filtering
</a>
<ul class="articles">
<li class="chapter " data-level="9.1.1" data-path="../recommend/item_based_cf.html">
<a href="../recommend/item_based_cf.html">
<b>9.1.1.</b>
Item-based Collaborative Filtering
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="9.2" data-path="../recommend/news20.html">
<a href="../recommend/news20.html">
<b>9.2.</b>
News20 Related Article Recommendation Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="9.2.1" data-path="../multiclass/news20_dataset.html">
<a href="../multiclass/news20_dataset.html">
<b>9.2.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="9.2.2" data-path="../recommend/news20_jaccard.html">
<a href="../recommend/news20_jaccard.html">
<b>9.2.2.</b>
LSH/MinHash and Jaccard Similarity
</a>
</li>
<li class="chapter " data-level="9.2.3" data-path="../recommend/news20_knn.html">
<a href="../recommend/news20_knn.html">
<b>9.2.3.</b>
LSH/MinHash and Brute-force Search
</a>
</li>
<li class="chapter " data-level="9.2.4" data-path="../recommend/news20_bbit_minhash.html">
<a href="../recommend/news20_bbit_minhash.html">
<b>9.2.4.</b>
kNN search using b-Bits MinHash
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="9.3" data-path="../recommend/movielens.html">
<a href="../recommend/movielens.html">
<b>9.3.</b>
MovieLens Movie Recommendation Tutorial
</a>
<ul class="articles">
<li class="chapter " data-level="9.3.1" data-path="../recommend/movielens_dataset.html">
<a href="../recommend/movielens_dataset.html">
<b>9.3.1.</b>
Data Preparation
</a>
</li>
<li class="chapter " data-level="9.3.2" data-path="../recommend/movielens_cf.html">
<a href="../recommend/movielens_cf.html">
<b>9.3.2.</b>
Item-based Collaborative Filtering
</a>
</li>
<li class="chapter " data-level="9.3.3" data-path="../recommend/movielens_mf.html">
<a href="../recommend/movielens_mf.html">
<b>9.3.3.</b>
Matrix Factorization
</a>
</li>
<li class="chapter " data-level="9.3.4" data-path="../recommend/movielens_fm.html">
<a href="../recommend/movielens_fm.html">
<b>9.3.4.</b>
Factorization Machine
</a>
</li>
<li class="chapter " data-level="9.3.5" data-path="../recommend/movielens_slim.html">
<a href="../recommend/movielens_slim.html">
<b>9.3.5.</b>
SLIM for fast top-k Recommendation
</a>
</li>
<li class="chapter " data-level="9.3.6" data-path="../recommend/movielens_cv.html">
<a href="../recommend/movielens_cv.html">
<b>9.3.6.</b>
10-fold Cross Validation (Matrix Factorization)
</a>
</li>
</ul>
</li>
<li class="header">Part X - Anomaly Detection</li>
<li class="chapter " data-level="10.1" data-path="../anomaly/lof.html">
<a href="../anomaly/lof.html">
<b>10.1.</b>
Outlier Detection using Local Outlier Factor (LOF)
</a>
</li>
<li class="chapter " data-level="10.2" data-path="../anomaly/sst.html">
<a href="../anomaly/sst.html">
<b>10.2.</b>
Change-Point Detection using Singular Spectrum Transformation (SST)
</a>
</li>
<li class="chapter " data-level="10.3" data-path="../anomaly/changefinder.html">
<a href="../anomaly/changefinder.html">
<b>10.3.</b>
ChangeFinder: Detecting Outlier and Change-Point Simultaneously
</a>
</li>
<li class="header">Part XI - Clustering</li>
<li class="chapter " data-level="11.1" data-path="../clustering/lda.html">
<a href="../clustering/lda.html">
<b>11.1.</b>
Latent Dirichlet Allocation
</a>
</li>
<li class="chapter " data-level="11.2" data-path="../clustering/plsa.html">
<a href="../clustering/plsa.html">
<b>11.2.</b>
Probabilistic Latent Semantic Analysis
</a>
</li>
<li class="header">Part XII - GeoSpatial Functions</li>
<li class="chapter " data-level="12.1" data-path="../geospatial/latlon.html">
<a href="../geospatial/latlon.html">
<b>12.1.</b>
Lat/Lon functions
</a>
</li>
<li class="header">Part XIII - Hivemall on SparkSQL</li>
<li class="chapter " data-level="13.1" data-path="../spark/getting_started/README.md">
<span>
<b>13.1.</b>
Getting Started
</span>
<ul class="articles">
<li class="chapter " data-level="13.1.1" data-path="../spark/getting_started/installation.html">
<a href="../spark/getting_started/installation.html">
<b>13.1.1.</b>
Installation
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="13.2" data-path="../spark/binaryclass/">
<a href="../spark/binaryclass/">
<b>13.2.</b>
Binary Classification
</a>
<ul class="articles">
<li class="chapter " data-level="13.2.1" data-path="../spark/binaryclass/a9a_sql.html">
<a href="../spark/binaryclass/a9a_sql.html">
<b>13.2.1.</b>
a9a Tutorial for SQL
</a>
</li>
</ul>
</li>
<li class="chapter " data-level="13.3" data-path="../spark/binaryclass/">
<a href="../spark/binaryclass/">
<b>13.3.</b>
Regression
</a>
<ul class="articles">
<li class="chapter " data-level="13.3.1" data-path="../spark/regression/e2006_sql.html">
<a href="../spark/regression/e2006_sql.html">
<b>13.3.1.</b>
E2006-tfidf Regression Tutorial for SQL
</a>
</li>
</ul>
</li>
<li class="header">Part XIV - Hivemall on Docker</li>
<li class="chapter " data-level="14.1" data-path="../docker/getting_started.html">
<a href="../docker/getting_started.html">
<b>14.1.</b>
Getting Started
</a>
</li>
<li class="header">Part XV - External References</li>
<li class="chapter " data-level="15.1" >
<a target="_blank" href="https://github.com/daijyc/hivemall/wiki/PigHome">
<b>15.1.</b>
Hivemall on Apache Pig
</a>
</li>
<li class="divider"></li>
<li>
<a href="https://www.gitbook.com" target="_blank" class="gitbook-link">
Published with GitBook
</a>
</li>
</ul>
</nav>
</div>
<div class="book-body">
<div class="body-inner">
<div class="book-header" role="navigation">
<!-- Title -->
<h1>
<i class="fa fa-circle-o-notch fa-spin"></i>
<a href=".." >Input Format</a>
</h1>
</div>
<div class="page-wrapper" tabindex="-1" role="main">
<div class="page-inner">
<div id="book-search-results">
<div class="search-noresults">
<section class="normal markdown-section">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<p>This page explains the input format of training data in Hivemall.
Here, we use <a href="https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form" target="_blank">EBNF</a>-like notation for describing the format.</p>
<!-- toc --><div id="toc" class="toc">
<ul>
<li><a href="#input-format-for-classification">Input Format for Classification</a></li>
<li><a href="#features-format-for-classification-and-regression">Features format (for classification and regression)</a><ul>
<li><a href="#quantitative-and-categorical-variables">Quantitative and Categorical variables</a></li>
<li><a href="#biasdummy-variable-in-features">Bias/Dummy Variable in features</a></li>
<li><a href="#feature-hashing">Feature hashing</a></li>
<li><a href="#feature-normalization">Feature Normalization</a></li>
</ul>
</li>
<li><a href="#label-format-in-binary-classification">Label format in Binary Classification</a></li>
<li><a href="#label-format-in-multi-class-classification">Label format in Multi-class Classification</a></li>
<li><a href="#input-format-in-regression">Input format in Regression</a><ul>
<li><a href="#target-in-logistic-regression">Target in Logistic Regression</a></li>
</ul>
</li>
<li><a href="#helper-functions">Helper functions</a><ul>
<li><a href="#quantitative-features">Quantitative Features</a></li>
<li><a href="#categorical-features">Categorical Features</a></li>
<li><a href="#preparing-training-data-table">Preparing training data table</a></li>
</ul>
</li>
</ul>
</div><!-- tocstop -->
<h1 id="input-format-for-classification">Input Format for Classification</h1>
<p>The classifiers of Hivemall take 2 (or 3) arguments: <em>features</em>, <em>label</em>, and <em>options</em> (a.k.a. <a href="https://en.wikipedia.org/wiki/Hyperparameter" target="_blank">hyperparameters</a>). The first two arguments of training functions represent training examples. </p>
<p>In Statistics, <em>features</em> and <em>label</em> are called <a href="http://www.oswego.edu/~srp/stats/variable_types.htm" target="_blank">Explanatory variable and Response Variable</a>, respectively.</p>
<h1 id="features-format-for-classification-and-regression">Features format (for classification and regression)</h1>
<p>The format of <em>features</em> is common between (binary and multi-class) classification and regression.
Hivemall accepts <code>ARRAY&lt;INT|BIGINT|TEXT&gt;</code> for the type of <em>features</em> column.</p>
<p>Hivemall uses a <em>sparse</em> data format (cf. <a href="https://netlib.org/linalg/html_templates/node91.html" target="_blank">Compressed Row Storage</a>) which is similar to <a href="https://stackoverflow.com/questions/12112558/read-write-data-in-libsvm-format" target="_blank">LIBSVM</a> and <a href="https://github.com/JohnLangford/vowpal_wabbit/wiki/Input-format" target="_blank">Vowpal Wabbit</a>.</p>
<p>The format of each feature in an array is as follows:</p>
<pre><code>feature ::= &lt;index&gt;:&lt;weight&gt; or &lt;index&gt;
</code></pre><p>Each element of <em>index</em> or <em>weight</em> then accepts the following format:</p>
<pre><code>index ::= &lt;INT | BIGINT | TEXT&gt;
weight ::= &lt;FLOAT&gt;
</code></pre><p>The <em>index</em> is usually a number (INT or BIGINT) starting from 1.
Here is an example of <em>features</em>.</p>
<pre><code>10:3.4 123:0.5 34567:0.231
</code></pre><p><em>Note:</em> As mentioned later, <em>index</em> &quot;0&quot; is reserved for a <a href="../tips/addbias.html">Bias/Dummy variable</a>.</p>
<p>In addition to numbers, you can use a TEXT value for an index. For example, you can use array(&quot;height:1.5&quot;, &quot;length:2.0&quot;) for the features.</p>
<pre><code>&quot;height:1.5&quot; &quot;length:2.0&quot;
</code></pre><h2 id="quantitative-and-categorical-variables">Quantitative and Categorical variables</h2>
<p>A <a href="http://www.oswego.edu/~srp/stats/variable_types.htm" target="_blank">quantitative variable</a> must have an <em>index</em> entry.</p>
<p>Hivemall (v0.3.1 or later) provides the <em>add_feature_index</em> function, which is useful for adding indexes to quantitative variables. </p>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span> add_feature_index(<span class="hljs-built_in">array</span>(<span class="hljs-number">3</span>,<span class="hljs-number">4.0</span>,<span class="hljs-number">5</span>)) <span class="hljs-keyword">from</span> dual;
</code></pre>
<blockquote>
<p>[&quot;1:3.0&quot;,&quot;2:4.0&quot;,&quot;3:5.0&quot;]</p>
</blockquote>
<p>You can omit specifying <em>weight</em> for each feature e.g. for <a href="http://www.oswego.edu/~srp/stats/variable_types.htm" target="_blank">Categorical variables</a> as follows:</p>
<pre><code>feature ::= &lt;index&gt;
</code></pre><p>Note that 1.0 is used as the weight when <em>weight</em> is omitted. </p>
<h2 id="biasdummy-variable-in-features">Bias/Dummy Variable in features</h2>
<p>Note that &quot;0&quot; is reserved for a Bias variable (called dummy variable in Statistics). </p>
<p>The <a href="../tips/addbias.html">add_bias</a> function in Hivemall appends &quot;0:1.0&quot; as an element of the array in <em>features</em>.</p>
<h2 id="feature-hashing">Feature hashing</h2>
<p>Hivemall supports <a href="https://en.wikipedia.org/wiki/Feature_hashing" target="_blank">feature hashing/hashing trick</a> through <a href="../ft_engineering/hashing.html#mhash-function">mhash function</a>.</p>
<p>The mhash function takes a feature (i.e., <em>index</em>) of TEXT format and generates a hash value in the range from 1 to 2^24 (=16777216) by the default setting.</p>
<p>Feature hashing is useful where the dimension of the feature vector (i.e., the number of elements in <em>features</em>) is very large. Consider applying the <a href="../ft_engineering/hashing.html#mhash-function">mhash function</a> when a prediction model does not fit in memory and an OutOfMemory exception happens.</p>
<p>In general, you don&apos;t need to use mhash when the dimension of the feature vector is less than 16777216.
If a feature <em>index</em> is a very long TEXT (e.g., &quot;xxxxxxx-yyyyyy-weight:55.3&quot;) and uses a large amount of memory, consider using mhash as follows:</p>
<pre><code class="lang-sql"><span class="hljs-comment">-- feature is v0.3.2 or before</span>
concat(mhash(extract_feature(&quot;xxxxxxx-yyyyyy-weight:55.3&quot;)), &quot;:&quot;, extract_weight(&quot;xxxxxxx-yyyyyy-weight:55.3&quot;))
<span class="hljs-comment">-- feature is v0.3.2-1 or later</span>
feature(mhash(extract_feature(&quot;xxxxxxx-yyyyyy-weight:55.3&quot;)), extract_weight(&quot;xxxxxxx-yyyyyy-weight:55.3&quot;))
</code></pre>
<blockquote>
<p>43352:55.3</p>
</blockquote>
<h2 id="feature-normalization">Feature Normalization</h2>
<p>Feature (weight) normalization is important in machine learning. Please refer to <a href="../ft_engineering/scaling.html">this article</a> for details.</p>
<hr>
<h1 id="label-format-in-binary-classification">Label format in Binary Classification</h1>
<p>The <em>label</em> must be an <em>INT</em> typed column and the values are positive (+1) or negative (-1) as follows:</p>
<pre><code>&lt;label&gt; ::= 1 | -1
</code></pre><p>Alternatively, you can use the following format that represents 1 for a positive example and 0 for a negative example: </p>
<pre><code>&lt;label&gt; ::= 0 | 1
</code></pre><h1 id="label-format-in-multi-class-classification">Label format in Multi-class Classification</h1>
<p>You can use any PRIMITIVE type in the multi-class <em>label</em>. </p>
<pre><code>&lt;label&gt; ::= &lt;primitive type&gt;
</code></pre><p>Typically, the type of label column will be INT, BIGINT, or TEXT.</p>
<hr>
<h1 id="input-format-in-regression">Input format in Regression</h1>
<p>In regression, response/predictor variable (we denote it as <em>target</em>) is a real number.</p>
<p>Before Hivemall v0.3, only the FLOAT type was accepted for <em>target</em>.</p>
<pre><code>&lt;target&gt; ::= &lt;FLOAT&gt;
</code></pre><p>You need to explicitly cast a double value of <em>target</em> to float as follows:</p>
<pre><code class="lang-sql">CAST(target as FLOAT)
</code></pre>
<p>On the other hand, Hivemall v0.3 or later accepts double compatible numbers in <em>target</em>.</p>
<pre><code>&lt;target&gt; ::= &lt;FLOAT | DOUBLE | INT | TINYINT | SMALLINT| BIGINT &gt;
</code></pre><h2 id="target-in-logistic-regression">Target in Logistic Regression</h2>
<p>Logistic regression is actually a binary classification scheme, although it can produce the probability that a training example is positive. </p>
<p>A <em>target</em> value of a training input must be in the range 0.0 to 1.0; specifically, 0.0 or 1.0.</p>
<hr>
<h1 id="helper-functions">Helper functions</h1>
<pre><code class="lang-sql"><span class="hljs-comment">-- hivemall v0.3.2 and before</span>
<span class="hljs-keyword">select</span> <span class="hljs-keyword">concat</span>(<span class="hljs-string">&quot;weight&quot;</span>,<span class="hljs-string">&quot;:&quot;</span>,<span class="hljs-number">55.0</span>);
<span class="hljs-comment">-- hivemall v0.3.2-1 and later</span>
<span class="hljs-keyword">select</span> feature(<span class="hljs-string">&quot;weight&quot;</span>, <span class="hljs-number">55.0</span>);
</code></pre>
<blockquote>
<p>weight:55.0</p>
</blockquote>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span> extract_feature(<span class="hljs-string">&quot;weight:55.0&quot;</span>), extract_weight(<span class="hljs-string">&quot;weight:55.0&quot;</span>);
</code></pre>
<blockquote>
<p>weight | 55.0</p>
</blockquote>
<pre><code class="lang-sql"><span class="hljs-comment">-- hivemall v0.4.0 and later</span>
<span class="hljs-keyword">select</span> feature_index(<span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;10:0.2&quot;</span>,<span class="hljs-string">&quot;7:0.3&quot;</span>,<span class="hljs-string">&quot;9&quot;</span>));
</code></pre>
<blockquote>
<p>[10,7,9]</p>
</blockquote>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span>
convert_label(<span class="hljs-number">-1</span>), convert_label(<span class="hljs-number">1</span>), convert_label(<span class="hljs-number">0.0</span>f), convert_label(<span class="hljs-number">1.0</span>f)
<span class="hljs-keyword">from</span>
dual;
</code></pre>
<blockquote>
<p>0.0f | 1.0f | -1 | 1</p>
</blockquote>
<h2 id="quantitative-features">Quantitative Features</h2>
<p><code>array&lt;string&gt; quantitative_features(array&lt;string&gt; featureNames, feature1, feature2, .. [, const string options])</code> is a helper function to create sparse quantitative features from a table.</p>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span> quantitative_features(
<span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;apple&quot;</span>,<span class="hljs-string">&quot;height&quot;</span>,<span class="hljs-string">&quot;weight&quot;</span>),
<span class="hljs-number">1</span>,<span class="hljs-number">180.3</span>,<span class="hljs-number">70.2</span>
<span class="hljs-comment">-- ,&quot;-emit_null&quot;</span>
);
</code></pre>
<blockquote>
<p>[&quot;apple:1.0&quot;,&quot;height:180.3&quot;,&quot;weight:70.2&quot;]</p>
</blockquote>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span> quantitative_features(
<span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;apple&quot;</span>,<span class="hljs-string">&quot;height&quot;</span>,<span class="hljs-string">&quot;weight&quot;</span>),
<span class="hljs-number">1</span>,<span class="hljs-keyword">cast</span>(<span class="hljs-literal">null</span> <span class="hljs-keyword">as</span> <span class="hljs-keyword">double</span>),<span class="hljs-number">70.2</span>
,<span class="hljs-string">&quot;-emit_null&quot;</span>
);
</code></pre>
<blockquote>
<p>[&quot;apple:1.0&quot;,null,&quot;weight:70.2&quot;]</p>
</blockquote>
<h2 id="categorical-features">Categorical Features</h2>
<p><code>array&lt;string&gt; categorical_features(array&lt;string&gt; featureNames, feature1, feature2, .. [, const string options])</code> is a helper function to create sparse categorical features from a table.</p>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span> categorical_features(
<span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;is_cat&quot;</span>,<span class="hljs-string">&quot;is_dog&quot;</span>,<span class="hljs-string">&quot;is_lion&quot;</span>,<span class="hljs-string">&quot;is_pengin&quot;</span>,<span class="hljs-string">&quot;species&quot;</span>),
<span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1.0</span>, <span class="hljs-literal">true</span>, <span class="hljs-string">&quot;dog&quot;</span>
<span class="hljs-comment">-- ,&quot;-emit_null&quot;</span>
);
</code></pre>
<blockquote>
<p>[&quot;is_cat#1&quot;,&quot;is_dog#0&quot;,&quot;is_lion#1.0&quot;,&quot;is_pengin#true&quot;,&quot;species#dog&quot;]</p>
</blockquote>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span> categorical_features(
<span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;is_cat&quot;</span>,<span class="hljs-string">&quot;is_dog&quot;</span>,<span class="hljs-string">&quot;is_lion&quot;</span>,<span class="hljs-string">&quot;is_pengin&quot;</span>,<span class="hljs-string">&quot;species&quot;</span>),
<span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1.0</span>, <span class="hljs-literal">true</span>, <span class="hljs-literal">null</span>
,<span class="hljs-string">&quot;-emit_null&quot;</span>
);
</code></pre>
<blockquote>
<p>[&quot;is_cat#1&quot;,&quot;is_dog#0&quot;,&quot;is_lion#1.0&quot;,&quot;is_pengin#true&quot;,null]</p>
</blockquote>
<h2 id="preparing-training-data-table">Preparing training data table</h2>
<p>You can create a training data table as follows:</p>
<pre><code class="lang-sql"><span class="hljs-keyword">select</span>
<span class="hljs-keyword">rowid</span>() <span class="hljs-keyword">as</span> <span class="hljs-keyword">rowid</span>,
concat_array(
<span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;bias:1.0&quot;</span>),
categorical_features(
<span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;id&quot;</span>, <span class="hljs-string">&quot;name&quot;</span>),
<span class="hljs-keyword">id</span>, <span class="hljs-keyword">name</span>
),
quantitative_features(
<span class="hljs-built_in">array</span>(<span class="hljs-string">&quot;height&quot;</span>, <span class="hljs-string">&quot;weight&quot;</span>),
height, weight
)
) <span class="hljs-keyword">as</span> features,
click_or_not <span class="hljs-keyword">as</span> label
<span class="hljs-keyword">from</span>
<span class="hljs-keyword">table</span>;
</code></pre>
<div id="page-footer" class="localized-footer"><hr><!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<p><sub><font color="gray">
Apache Hivemall is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator.
</font></sub></p>
</div>
</section>
</div>
<div class="search-results">
<div class="has-results">
<h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
<ul class="search-results-list"></ul>
</div>
<div class="no-results">
<h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<script>
// Bootstrap queue for this page: reuse an already-defined `gitbook` value if
// there is one, otherwise start from an empty array.
var gitbook = gitbook || [];
// Queue a callback that passes this page's state object (page metadata, book
// config, source file info, GitBook version, base path) to
// gitbook.page.hasChanged. Presumably the GitBook runtime loaded later
// (gitbook.js) runs the queued callbacks; confirm against that script.
gitbook.push(function() {
// The state object is kept on a single line on purpose: a raw line break inside
// a quoted string (previously inside the "anchorjs" selector) is a syntax error
// that prevents the whole script from running.
gitbook.page.hasChanged({"page":{"title":"Input Format","level":"1.2.3","depth":2,"next":{"title":"List of Functions","level":"1.3","depth":1,"path":"misc/funcs.md","ref":"misc/funcs.md","articles":[]},"previous":{"title":"Install as permanent functions","level":"1.2.2","depth":2,"path":"getting_started/permanent-functions.md","ref":"getting_started/permanent-functions.md","articles":[]},"dir":"ltr"},"config":{"plugins":["theme-api","edit-link","github","splitter","etoc","callouts","toggle-chapters","anchorjs","codeblock-filename","expandable-chapters","multipart","codeblock-filename","katex","emphasize","localized-footer"],"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"pluginsConfig":{"emphasize":{},"callouts":{},"etoc":{"h2lb":3,"header":1,"maxdepth":3,"mindepth":1,"notoc":true},"github":{"url":"https://github.com/apache/incubator-hivemall/"},"splitter":{},"search":{},"downloadpdf":{"base":"https://github.com/apache/incubator-hivemall/docs/gitbook","label":"PDF","multilingual":false},"multipart":{},"localized-footer":{"filename":"FOOTER.md","hline":"true"},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"katex":{},"fontsettings":{"theme":"white","family":"sans","size":2,"font":"sans"},"highlight":{},"codeblock-filename":{},"theme-api":{"languages":[],"split":false,"theme":"dark"},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"edit-link":{"label":"Edit","base":"https://github.com/apache/incubator-hivemall/tree/master/docs/gitbook"},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":true},"anchorjs":{"selector":"h1,h2,h3,*:not(.callout) > h4,h5"},"toggle-chapters":{},"expandable-chapters":{}},"theme":"default","pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"variables":{},"title":"Hivemall User Manual","links":{"sidebar":{"<i class=\"fa fa-home\"></i> Home":"https://hivemall.incubator.apache.org/"}},"gitbook":"3.x.x","description":"User Manual for Apache Hivemall"},"file":{"path":"getting_started/input-format.md","mtime":"2021-04-22T11:42:38.137Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2021-04-22T11:56:59.644Z"},"basePath":"..","book":{"language":""}});
});
</script>
</div>
<script src="../gitbook/gitbook.js"></script>
<script src="../gitbook/theme.js"></script>
<script src="../gitbook/gitbook-plugin-edit-link/plugin.js"></script>
<script src="../gitbook/gitbook-plugin-github/plugin.js"></script>
<script src="../gitbook/gitbook-plugin-splitter/splitter.js"></script>
<script src="../gitbook/gitbook-plugin-etoc/plugin.js"></script>
<script src="../gitbook/gitbook-plugin-toggle-chapters/toggle.js"></script>
<script src="../gitbook/gitbook-plugin-anchorjs/anchor.min.js"></script>
<script src="../gitbook/gitbook-plugin-anchorjs/anchor-style.js"></script>
<script src="../gitbook/gitbook-plugin-expandable-chapters/expandable-chapters.js"></script>
<script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
<script src="../gitbook/gitbook-plugin-search/search.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
<script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
<script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
<script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
<script src="../gitbook/gitbook-plugin-theme-api/theme-api.js"></script>
</body>
</html>