# blob: 16d5125b8415e03289849dbb66db83e8aae5ad73 [file] [log] [blame]
"""
@file
@brief Convert model table from MADlib to PMML
@namespace table_to_pmml
"""
import plpy
from builder import RegressionPMMLBuilder
from builder import GLMPMMLBuilder
from builder import OrdinalRegressionPMMLBuilder
from builder import MultinomRegressionPMMLBuilder
from builder import DecisionTreePMMLBuilder
from builder import RandomForestPMMLBuilder
from utilities.validate_args import input_tbl_valid
from utilities.validate_args import cols_in_tbl_valid
from utilities.utilities import add_postfix
# Dispatch table used by table_to_pmml(): the key is the value read from the
# 'method' column of a model's summary table, the value is the PMML builder
# class instantiated for that model. Linear and logistic regression share
# the same builder.
BUILDER_CLASS = {
    'glm': GLMPMMLBuilder,
    'multinom': MultinomRegressionPMMLBuilder,
    'ordinal': OrdinalRegressionPMMLBuilder,
    'logregr': RegressionPMMLBuilder,
    'linregr': RegressionPMMLBuilder,
    'tree_train': DecisionTreePMMLBuilder,
    'forest_train': RandomForestPMMLBuilder,
}
def table_to_pmml(schema_madlib, model_table, **kwargs):
    """Convert a MADlib model table into a PMML string.

    The model kind is read from the 'method' column of the model's summary
    table (the model table name with the "_summary" postfix applied via
    add_postfix) and used to pick a builder class from BUILDER_CLASS.

    Args:
        schema_madlib: MADlib schema name; forwarded to the builder.
        model_table: Name of the model table to convert.
        **kwargs: Only 'name_spec' is read; it is forwarded to the builder
            and defaults to None when not supplied.

    Returns:
        The builder's ``pmml_str`` attribute, read after ``query()`` and
        ``build()`` have been called on it.

    Raises:
        Whatever plpy.error raises when the model's method has no entry in
        BUILDER_CLASS, plus anything raised by the validation helpers.
    """
    module_name = 'PMML'

    # Validate the model table, then its summary table and the one column
    # this function reads from it.
    input_tbl_valid(model_table, module_name)
    summary_table = add_postfix(model_table, "_summary")
    input_tbl_valid(summary_table, module_name)
    cols_in_tbl_valid(summary_table, ['method'], module_name)

    # Only the first row of the summary table is consulted.
    method_query = "SELECT method FROM {0}".format(summary_table)
    first_row = plpy.execute(method_query)[0]
    model_type = first_row['method']

    if model_type not in BUILDER_CLASS:
        plpy.error("Model {0} to PMML is not supported!".format(model_type))

    # Instantiate the matching builder, then let it query the model and
    # assemble the document.
    builder_cls = BUILDER_CLASS[model_type]
    builder = builder_cls(schema_madlib, model_type, model_table,
                          kwargs.get('name_spec'))
    builder.query()
    builder.build()
    return builder.pmml_str
########################################################################
# Online help message
########################################################################
def pmml_help_msg(schema_madlib, **kwargs):
    """Return the help message for the pmml() function.

    Args:
        schema_madlib: MADlib schema name, substituted into the usage
            example shown in the message.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        A string containing the help message.
    """
    # The SUMMARY section lists every model family that has an entry in
    # BUILDER_CLASS, including decision tree and random forest models.
    help_string = """
----------------------------------------------------------------
SUMMARY
----------------------------------------------------------------
Implements the PMML XML standard to describe and exchange models
produced by data mining and machine learning algorithms. Currently,
pmml() supports linear regression, logistic regression, generalized
linear model, multinomial logistic regression, ordinal linear
regression, decision tree and random forest models.
------------------------------------------------------------------
USAGE
------------------------------------------------------------------
SELECT {schema_madlib}.pmml(
model_table, -- Model table that contains the output of a training algorithm
name_spec -- (optional) Names to be used in the Data Dictionary
);
------------------------------------------------------------------
OUTPUT
------------------------------------------------------------------
The output of this function is a standard PMML document.
"""
    return help_string.format(schema_madlib=schema_madlib)