ml/classifiers.py - fineract-credit-scorecard - Git at Google

 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership. The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License. You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #

 """
 Credit Risk Models

 The module contains model definitions of various tested models for credit
 assessment
 """

 from typing import Any, Dict, List
 import joblib
 import logging
 import pandas as pd
 from django.core.exceptions import BadRequest
 from sklearn.preprocessing import LabelEncoder

 # Module-level logger, named after this module via ``__name__``.
 log = logging.getLogger(__name__)


 class Classifier(object):
     """
     Basic Scorecard Model

     Wraps a fitted estimator together with the names of its categorical
     columns and the encoders used to turn those columns into numbers.

     Warning: This class should not be used directly. Use derived classes
     instead.
     """

     # Number of categorical columns that may fail to encode before the
     # whole request is rejected.
     _MAX_FAILED_TRIALS = 3

     def __init__(self,
                  model=None,
                  categorical=None,
                  label_encoders: "Dict[str, LabelEncoder]" = None):
         """
         Parameters
         ----------
         model:
             Estimator exposing ``predict_proba``.
         categorical: list
             Names of the categorical columns. Defaults to an empty list.
         label_encoders: dict
             Mapping of column name to the encoder whose ``transform`` is
             applied to that column. Defaults to an empty dict.
         """
         self.model = model
         # Build fresh containers per instance: mutable defaults in the
         # signature would be shared by every instance created without
         # arguments.
         self.categorical = [] if categorical is None else categorical
         self.label_encoders = ({} if label_encoders is None
                                else label_encoders)

     def preprocessing(self, data: Dict[str, Any]):
         """
         Preprocess python dict object for prediction

         The dict becomes a one-row DataFrame. The row is dropped when any
         of its values is missing, so the result can be empty. Every
         categorical column except 'risk' is then replaced by the output of
         its label encoder.

         Parameters
         ----------
         data: dict
             dictionary of data to predict

         Returns
         -------
         pandas.DataFrame
             The encoded frame, ready to be passed to ``predict``.

         Raises
         ------
         BadRequest
             When ``_MAX_FAILED_TRIALS`` categorical columns could not be
             encoded because the column or its encoder is missing.
         """
         # NOTE(review): 'risk' is presumably the target column and is
         # therefore never part of the input -- confirm.
         categorical = [x for x in self.categorical if x != 'risk']

         frame = pd.DataFrame(data, index=[0]).dropna()

         encoders = self.label_encoders
         # Collected across all categories so the limit can be reached.
         failed_trials = []

         # convert categoricals
         for category in categorical:
             try:
                 frame[category] = encoders[category].transform(
                     frame[category])
             except KeyError as e:
                 failed_trials.append(e)
                 log.debug(f"An error occured: {str(e)}")
                 if len(failed_trials) >= self._MAX_FAILED_TRIALS:
                     raise BadRequest(failed_trials) from e
                 # Keep the column present, but empty, under its own name.
                 frame[category] = None

         return frame

     def predict(self, data):
         """
         Predict scorecard model

         Args:
             data: array
                 Data to perform prediction on.

         Returns:
             Whatever ``self.model.predict_proba`` returns for ``data``.
         """
         return self.model.predict_proba(data)

     def postprocessing(self, prediction):
         """
         Turn one row of probabilities into a labelled result

         Args:
             prediction: sequence
                 Only the element at index 1 is read.
                 NOTE(review): presumably the probability of the "good"
                 class -- confirm against the model's class order.

         Returns:
             dict with the key "probability" (the element at index 1) and
             the key "label" ("good" when that value is above 0.5,
             otherwise "bad").
         """
         probability = prediction[1]
         label = "good" if probability > 0.5 else "bad"
         return {"probability": probability, "label": label}

     def compute_prediction(self, data: Dict[str, Any]):
         """
         Run preprocessing, prediction and postprocessing on one record

         Args:
             data: dict
                 dictionary of data to predict

         Returns:
             The dict produced by ``postprocessing`` for the first row of
             the model output.

         Raises:
             BadRequest: for any exception raised along the way; the
                 original exception is kept as the cause.
         """
         try:
             input_data = self.preprocessing(data)
             prediction = self.predict(input_data)[0]
             prediction = self.postprocessing(prediction)
         except Exception as e:
             log.debug(f'An error occured: {str(e)}')
             raise BadRequest(str(e)) from e

         return prediction


 class RandomForestClassifier(Classifier):
     """
     Random Forest Scorecard Model

     The default model, categorical list and label encoders are read with
     joblib from ``zoo/models/german/``. Default values are evaluated once,
     when the class is defined, so instances created without arguments
     share the same loaded objects.
     """
     def __init__(
             self,
             model=joblib.load('zoo/models/german/rf_classifier.joblib'),
             categorical=joblib.load('zoo/models/german/categorical.joblib'),
             label_encoders=joblib.load(
                 'zoo/models/german/label_encoders.joblib')):
         # Hand everything to the shared base-class constructor.
         super().__init__(model=model,
                          categorical=categorical,
                          label_encoders=label_encoders)


 class SVC(Classifier):
     """
     SVC Scorecard Model

     The default model, categorical list and label encoders are read with
     joblib from ``zoo/models/german/``. Default values are evaluated once,
     when the class is defined, so instances created without arguments
     share the same loaded objects.
     """
     def __init__(
             self,
             model=joblib.load('zoo/models/german/svc_classifier.joblib'),
             categorical=joblib.load('zoo/models/german/categorical.joblib'),
             label_encoders=joblib.load(
                 'zoo/models/german/label_encoders.joblib')):
         # Hand everything to the shared base-class constructor.
         super().__init__(model=model,
                          categorical=categorical,
                          label_encoders=label_encoders)


 class MLP(Classifier):
     """
     MLP Scorecard Model

     The default model, categorical list and label encoders are read with
     joblib from ``zoo/models/german/``. Default values are evaluated once,
     when the class is defined, so instances created without arguments
     share the same loaded objects.
     """
     def __init__(
             self,
             model=joblib.load('zoo/models/german/mlp_classifier.joblib'),
             categorical=joblib.load('zoo/models/german/categorical.joblib'),
             label_encoders=joblib.load(
                 'zoo/models/german/label_encoders.joblib')):
         # Hand everything to the shared base-class constructor.
         super().__init__(model=model,
                          categorical=categorical,
                          label_encoders=label_encoders)


 class GradientBoostClassifier(Classifier):
     """
     Gradient Boost Scorecard Model

     The default model, categorical list and label encoders are read with
     joblib from ``zoo/models/german/``. Default values are evaluated once,
     when the class is defined, so instances created without arguments
     share the same loaded objects.
     """
     def __init__(
             self,
             model=joblib.load('zoo/models/german/gb_classifier.joblib'),
             categorical=joblib.load('zoo/models/german/categorical.joblib'),
             label_encoders=joblib.load(
                 'zoo/models/german/label_encoders.joblib')):
         # Hand everything to the shared base-class constructor.
         super().__init__(model=model,
                          categorical=categorical,
                          label_encoders=label_encoders)
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#

	"""
	Credit Risk Models

	The module contains model definitions of various tested models for credit
	assessment
	"""

	from typing import Any, Dict, List
	import joblib
	import logging
	import pandas as pd
	from django.core.exceptions import BadRequest
	from sklearn.preprocessing import LabelEncoder

	# Module-level logger, named after this module via ``__name__``.
	log = logging.getLogger(__name__)


	class Classifier(object):
	    """
	    Basic Scorecard Model

	    Wraps a fitted estimator together with the names of its categorical
	    columns and the encoders used to turn those columns into numbers.

	    Warning: This class should not be used directly. Use derived classes
	    instead.
	    """

	    # Number of categorical columns that may fail to encode before the
	    # whole request is rejected.
	    _MAX_FAILED_TRIALS = 3

	    def __init__(self,
	                 model=None,
	                 categorical=None,
	                 label_encoders: "Dict[str, LabelEncoder]" = None):
	        """
	        Parameters
	        ----------
	        model:
	            Estimator exposing ``predict_proba``.
	        categorical: list
	            Names of the categorical columns. Defaults to an empty list.
	        label_encoders: dict
	            Mapping of column name to the encoder whose ``transform`` is
	            applied to that column. Defaults to an empty dict.
	        """
	        self.model = model
	        # Build fresh containers per instance: mutable defaults in the
	        # signature would be shared by every instance created without
	        # arguments.
	        self.categorical = [] if categorical is None else categorical
	        self.label_encoders = ({} if label_encoders is None
	                               else label_encoders)

	    def preprocessing(self, data: Dict[str, Any]):
	        """
	        Preprocess python dict object for prediction

	        The dict becomes a one-row DataFrame. The row is dropped when any
	        of its values is missing, so the result can be empty. Every
	        categorical column except 'risk' is then replaced by the output of
	        its label encoder.

	        Parameters
	        ----------
	        data: dict
	            dictionary of data to predict

	        Returns
	        -------
	        pandas.DataFrame
	            The encoded frame, ready to be passed to ``predict``.

	        Raises
	        ------
	        BadRequest
	            When ``_MAX_FAILED_TRIALS`` categorical columns could not be
	            encoded because the column or its encoder is missing.
	        """
	        # NOTE(review): 'risk' is presumably the target column and is
	        # therefore never part of the input -- confirm.
	        categorical = [x for x in self.categorical if x != 'risk']

	        frame = pd.DataFrame(data, index=[0]).dropna()

	        encoders = self.label_encoders
	        # Collected across all categories so the limit can be reached.
	        failed_trials = []

	        # convert categoricals
	        for category in categorical:
	            try:
	                frame[category] = encoders[category].transform(
	                    frame[category])
	            except KeyError as e:
	                failed_trials.append(e)
	                log.debug(f"An error occured: {str(e)}")
	                if len(failed_trials) >= self._MAX_FAILED_TRIALS:
	                    raise BadRequest(failed_trials) from e
	                # Keep the column present, but empty, under its own name.
	                frame[category] = None

	        return frame

	    def predict(self, data):
	        """
	        Predict scorecard model

	        Args:
	            data: array
	                Data to perform prediction on.

	        Returns:
	            Whatever ``self.model.predict_proba`` returns for ``data``.
	        """
	        return self.model.predict_proba(data)

	    def postprocessing(self, prediction):
	        """
	        Turn one row of probabilities into a labelled result

	        Args:
	            prediction: sequence
	                Only the element at index 1 is read.
	                NOTE(review): presumably the probability of the "good"
	                class -- confirm against the model's class order.

	        Returns:
	            dict with the key "probability" (the element at index 1) and
	            the key "label" ("good" when that value is above 0.5,
	            otherwise "bad").
	        """
	        probability = prediction[1]
	        label = "good" if probability > 0.5 else "bad"
	        return {"probability": probability, "label": label}

	    def compute_prediction(self, data: Dict[str, Any]):
	        """
	        Run preprocessing, prediction and postprocessing on one record

	        Args:
	            data: dict
	                dictionary of data to predict

	        Returns:
	            The dict produced by ``postprocessing`` for the first row of
	            the model output.

	        Raises:
	            BadRequest: for any exception raised along the way; the
	                original exception is kept as the cause.
	        """
	        try:
	            input_data = self.preprocessing(data)
	            prediction = self.predict(input_data)[0]
	            prediction = self.postprocessing(prediction)
	        except Exception as e:
	            log.debug(f'An error occured: {str(e)}')
	            raise BadRequest(str(e)) from e

	        return prediction


	class RandomForestClassifier(Classifier):
	    """
	    Random Forest Scorecard Model

	    The default model, categorical list and label encoders are read with
	    joblib from ``zoo/models/german/``. Default values are evaluated once,
	    when the class is defined, so instances created without arguments
	    share the same loaded objects.
	    """
	    def __init__(
	            self,
	            model=joblib.load('zoo/models/german/rf_classifier.joblib'),
	            categorical=joblib.load('zoo/models/german/categorical.joblib'),
	            label_encoders=joblib.load(
	                'zoo/models/german/label_encoders.joblib')):
	        # Hand everything to the shared base-class constructor.
	        super().__init__(model=model,
	                         categorical=categorical,
	                         label_encoders=label_encoders)


	class SVC(Classifier):
	    """
	    SVC Scorecard Model

	    The default model, categorical list and label encoders are read with
	    joblib from ``zoo/models/german/``. Default values are evaluated once,
	    when the class is defined, so instances created without arguments
	    share the same loaded objects.
	    """
	    def __init__(
	            self,
	            model=joblib.load('zoo/models/german/svc_classifier.joblib'),
	            categorical=joblib.load('zoo/models/german/categorical.joblib'),
	            label_encoders=joblib.load(
	                'zoo/models/german/label_encoders.joblib')):
	        # Hand everything to the shared base-class constructor.
	        super().__init__(model=model,
	                         categorical=categorical,
	                         label_encoders=label_encoders)


	class MLP(Classifier):
	    """
	    MLP Scorecard Model

	    The default model, categorical list and label encoders are read with
	    joblib from ``zoo/models/german/``. Default values are evaluated once,
	    when the class is defined, so instances created without arguments
	    share the same loaded objects.
	    """
	    def __init__(
	            self,
	            model=joblib.load('zoo/models/german/mlp_classifier.joblib'),
	            categorical=joblib.load('zoo/models/german/categorical.joblib'),
	            label_encoders=joblib.load(
	                'zoo/models/german/label_encoders.joblib')):
	        # Hand everything to the shared base-class constructor.
	        super().__init__(model=model,
	                         categorical=categorical,
	                         label_encoders=label_encoders)


	class GradientBoostClassifier(Classifier):
	    """
	    Gradient Boost Scorecard Model

	    The default model, categorical list and label encoders are read with
	    joblib from ``zoo/models/german/``. Default values are evaluated once,
	    when the class is defined, so instances created without arguments
	    share the same loaded objects.
	    """
	    def __init__(
	            self,
	            model=joblib.load('zoo/models/german/gb_classifier.joblib'),
	            categorical=joblib.load('zoo/models/german/categorical.joblib'),
	            label_encoders=joblib.load(
	                'zoo/models/german/label_encoders.joblib')):
	        # Hand everything to the shared base-class constructor.
	        super().__init__(model=model,
	                         categorical=categorical,
	                         label_encoders=label_encoders)