blob: cf508677606280b8d242bab559da43b1145f844a [file] [log] [blame]
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
"""
Credit Risk Models

This module contains the definitions of the various tested models used for
credit assessment.
"""
import logging
from typing import Any, Dict, List, Optional

import joblib
import pandas as pd
from django.core.exceptions import BadRequest
from sklearn.preprocessing import LabelEncoder
log = logging.getLogger(__name__)
class Classifier:
    """
    Basic Scorecard Model

    Bundles an estimator with the names of its categorical features and the
    encoders used to convert those features before prediction.

    Warning: This class should not be used directly. Use derived classes
    instead.

    Parameters
    ----------
    model: object
        Estimator exposing ``predict_proba``.
    categorical: list
        Names of the categorical columns. An entry named ``'risk'`` is
        skipped during preprocessing.
    label_encoders: dict
        Mapping of categorical column name to an encoder exposing
        ``transform``.
    """

    # Number of categorical features that may fail to encode before the
    # request is rejected.
    _MAX_FAILED_CATEGORIES = 3

    def __init__(self,
                 model=None,
                 categorical=None,
                 label_encoders: "Optional[Dict[str, LabelEncoder]]" = None):
        self.model = model
        # Build fresh containers per instance: a mutable default would be
        # shared by every instance created without explicit arguments.
        self.categorical = [] if categorical is None else categorical
        self.label_encoders = {} if label_encoders is None else label_encoders

    def preprocessing(self, data: Dict[str, Any]):
        """
        Preprocess python dict object for prediction

        Parameters
        ----------
        data: dict
            dictionary of data to predict

        Returns
        -------
        pandas.DataFrame
            Single-row frame built from ``data`` with every categorical
            column replaced by its encoded value. Categorical columns that
            could not be encoded are set to ``None``.

        Raises
        ------
        BadRequest
            If three or more categorical columns are missing from ``data``
            or have no registered encoder.
        """
        categorical = [x for x in self.categorical if x != 'risk']
        frame = pd.DataFrame(data, index=[0])
        # Rows containing missing values are dropped. The frame holds a
        # single row, so any missing value leaves it empty.
        frame = frame.dropna()
        encoders = self.label_encoders
        # Failures are accumulated across all categories so the threshold
        # below can actually be reached.
        failed_trials = []
        # convert categoricals
        for category in categorical:
            try:
                frame[category] = encoders[category].transform(frame[category])
            except KeyError as e:
                failed_trials.append(e)
                log.debug("An error occured: %s", e)
                if len(failed_trials) >= self._MAX_FAILED_CATEGORIES:
                    raise BadRequest(failed_trials) from e
                # Keep the column present (as a missing value) under its own
                # name rather than under the exception object.
                frame[category] = None
        return frame

    def predict(self, data):
        """
        Predict scorecard model

        Args:
            data: array
                Data to perform prediction on.

        Returns:
            Whatever ``self.model.predict_proba`` returns for ``data``.
        """
        return self.model.predict_proba(data)

    def postprocessing(self, prediction):
        """
        Turn one row of class probabilities into a labelled result.

        Args:
            prediction: sequence
                Probabilities for a single sample; index 1 is read as the
                probability of the "good" class.

        Returns:
            dict with the keys ``"probability"`` (``prediction[1]``) and
            ``"label"`` (``"good"`` when the probability is strictly above
            0.5, otherwise ``"bad"``).
        """
        probability = prediction[1]
        label = "good" if probability > 0.5 else "bad"
        return {"probability": probability, "label": label}

    def compute_prediction(self, data: Dict[str, Any]):
        """
        Run preprocessing, prediction and postprocessing for one record.

        Args:
            data: dict
                Raw feature values for a single sample.

        Returns:
            dict as produced by ``postprocessing``.

        Raises:
            BadRequest: if any step fails; the original exception is kept
                as the cause.
        """
        try:
            input_data = self.preprocessing(data)
            # predict_proba yields one row per sample; only one is sent.
            prediction = self.predict(input_data)[0]
            prediction = self.postprocessing(prediction)
        except Exception as e:
            log.debug("An error occured: %s", e)
            raise BadRequest(str(e)) from e
        return prediction
class RandomForestClassifier(Classifier):
    """
    Classifier whose default artifacts are read from ``zoo/models/german``.

    The default model, categorical list and label encoders are loaded with
    ``joblib.load`` once, when this class definition is executed, and are
    reused by every instance created without explicit arguments.
    Presumably the default model is a fitted random forest (inferred from
    the file name) - confirm against the training code.
    """

    def __init__(
        self,
        model=joblib.load('zoo/models/german/rf_classifier.joblib'),
        categorical=joblib.load('zoo/models/german/categorical.joblib'),
        label_encoders=joblib.load('zoo/models/german/label_encoders.joblib'),
    ):
        # Hand everything to the base class unchanged.
        super().__init__(
            model=model,
            categorical=categorical,
            label_encoders=label_encoders,
        )
class SVC(Classifier):
    """
    Classifier whose default artifacts are read from ``zoo/models/german``.

    The default model, categorical list and label encoders are loaded with
    ``joblib.load`` once, when this class definition is executed, and are
    reused by every instance created without explicit arguments.
    Presumably the default model is a fitted support vector classifier
    (inferred from the file name) - confirm against the training code.
    """

    def __init__(
        self,
        model=joblib.load('zoo/models/german/svc_classifier.joblib'),
        categorical=joblib.load('zoo/models/german/categorical.joblib'),
        label_encoders=joblib.load('zoo/models/german/label_encoders.joblib'),
    ):
        # Hand everything to the base class unchanged.
        super().__init__(
            model=model,
            categorical=categorical,
            label_encoders=label_encoders,
        )
class MLP(Classifier):
    """
    Classifier whose default artifacts are read from ``zoo/models/german``.

    The default model, categorical list and label encoders are loaded with
    ``joblib.load`` once, when this class definition is executed, and are
    reused by every instance created without explicit arguments.
    Presumably the default model is a fitted multi-layer perceptron
    (inferred from the file name) - confirm against the training code.
    """

    def __init__(
        self,
        model=joblib.load('zoo/models/german/mlp_classifier.joblib'),
        categorical=joblib.load('zoo/models/german/categorical.joblib'),
        label_encoders=joblib.load('zoo/models/german/label_encoders.joblib'),
    ):
        # Hand everything to the base class unchanged.
        super().__init__(
            model=model,
            categorical=categorical,
            label_encoders=label_encoders,
        )
class GradientBoostClassifier(Classifier):
    """
    Classifier whose default artifacts are read from ``zoo/models/german``.

    The default model, categorical list and label encoders are loaded with
    ``joblib.load`` once, when this class definition is executed, and are
    reused by every instance created without explicit arguments.
    Presumably the default model is a fitted gradient boosting classifier
    (inferred from the file name) - confirm against the training code.
    """

    def __init__(
        self,
        model=joblib.load('zoo/models/german/gb_classifier.joblib'),
        categorical=joblib.load('zoo/models/german/categorical.joblib'),
        label_encoders=joblib.load('zoo/models/german/label_encoders.joblib'),
    ):
        # Hand everything to the base class unchanged.
        super().__init__(
            model=model,
            categorical=categorical,
            label_encoders=label_encoders,
        )