| # | |
| # Licensed to the Apache Software Foundation (ASF) under one | |
| # or more contributor license agreements. See the NOTICE file | |
| # distributed with this work for additional information | |
| # regarding copyright ownership. The ASF licenses this file | |
| # to you under the Apache License, Version 2.0 (the | |
| # "License"); you may not use this file except in compliance | |
| # with the License. You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, | |
| # software distributed under the License is distributed on an | |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
| # KIND, either express or implied. See the License for the | |
| # specific language governing permissions and limitations | |
| # under the License. | |
| # | |
| """ | |
| Credit Risk Models | |
| The module contains model definitions of various tested models for credit | |
| assessment | |
| """ | |
import logging
from typing import Any, Dict, List, Optional

import joblib
import pandas as pd
from django.core.exceptions import BadRequest
from sklearn.preprocessing import LabelEncoder
log = logging.getLogger(__name__)


class Classifier:
    """
    Basic Scorecard Model

    Bundles a fitted estimator with the categorical feature names and the
    label encoders needed to turn a raw feature dict into a prediction.

    Warning: This class should not be used directly. Use derived classes
    instead.
    """

    def __init__(self,
                 model=None,
                 categorical: Optional[List[str]] = None,
                 label_encoders: Optional[Dict[str, "LabelEncoder"]] = None):
        """
        Parameters
        ----------
        model: object, optional
            Estimator exposing ``predict_proba``.
        categorical: list of str, optional
            Names of the categorical columns. Defaults to an empty list.
        label_encoders: dict, optional
            Mapping of column name to an encoder exposing ``transform``.
            Defaults to an empty dict.
        """
        self.model = model
        # Build fresh containers per instance: a mutable default value would
        # be shared by every instance created without these arguments.
        self.categorical = [] if categorical is None else categorical
        self.label_encoders = {} if label_encoders is None else label_encoders

    def preprocessing(self, data: Dict[str, Any]) -> pd.DataFrame:
        """
        Preprocess python dict object for prediction

        The dict becomes a single-row DataFrame, rows holding missing values
        are dropped, and every categorical column other than ``'risk'`` is
        replaced by the output of its label encoder.

        Parameters
        ----------
        data: dict
            dictionary of data to predict

        Returns
        -------
        pandas.DataFrame
            The encoded frame. It is empty when the input held a missing
            value, because the only row is dropped.

        Raises
        ------
        BadRequest
            When three or more categorical columns cannot be encoded because
            the column or its encoder is missing.
        """
        categorical = [x for x in self.categorical if x != 'risk']
        encoders = self.label_encoders

        frame = pd.DataFrame(data, index=[0])
        frame = frame.dropna()

        # Collected across the whole loop so the limit below counts every
        # category that failed, not just the current one.
        failed_trials = []
        for category in categorical:
            try:
                frame[category] = encoders[category].transform(frame[category])
            except KeyError as e:
                # Raised when either the column or its encoder is absent.
                failed_trials.append(e)
                log.debug("An error occured: %s", e)
                if len(failed_trials) >= 3:
                    raise BadRequest(failed_trials) from e
                # Tolerate the gap: keep the column present but empty.
                frame[category] = None
        return frame

    def predict(self, data):
        """
        Predict scorecard model

        Args:
            data: array
                Data to perform prediction on.

        Returns:
            Whatever ``self.model.predict_proba`` returns for ``data``.
        """
        return self.model.predict_proba(data)

    def postprocessing(self, prediction, threshold: float = 0.5) -> Dict[str, Any]:
        """
        Turn one row of class probabilities into a labelled result.

        Parameters
        ----------
        prediction: sequence
            Probabilities for a single sample; only index 1 is read
            (presumably the "good" class - confirm against the model's
            class order).
        threshold: float, optional
            The label is ``"good"`` when ``prediction[1]`` is strictly
            greater than this value, otherwise ``"bad"``. Defaults to 0.5.

        Returns
        -------
        dict
            ``{"probability": prediction[1], "label": "good" | "bad"}``
        """
        probability = prediction[1]
        label = "good" if probability > threshold else "bad"
        return {"probability": probability, "label": label}

    def compute_prediction(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run preprocessing, prediction and postprocessing for one sample.

        Parameters
        ----------
        data: dict
            dictionary of data to predict

        Returns
        -------
        dict
            The result of ``postprocessing`` for the first predicted row.

        Raises
        ------
        BadRequest
            When any step fails; the message is the text of the original
            error, which is kept as the exception's cause.
        """
        try:
            input_data = self.preprocessing(data)
            first_row = self.predict(input_data)[0]
            return self.postprocessing(first_row)
        except Exception as e:
            log.debug("An error occured: %s", e)
            raise BadRequest(str(e)) from e
class RandomForestClassifier(Classifier):
    """
    Classifier preconfigured with the artefacts under ``zoo/models/german``
    (model file ``rf_classifier.joblib``).

    The default arguments are evaluated once, when this class body is
    executed, so the three joblib files are read at import time and the
    loaded objects are shared by every instance built with the defaults.
    """

    def __init__(
            self,
            model=joblib.load('zoo/models/german/rf_classifier.joblib'),
            categorical=joblib.load('zoo/models/german/categorical.joblib'),
            label_encoders=joblib.load(
                'zoo/models/german/label_encoders.joblib')):
        # Hand everything to the base class unchanged.
        super().__init__(model=model,
                         categorical=categorical,
                         label_encoders=label_encoders)
class SVC(Classifier):
    """
    Classifier preconfigured with the artefacts under ``zoo/models/german``
    (model file ``svc_classifier.joblib``).

    The default arguments are evaluated once, when this class body is
    executed, so the three joblib files are read at import time and the
    loaded objects are shared by every instance built with the defaults.
    """

    def __init__(
            self,
            model=joblib.load('zoo/models/german/svc_classifier.joblib'),
            categorical=joblib.load('zoo/models/german/categorical.joblib'),
            label_encoders=joblib.load(
                'zoo/models/german/label_encoders.joblib')):
        # Hand everything to the base class unchanged.
        super().__init__(model=model,
                         categorical=categorical,
                         label_encoders=label_encoders)
class MLP(Classifier):
    """
    Classifier preconfigured with the artefacts under ``zoo/models/german``
    (model file ``mlp_classifier.joblib``).

    The default arguments are evaluated once, when this class body is
    executed, so the three joblib files are read at import time and the
    loaded objects are shared by every instance built with the defaults.
    """

    def __init__(
            self,
            model=joblib.load('zoo/models/german/mlp_classifier.joblib'),
            categorical=joblib.load('zoo/models/german/categorical.joblib'),
            label_encoders=joblib.load(
                'zoo/models/german/label_encoders.joblib')):
        # Hand everything to the base class unchanged.
        super().__init__(model=model,
                         categorical=categorical,
                         label_encoders=label_encoders)
class GradientBoostClassifier(Classifier):
    """
    Classifier preconfigured with the artefacts under ``zoo/models/german``
    (model file ``gb_classifier.joblib``).

    The default arguments are evaluated once, when this class body is
    executed, so the three joblib files are read at import time and the
    loaded objects are shared by every instance built with the defaults.
    """

    def __init__(
            self,
            model=joblib.load('zoo/models/german/gb_classifier.joblib'),
            categorical=joblib.load('zoo/models/german/categorical.joblib'),
            label_encoders=joblib.load(
                'zoo/models/german/label_encoders.joblib')):
        # Hand everything to the base class unchanged.
        super().__init__(model=model,
                         categorical=categorical,
                         label_encoders=label_encoders)