Delete mlscorecard package

commit: 8eaa3f2de03c67d4205a5e8ebe8859ad85ec4d15 [log] [tgz]
author: xurror <kaze.nasser@outlook.com> Mon Jun 07 11:58:09 2021 +0100
committer: Yemdjih Kaze Nasser <kaze.nasser@outlook.com> Mon Jun 07 12:28:37 2021 +0100
tree: c85ef0003f03606ff7b9bbb1495023e7059a2fa3
parent: af248dfd59867836f930c45a1bde4bb638195e23 [diff]
diff --git a/mlscorecard/models.py b/mlscorecard/models.py
deleted file mode 100644
index 0eef237..0000000
--- a/mlscorecard/models.py
+++ /dev/null

@@ -1,212 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-"""
-Credit Risk Models
-
-The module contains model definitions of various tested models for credit
-assessment
-"""
-
-import numpy as np
-import pandas as pd
-
-from sklearn.preprocessing import LabelEncoder
-from sklearn.preprocessing import StandardScaler
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.model_selection import train_test_split
-from sklearn.model_selection import GridSearchCV, ShuffleSplit
-from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, recall_score, precision_score
-
-class Model(object):
-    """
-    Basic Scorecard Model
-
-    Warning: This class should not be used directly. Use derived classes
-    instead.
-    """
-
-    def __init__(self,
-                 classifier=None,
-                 test_size=2.0,
-                 n_splits=1,
-                 random_state=None,
-                 params=None):
-                 
-        self.classifier = classifier
-        self.params = params
-        self.random_state = random_state
-        self.test_size = test_size
-        self.n_splits = n_splits
-
-        self.model = GridSearchCV(estimator=classifier,
-                                  param_grid=params,
-                                  cv=ShuffleSplit(test_size=0.20,
-                                  n_splits=n_splits,
-                                  random_state=0))
-    
-    def __str__(self):
-        return f"""
-        Model Object
-        ----------------------------------------------------------------
-
-        Classifier: {self.classifier().__class__.__name__}
-        Test Size: {self.test_size}
-        Random State: {self.random_state}
-        Number of Splits: {self.n_splits}
-        Parameter Grid: {self.params}
-
-        {self.model}
-        """
-
-    def train(self, x_train, y_train):
-        """
-        Train scorecard model
-        
-        Args:
-            x_train:
-                array of training parameters
-            y_train:
-                pandas dataframe with training labels
-        """
-
-        self.model = self.model.fit(x_train, y_train.values.ravel())
-
-    def predict(self, data):
-        """
-        Predict scorecard model
-
-        Args:
-            data: array
-                Data to perform prediction on.
-        """
-
-        return self.model.predict(data)
-
-    def accuracy(self, x_test, y_test):
-        """
-        Compute scorecard model accuracy
-
-        Args:
-            x_test: array
-                The test parameters.
-            y_test: array
-                The labels
-        """
-
-        y_pred = self.predict(x_test)
-        return accuracy_score(y_test, y_pred, normalize=False)
-
-    def metrics(self, x_test, y_test):
-        """
-        Comput scorecard model metrics
-        
-        Args:
-            x_test: array
-                The test parameters.
-            y_test: array
-                The labels
-        """
-
-        y_pred = self.predict(x_test)
-        cm = confusion_matrix(y_pred, y_test)
-        accuracy = accuracy_score(y_test, y_pred, normalize=True)
-        f1 = f1_score(self.y_test, y_pred, average="macro")
-        recall = recall_score(y_test, y_pred, average="macro")
-        precision = precision_score(y_test, y_pred, average="macro")
-        return {"accuracy" : accuracy,
-                "f1_score" : f1,
-                "recall_score" : recall,
-                "precision_score": precision}
-
-
-class RandomForest(Model):
-    """
-    Model to predict credit risk using Random Forest Classifier
-    
-    Parameters
-    ----------
-    classifier: object, default: RandomForestClassifier
-        sklearn classifier class.
-
-    test_size: float, default: 0.2
-        fraction of the dataset to use as test set.
-
-    n_splits: int, default: 1
-        number of splits.
-
-    random_state: int, default: 0
-        random state.
-
-    params: dict: default: {'n_estimators' : [20, 30, 40], 'random_state' : [0]}
-        model optimisation parameters
-    """
-
-    def __init__(self,
-                 test_size=2.0,
-                 n_splits=1,
-                 random_state=0,
-                 params={'n_estimators' : [20, 30, 40], 'random_state' : [0]}):
-        self.classifier = RandomForestClassifier
-        super(RandomForest, self).__init__(self.classifier,
-                                           test_size,
-                                           n_splits,
-                                           random_state,
-                                           params)
-
-    def preprocessing(self, data):
-        """
-        Preprocess [German](https://raw.githubusercontent.com/humbletechy/Assign/master/datasets_9109_12699_german_credit_data.csv) dataset
-
-        Parameters
-        ----------
-        data: DataFrame
-            Pandas dataframe containing German dataset.
-        """
-
-        # Drop savings account and checkings account columns as they contain a lot
-        # of NaN values and may not always be available in real life scenarios
-        data = data.drop(columns = ['Saving accounts', 'Checking account'])
-    
-        cols = data.columns
-        num_cols = data._get_numeric_data().columns
-        categorical = list(set(cols) - set(num_cols))
-
-        le = LabelEncoder()
-        data = data.dropna()
-        # Encode text columns to number values
-        for category in categorical:
-            data[category] = le.fit_transform(data[category])
-
-        for col in data.columns:
-            if(col not in categorical):
-                data[col] = (data[col].astype('float') - np.mean(data[col].astype('float')))/np.std(data[col].astype('float'))
-
-        # Get Training parameters
-        target_col = data.columns[-1]
-        x = data.drop(columns=target_col, axis=1)
-        y = data[target_col].astype('int')
-
-
-        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = self.test_size)
-        x_train = pd.DataFrame(x_train)
-        y_train = pd.DataFrame(y_train)
-
-        sc = StandardScaler()
-        x_train = sc.fit_transform(x_train)
-        x_test = sc.transform(x_test)
-
-        return (x_train, x_test, y_train, y_test)
commit	8eaa3f2de03c67d4205a5e8ebe8859ad85ec4d15	[log] [tgz]
author	xurror <kaze.nasser@outlook.com>	Mon Jun 07 11:58:09 2021 +0100
committer	Yemdjih Kaze Nasser <kaze.nasser@outlook.com>	Mon Jun 07 12:28:37 2021 +0100
tree	c85ef0003f03606ff7b9bbb1495023e7059a2fa3
parent	af248dfd59867836f930c45a1bde4bb638195e23 [diff]