German Credit Risk

A data scientist is working on 5 models,

  1. German Credit Risk-SGD - Stochastic gradient descent classifier

  2. German Credit Risk-RF - Random forest classifier

  3. German Credit Risk-SVC - Support vector classifier

  4. German Credit Risk-custom - Custom ML model hosted externally

  5. German Credit Risk-GBC - Gradient boosting classifier

Assume the following tasks,

  • Store German Credit Risk-SGD in the project space

  • Deploy German Credit Risk-RF to the development space

  • Validate German Credit Risk-SVC and German Credit Risk-custom in the development environment

  • Operate German Credit Risk-GBC in the production environment

[ ]:
! pip install cpdflow

Import libraries

[ ]:
import json
import logging
import cpdflow
import pandas as pd
from IPython.display import display

# Set the "cpdflow" logger to INFO level.
logging.getLogger("cpdflow").setLevel(logging.INFO)

Model script

[ ]:
%%writefile german-credit-risk-sgd.py

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from cpdflow.model.model import get_input_data_schema

# Training data is read directly from the cpdflow examples on GitHub.
df = pd.read_csv("https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv")
target = "Risk"
protected_attributes = ["Age"]
y = df[target]
# The label and the protected attributes are both excluded from the model inputs.
X = df.drop([target] + protected_attributes, axis=1)
# One-hot encode the object-typed columns. remainder="passthrough" forwards the
# remaining (non-object) columns unchanged; with the default remainder="drop"
# they would be silently discarded and the classifier would never see them.
ct = ColumnTransformer(
    [("ohe", OneHotEncoder(), X.select_dtypes(include=["object"]).columns.tolist())],
    remainder="passthrough",
)
# with_mean=False scales without centering (presumably because the one-hot
# output may be sparse, where centering is not supported).
scaler = StandardScaler(with_mean=False)

model = Pipeline([("ct", ct), ("scaler", scaler), ("clf", SGDClassifier(loss="modified_huber"))]).fit(X, y)
input_data_schema = get_input_data_schema(X=X)
# Hard-coded value; it is not computed from the fitted model in this script.
custom_metrics = {
    "average_precision": 0.9
}
[ ]:
%%writefile german-credit-risk-rf.py

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from cpdflow.model.model import get_input_data_schema

# Training data is read directly from the cpdflow examples on GitHub.
df = pd.read_csv("https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv")
target = "Risk"
protected_attributes = ["Age"]
y = df[target]
# The label and the protected attributes are both excluded from the model inputs.
X = df.drop([target] + protected_attributes, axis=1)
# One-hot encode the object-typed columns. remainder="passthrough" forwards the
# remaining (non-object) columns unchanged; with the default remainder="drop"
# they would be silently discarded and the classifier would never see them.
ct = ColumnTransformer(
    [("ohe", OneHotEncoder(), X.select_dtypes(include=["object"]).columns.tolist())],
    remainder="passthrough",
)

model = Pipeline([("ct", ct), ("clf", RandomForestClassifier())]).fit(X, y)
input_data_schema = get_input_data_schema(X=X)
# Hard-coded value; it is not computed from the fitted model in this script.
custom_metrics = {
    "average_precision": 0.9
}
[ ]:
%%writefile german-credit-risk-svc.py

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from cpdflow.model.model import get_input_data_schema

# Training data is read directly from the cpdflow examples on GitHub.
df = pd.read_csv("https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv")
target = "Risk"
protected_attributes = ["Age"]
y = df[target]
# The label and the protected attributes are both excluded from the model inputs.
X = df.drop([target] + protected_attributes, axis=1)
# One-hot encode the object-typed columns. remainder="passthrough" forwards the
# remaining (non-object) columns unchanged; with the default remainder="drop"
# they would be silently discarded and the classifier would never see them.
ct = ColumnTransformer(
    [("ohe", OneHotEncoder(), X.select_dtypes(include=["object"]).columns.tolist())],
    remainder="passthrough",
)
# with_mean=False scales without centering (presumably because the one-hot
# output may be sparse, where centering is not supported).
scaler = StandardScaler(with_mean=False)

# probability=True makes the fitted SVC expose predict_proba.
model = Pipeline([("ct", ct), ("scaler", scaler), ("clf", SVC(probability=True))]).fit(X, y)
input_data_schema = get_input_data_schema(X=X)
# Hard-coded value; it is not computed from the fitted model in this script.
custom_metrics = {
    "average_precision": 0.9
}
[ ]:
%%writefile german-credit-risk-gbc.py

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from cpdflow.model.model import get_input_data_schema

# Training data is read directly from the cpdflow examples on GitHub.
df = pd.read_csv("https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv")
target = "Risk"
protected_attributes = ["Age"]
y = df[target]
# The label and the protected attributes are both excluded from the model inputs.
X = df.drop([target] + protected_attributes, axis=1)
# One-hot encode the object-typed columns. remainder="passthrough" forwards the
# remaining (non-object) columns unchanged; with the default remainder="drop"
# they would be silently discarded and the classifier would never see them.
ct = ColumnTransformer(
    [("ohe", OneHotEncoder(), X.select_dtypes(include=["object"]).columns.tolist())],
    remainder="passthrough",
)

model = Pipeline([("ct", ct), ("clf", GradientBoostingClassifier())]).fit(X, y)
input_data_schema = get_input_data_schema(X=X)
# Hard-coded value; it is not computed from the fitted model in this script.
custom_metrics = {
    "average_precision": 0.9
}
[ ]:
%%writefile german-credit-risk-custom.py

import pandas as pd
from cpdflow.model.model import get_input_data_schema

# No estimator is fitted in this script; it only prepares the feature frame,
# the input schema and the metrics for the "custom" model entry.
target = "Risk"
protected_attributes = ["Age"]
df = pd.read_csv(
    "https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv"
)
y = df[target]
# Features: every column except the label and the protected attributes.
X = df.drop(columns=[target, *protected_attributes])

input_data_schema = get_input_data_schema(X=X)
# Hard-coded value; nothing in this script computes it.
custom_metrics = {
    "average_precision": 0.9,
}

Read data

[ ]:
# Label column and the protected attributes used when building the payloads.
target = "Risk"
protected_attributes = ["Age"]
# Same training CSV that the model scripts read.
df = pd.read_csv(
    "https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv"
)

Scoring payload (required) and meta payload (optional)

[ ]:
# Both payloads are cut from one random sample, so their rows correspond.
num_records = 100
df_sample = df.sample(n=num_records)

# Meta payload: only the protected attribute columns.
df_meta = df_sample[protected_attributes]
df_meta.to_csv("german_credit_risk_meta.csv", index=False)
display(df_meta.head())

# Scoring payload: the sample without the label and the protected attributes.
df_scoring = df_sample.drop(columns=[target, *protected_attributes])
df_scoring.to_csv("german_credit_risk_scoring.csv", index=False)
df_scoring.head()

Feedback payload (required)

[ ]:
# Feedback payload: 100 random rows with every column kept (label included).
df_feedback = df.sample(n=100)
df_feedback.to_csv("german_credit_risk_feedback.csv", index=False)
df_feedback.head()

Configuration file

[ ]:
# Single configuration dictionary passed to every cpdflow.apply.* call below.
# Top-level sections: platform, ws, wml, wkc, cos, wos and models.
config = {
    # "apikey" is left empty here — presumably it must be filled in before running; confirm.
    "platform": {"apikey": "", "url": "https://us-south.ml.cloud.ibm.com"},
    "ws": {"project_name": "Demo"},
    "wml": {"dev_space": "Dev Space 3", "prod_space": "Prod Space"},
    "wkc": {"catalog_name": "My Catalog", "model_entry_name": "German Credit Risk Model", "model_entry_description": "German Credit Risk Model Description"},
    "cos": {
        # "cos_api_key" is also left empty here.
        "cos_api_key": "",
        "cos_resource_crn": "crn:v1:bluemix:public:iam-identity::a/53be0036a6fd4cdd9f4caca09dbcb6c9::serviceid:ServiceId-07cbf50f-45ec-4dfc-85b4-ea9fb3ce614f",
        "cos_endpoint": "https://s3.us.cloud-object-storage.appdomain.cloud",
        "bucket_name": "my-bucket",
        "training_file_name": "german_credit_data_biased_training.csv",
    },
    "wos": {
        "data_mart_id": "0adabc21-cf18-48c0-a36c-f7e3f3b092e8",
        "dev_service_provider": "WML - Dev",
        "prod_service_provider": "WML - Prod",
        "custom_service_provider": "Custom WML Provider",
        # NOTE(review): "custom-metric.py" is not written by any cell visible in this notebook — confirm it exists.
        "custom_metric": {"custom_monitor_name": "Custom Metrics", "custom_metric_script": "custom-metric.py", "overwrite": True,},
        # The three payload files below are the CSVs written by the payload cells above.
        "scoring_payload": {"file_name": "german_credit_risk_scoring.csv"},
        "meta_payload": {"file_name": "german_credit_risk_meta.csv"},
        "feedback_payload": {"file_name": "german_credit_risk_feedback.csv"},
        "monitor_config": {
            "quality": {"parameters": {"min_feedback_data_size": 50}, "thresholds": [{"metric_id": "area_under_roc", "type": "lower_limit", "value": 0.9}]},
            "drift": {"parameters": {"min_samples": 100, "drift_threshold": 0.1, "train_drift_model": True, "enable_model_drift": False, "enable_data_drift": True}},
            "fairness": {
                "parameters": {
                    # "Sex" carries its own "threshold"; "Age" does not and is given as numeric ranges.
                    "features": [{"feature": "Sex", "majority": ["male"], "minority": ["female"], "threshold": 0.95}, {"feature": "Age", "majority": [[26, 75]], "minority": [[18, 25]]}],
                    "favourable_class": ["No Risk"],
                    "unfavourable_class": ["Risk"],
                    "min_records": 100,
                },
                "thresholds": [
                    {"metric_id": "fairness_value", "specific_values": [{"applies_to": [{"type": "tag", "value": "Age", "key": "feature"}], "value": 80}], "type": "lower_limit", "value": 80}
                ],
            },
            "explainability": {"parameters": {"enabled": True}},
        },
    },
    "models": {
        # One entry per model; "model_script" names the file written by the matching %%writefile cell.
        "model_configs": [
            {"model_name": "German Credit Risk-SGD", "model_script": "german-credit-risk-sgd.py", "update": True, "overwrite": True},
            {"model_name": "German Credit Risk-RF", "model_script": "german-credit-risk-rf.py", "update": True, "overwrite": True},
            {"model_name": "German Credit Risk-SVC", "model_script": "german-credit-risk-svc.py", "update": True, "overwrite": True},
            {"model_name": "German Credit Risk-GBC", "model_script": "german-credit-risk-gbc.py", "update": True, "overwrite": True},
            # The custom entry has a "scoring_url" and no "update" key, unlike the entries above.
            {
                "model_name": "German Credit Risk-custom",
                "model_script": "german-credit-risk-custom.py",
                "scoring_url": "http://ml-provider-ml.itzroks-550003aw18-xko3n2-6ccd7f378ae819553d37d5f2ee142bd6-0000.au-syd.containers.appdomain.cloud/predict",
                "overwrite": True,
            },
        ]
    },
}

# Persist the configuration next to the notebook.
# NOTE: config.json will contain the API key fields in plain text once they are filled in.
with open("config.json", "w") as f:
    json.dump(config, f, indent=4)

# Rebind `config` to whatever init_config returns; the cells below pass that object on.
config = cpdflow.init_config(config=config)

Develop

Specify models in the Develop lifecycle stage.

[ ]:
# Apply the Develop stage to the SGD model (the task list above stores it in the project space).
cpdflow.apply.develop(config=config, model_names=["German Credit Risk-SGD"])

Deploy

Specify models in the Deploy lifecycle stage.

[ ]:
# Apply the Deploy stage to the RF model; space_type="dev" presumably selects the "dev_space" from the config — confirm.
cpdflow.apply.deploy(config=config, model_names=["German Credit Risk-RF"], space_type="dev")

Validate

Specify models in the Validate lifecycle stage.

[ ]:
# Apply the Validate stage to the SVC model and the externally hosted custom model.
cpdflow.apply.validate(config=config, model_names=["German Credit Risk-SVC", "German Credit Risk-custom"])

Operate

Specify models in the Operate lifecycle stage.

[ ]:
# Apply the Operate stage to the GBC model (the task list above runs it in production).
cpdflow.apply.operate(config=config, model_names=["German Credit Risk-GBC"])