German Credit Risk¶
A data scientist is working on five models:
German Credit Risk-SGD - Stochastic gradient descent classifier
German Credit Risk-RF - Random forest classifier
German Credit Risk-SVC - Support vector classifier
German Credit Risk-custom - Custom ML model hosted externally
German Credit Risk-GBC - Gradient boosting classifier
Assume the following tasks:
Store German Credit Risk-SGD in the project space
Deploy German Credit Risk-RF to the development space
Validate German Credit Risk-SVC and German Credit Risk-custom in the development environment
Operate German Credit Risk-GBC in the production environment
[ ]:
! pip install cpdflow
Import libraries¶
[ ]:
import json
import logging
import cpdflow
import pandas as pd
from IPython.display import display
# Set the "cpdflow" logger's level to INFO.
cpdflow_logger = logging.getLogger("cpdflow")
cpdflow_logger.setLevel(logging.INFO)
Model script¶
[ ]:
%%writefile german-credit-risk-sgd.py
# Model script for "German Credit Risk-SGD".
# Fits a scikit-learn pipeline (one-hot encoding -> scaling -> SGD classifier)
# on the German credit training data and leaves `model`, `input_data_schema`
# and `custom_metrics` defined at module level (presumably read by cpdflow).
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from cpdflow.model.model import get_input_data_schema

df = pd.read_csv(
    "https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv"
)

target = "Risk"
protected_attributes = ["Age"]

# Labels, and the feature frame with the label and protected attributes removed.
y = df[target]
X = df.drop(columns=[target] + protected_attributes)

# One-hot encode the object-dtype columns.
# NOTE(review): only the "ohe" transformer is listed, so with ColumnTransformer's
# default `remainder` the non-object columns would not reach the classifier --
# confirm this is intended.
ct = ColumnTransformer(
    transformers=[("ohe", OneHotEncoder(), X.select_dtypes(include=["object"]).columns.tolist())]
)

# Scale without centering (presumably because the encoder output is sparse -- confirm).
scaler = StandardScaler(with_mean=False)

model = Pipeline(steps=[("ct", ct), ("scaler", scaler), ("clf", SGDClassifier(loss="modified_huber"))])
model.fit(X, y)

input_data_schema = get_input_data_schema(X=X)

# Hard-coded value; it is not computed from the fitted model.
custom_metrics = {"average_precision": 0.9}
[ ]:
%%writefile german-credit-risk-rf.py
# Model script for "German Credit Risk-RF".
# Fits a scikit-learn pipeline (one-hot encoding -> random forest) on the German
# credit training data and leaves `model`, `input_data_schema` and
# `custom_metrics` defined at module level (presumably read by cpdflow).
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from cpdflow.model.model import get_input_data_schema

df = pd.read_csv(
    "https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv"
)

target = "Risk"
protected_attributes = ["Age"]

# Labels, and the feature frame with the label and protected attributes removed.
y = df[target]
X = df.drop(columns=[target] + protected_attributes)

# One-hot encode the object-dtype columns.
# NOTE(review): only the "ohe" transformer is listed, so with ColumnTransformer's
# default `remainder` the non-object columns would not reach the classifier --
# confirm this is intended.
ct = ColumnTransformer(
    transformers=[("ohe", OneHotEncoder(), X.select_dtypes(include=["object"]).columns.tolist())]
)

model = Pipeline(steps=[("ct", ct), ("clf", RandomForestClassifier())])
model.fit(X, y)

input_data_schema = get_input_data_schema(X=X)

# Hard-coded value; it is not computed from the fitted model.
custom_metrics = {"average_precision": 0.9}
[ ]:
%%writefile german-credit-risk-svc.py
# Model script for "German Credit Risk-SVC".
# Fits a scikit-learn pipeline (one-hot encoding -> scaling -> SVC with
# probability estimates enabled) on the German credit training data and leaves
# `model`, `input_data_schema` and `custom_metrics` defined at module level
# (presumably read by cpdflow).
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from cpdflow.model.model import get_input_data_schema

df = pd.read_csv(
    "https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv"
)

target = "Risk"
protected_attributes = ["Age"]

# Labels, and the feature frame with the label and protected attributes removed.
y = df[target]
X = df.drop(columns=[target] + protected_attributes)

# One-hot encode the object-dtype columns.
# NOTE(review): only the "ohe" transformer is listed, so with ColumnTransformer's
# default `remainder` the non-object columns would not reach the classifier --
# confirm this is intended.
ct = ColumnTransformer(
    transformers=[("ohe", OneHotEncoder(), X.select_dtypes(include=["object"]).columns.tolist())]
)

# Scale without centering (presumably because the encoder output is sparse -- confirm).
scaler = StandardScaler(with_mean=False)

model = Pipeline(steps=[("ct", ct), ("scaler", scaler), ("clf", SVC(probability=True))])
model.fit(X, y)

input_data_schema = get_input_data_schema(X=X)

# Hard-coded value; it is not computed from the fitted model.
custom_metrics = {"average_precision": 0.9}
[ ]:
%%writefile german-credit-risk-gbc.py
# Model script for "German Credit Risk-GBC".
# Fits a scikit-learn pipeline (one-hot encoding -> gradient boosting) on the
# German credit training data and leaves `model`, `input_data_schema` and
# `custom_metrics` defined at module level (presumably read by cpdflow).
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from cpdflow.model.model import get_input_data_schema

df = pd.read_csv(
    "https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv"
)

target = "Risk"
protected_attributes = ["Age"]

# Labels, and the feature frame with the label and protected attributes removed.
y = df[target]
X = df.drop(columns=[target] + protected_attributes)

# One-hot encode the object-dtype columns.
# NOTE(review): only the "ohe" transformer is listed, so with ColumnTransformer's
# default `remainder` the non-object columns would not reach the classifier --
# confirm this is intended.
ct = ColumnTransformer(
    transformers=[("ohe", OneHotEncoder(), X.select_dtypes(include=["object"]).columns.tolist())]
)

model = Pipeline(steps=[("ct", ct), ("clf", GradientBoostingClassifier())])
model.fit(X, y)

input_data_schema = get_input_data_schema(X=X)

# Hard-coded value; it is not computed from the fitted model.
custom_metrics = {"average_precision": 0.9}
[ ]:
%%writefile german-credit-risk-custom.py
# Model script for "German Credit Risk-custom".
# No estimator is fitted here; the script only derives the input data schema
# from the training features and defines `custom_metrics`.
import pandas as pd
from cpdflow.model.model import get_input_data_schema

df = pd.read_csv(
    "https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv"
)

target = "Risk"
protected_attributes = ["Age"]

# Labels, and the feature frame with the label and protected attributes removed.
y = df[target]
X = df.drop(columns=[target] + protected_attributes)

input_data_schema = get_input_data_schema(X=X)

# Hard-coded value; it is not computed from any model output.
custom_metrics = {
    "average_precision": 0.9,
}
Read data¶
[ ]:
# Load the German credit training data used to build the payload files below.
training_data_url = "https://raw.githubusercontent.com/randyphoa/cpdflow/main/examples/german_credit_data_biased_training.csv"
df = pd.read_csv(training_data_url)

# Label column and the protected attribute columns.
target = "Risk"
protected_attributes = ["Age"]
Scoring payload (required) and meta payload (optional)¶
[ ]:
# Draw one random sample and split it into two payloads over the same rows:
#   - meta payload: the protected attribute columns only
#   - scoring payload: everything except the label and protected attributes
num_records = 100
df_sample = df.sample(n=num_records)

df_meta = df_sample[protected_attributes]
df_scoring = df_sample.drop(columns=[target] + protected_attributes)

# File names match the "meta_payload" / "scoring_payload" entries in the config.
df_meta.to_csv("german_credit_risk_meta.csv", index=False)
df_scoring.to_csv("german_credit_risk_scoring.csv", index=False)

# Preview both frames; the final expression is rendered as the cell output.
display(df_meta.head())
df_scoring.head()
Feedback payload (required)¶
[ ]:
# Feedback payload: 100 randomly sampled rows with every column kept
# (the label column is not dropped here).
feedback_size = 100
df_feedback = df.sample(n=feedback_size)

# File name matches the "feedback_payload" entry in the config.
df_feedback.to_csv("german_credit_risk_feedback.csv", index=False)
df_feedback.head()
Configuration file¶
[ ]:
# Monitor settings, keyed by monitor name (quality, drift, fairness, explainability).
monitor_config = {
    "quality": {
        "parameters": {"min_feedback_data_size": 50},
        "thresholds": [{"metric_id": "area_under_roc", "type": "lower_limit", "value": 0.9}],
    },
    "drift": {
        "parameters": {
            "min_samples": 100,
            "drift_threshold": 0.1,
            "train_drift_model": True,
            "enable_model_drift": False,
            "enable_data_drift": True,
        }
    },
    "fairness": {
        "parameters": {
            "features": [
                {"feature": "Sex", "majority": ["male"], "minority": ["female"], "threshold": 0.95},
                {"feature": "Age", "majority": [[26, 75]], "minority": [[18, 25]]},
            ],
            "favourable_class": ["No Risk"],
            "unfavourable_class": ["Risk"],
            "min_records": 100,
        },
        "thresholds": [
            {
                "metric_id": "fairness_value",
                "specific_values": [{"applies_to": [{"type": "tag", "value": "Age", "key": "feature"}], "value": 80}],
                "type": "lower_limit",
                "value": 80,
            }
        ],
    },
    "explainability": {"parameters": {"enabled": True}},
}

# One entry per model; each points at the script written by a %%writefile cell above.
# The custom model has no "update" flag and carries a "scoring_url" instead.
model_configs = [
    {"model_name": "German Credit Risk-SGD", "model_script": "german-credit-risk-sgd.py", "update": True, "overwrite": True},
    {"model_name": "German Credit Risk-RF", "model_script": "german-credit-risk-rf.py", "update": True, "overwrite": True},
    {"model_name": "German Credit Risk-SVC", "model_script": "german-credit-risk-svc.py", "update": True, "overwrite": True},
    {"model_name": "German Credit Risk-GBC", "model_script": "german-credit-risk-gbc.py", "update": True, "overwrite": True},
    {
        "model_name": "German Credit Risk-custom",
        "model_script": "german-credit-risk-custom.py",
        "scoring_url": "http://ml-provider-ml.itzroks-550003aw18-xko3n2-6ccd7f378ae819553d37d5f2ee142bd6-0000.au-syd.containers.appdomain.cloud/predict",
        "overwrite": True,
    },
]

# Full configuration. "apikey" and "cos_api_key" are empty strings here.
# NOTE(review): "custom-metric.py" is referenced below but is not written by any
# cell visible in this notebook -- confirm it exists before running.
config = {
    "platform": {"apikey": "", "url": "https://us-south.ml.cloud.ibm.com"},
    "ws": {"project_name": "Demo"},
    "wml": {"dev_space": "Dev Space 3", "prod_space": "Prod Space"},
    "wkc": {
        "catalog_name": "My Catalog",
        "model_entry_name": "German Credit Risk Model",
        "model_entry_description": "German Credit Risk Model Description",
    },
    "cos": {
        "cos_api_key": "",
        "cos_resource_crn": "crn:v1:bluemix:public:iam-identity::a/53be0036a6fd4cdd9f4caca09dbcb6c9::serviceid:ServiceId-07cbf50f-45ec-4dfc-85b4-ea9fb3ce614f",
        "cos_endpoint": "https://s3.us.cloud-object-storage.appdomain.cloud",
        "bucket_name": "my-bucket",
        "training_file_name": "german_credit_data_biased_training.csv",
    },
    "wos": {
        "data_mart_id": "0adabc21-cf18-48c0-a36c-f7e3f3b092e8",
        "dev_service_provider": "WML - Dev",
        "prod_service_provider": "WML - Prod",
        "custom_service_provider": "Custom WML Provider",
        "custom_metric": {
            "custom_monitor_name": "Custom Metrics",
            "custom_metric_script": "custom-metric.py",
            "overwrite": True,
        },
        "scoring_payload": {"file_name": "german_credit_risk_scoring.csv"},
        "meta_payload": {"file_name": "german_credit_risk_meta.csv"},
        "feedback_payload": {"file_name": "german_credit_risk_feedback.csv"},
        "monitor_config": monitor_config,
    },
    "models": {"model_configs": model_configs},
}

# Save the raw dictionary as JSON, then replace `config` with the value
# returned by cpdflow.init_config for use by the lifecycle calls below.
with open("config.json", "w") as config_file:
    json.dump(config, config_file, indent=4)

config = cpdflow.init_config(config=config)
Develop¶
Specify models in the Develop lifecycle stage.
[ ]:
# Develop stage: applied to the SGD model only.
develop_model_names = ["German Credit Risk-SGD"]
cpdflow.apply.develop(config=config, model_names=develop_model_names)
Deploy¶
Specify models in the Deploy lifecycle stage.
[ ]:
# Deploy stage: applied to the random forest model, with space_type "dev".
deploy_model_names = ["German Credit Risk-RF"]
cpdflow.apply.deploy(config=config, model_names=deploy_model_names, space_type="dev")
Validate¶
Specify models in the Validate lifecycle stage.
[ ]:
# Validate stage: applied to the SVC model and the externally hosted custom model.
validate_model_names = ["German Credit Risk-SVC", "German Credit Risk-custom"]
cpdflow.apply.validate(config=config, model_names=validate_model_names)
Operate¶
Specify models in the Operate lifecycle stage.
[ ]:
# Operate stage: applied to the gradient boosting model only.
operate_model_names = ["German Credit Risk-GBC"]
cpdflow.apply.operate(config=config, model_names=operate_model_names)