The goal of the `scorecard`
package is to make the development
of traditional credit risk scorecard models easier and more efficient by
providing functions for some common tasks, which are summarized below. This
package can also be used in the development of machine learning models for
binary classification.
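
- data preprocessing (`split_df`, `replace_na`, `one_hot`, `var_scale`)
- weight of evidence (woe) binning (`woebin`, `woebin_plot`, `woebin_adj`, `woebin_ply`)
- variable selection (`var_filter`, `iv`, `vif`)
- performance evaluation (`perf_eva`, `perf_cv`, `perf_psi`)
- scorecard scaling (`scorecard`, `scorecard2`, `scorecard_ply`)
- scorecard report (`gains_table`, `report`)

Install the release version of `scorecard` from CRAN with:

    install.packages("scorecard")

Install the latest version of `scorecard` from GitHub with:

    # install.packages("devtools")
    devtools::install_github("shichenxie/scorecard")
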
This is a basic example which shows you how to develop a common credit risk scorecard:
# Traditional Credit Scoring Using Logistic Regression
#
# End-to-end example script: variable filtering, train/test split, WOE
# binning, logistic regression with stepwise selection, performance
# evaluation, scorecard creation, and PSI on the resulting scores.
library(scorecard)

# data preparing ------
# load germancredit data
data("germancredit")
# filter variable via missing rate, iv, identical value rate
dt_f = var_filter(germancredit, y="creditability")
# breaking dt into train and test
dt_list = split_df(dt_f, y="creditability", ratios = c(0.6, 0.4), seed = 30)
# target column of each split, reused below for evaluation and psi
label_list = lapply(dt_list, function(x) x$creditability)

# woe binning ------
bins = woebin(dt_f, y="creditability")
# woebin_plot(bins)

# binning adjustment
## adjust breaks interactively
# breaks_adj = woebin_adj(dt_f, "creditability", bins)
## or specify breaks manually
breaks_adj = list(
  age.in.years=c(26, 35, 40),
  other.debtors.or.guarantors=c("none", "co-applicant%,%guarantor"))
bins_adj = woebin(dt_f, y="creditability", breaks_list=breaks_adj)

# converting train and test into woe values
dt_woe_list = lapply(dt_list, function(x) woebin_ply(x, bins_adj))

# glm / selecting variables ------
m1 = glm( creditability ~ ., family = binomial(), data = dt_woe_list$train)
# vif(m1, merge_coef = TRUE) # summary(m1)

# Select a formula-based model by AIC (or by LASSO for large dataset)
m_step = step(m1, direction="both", trace = FALSE)
# re-evaluate the call stored on the stepwise result to obtain the final model
m2 = eval(m_step$call)
# vif(m2, merge_coef = TRUE) # summary(m2)

# performance ks & roc ------
## predicted probability
pred_list = lapply(dt_woe_list, function(x) predict(m2, x, type='response'))
## Adjusting for oversampling (support.sas.com/kb/22/601.html)
# card_prob_adj = scorecard2(bins_adj, dt=dt_list$train, y='creditability',
#   x=sub('_woe$','',names(coef(m2))[-1]), badprob_pop=0.03, return_prob=TRUE)

## performance
perf = perf_eva(pred = pred_list, label = label_list)
# perf_adj = perf_eva(pred = card_prob_adj$prob, label = label_list$train)

# score ------
## scorecard
card = scorecard(bins_adj, m2)
## credit score
score_list = lapply(dt_list, function(x) scorecard_ply(x, card))
## psi
perf_psi(score = score_list, label = label_list)