You can save and load treatment plans. Note: treatment plans are intended to be used with the version of vtreat
they were constructed with (though we try to make plans forward-compatible). So it is a good idea to have procedures to re-build treatment plans.
The easiest way to save vtreat treatment plans is to use R’s built-in saveRDS function.
To save in a file:
# Build a small example training frame, design a vtreat treatment plan for
# it, and save the plan to a temporary .RDS file.
library("vtreat")

# Toy training data: categorical `x` and numeric `z` (both with missing
# values) plus a logical outcome `y`.
dTrainC <- data.frame(x=c('a','a','a','b','b',NA,NA),
                      z=c(1,2,3,4,NA,6,NA),
                      y=c(FALSE,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE))

# Design the plan with 'y' as the outcome column and TRUE as the target value.
treatmentsC <- designTreatmentsC(dTrainC, colnames(dTrainC),
                                 'y', TRUE,
                                 verbose= FALSE)

# Write the plan to a uniquely named temporary file.
fileName <- paste0(tempfile(c('vtreatPlan')), '.RDS')
saveRDS(treatmentsC,fileName)

# Drop the in-memory plan so that any later use has to come from the file.
rm(list=c('treatmentsC'))
And then to restore and use.
# Restore the treatment plan from the .RDS file named by `fileName` and
# apply it to new data.
library("vtreat")

# Reload the saved plan.
treatmentsC <- readRDS(fileName)

# New data to treat; it includes a level ('c') and missing values.
dTestC <- data.frame(x=c('a','b','c',NA),z=c(10,20,30,NA))

# Apply the plan; pruneSig= c() is passed through exactly as in the original.
dTestCTreated <- prepare(treatmentsC, dTestC, pruneSig= c())

# clean up
unlink(fileName)
Treatment plans can also be stored as binary blobs in databases. Using ideas from here gives us the following through the DBI interface.
# Store the treatment plan as a binary blob in an in-memory SQLite database.
# `con` stays NULL when RSQLite or DBI is not installed, so later code can
# test is.null(con) before touching the database.
con <- NULL
if (requireNamespace('RSQLite', quietly = TRUE) &&
    requireNamespace('DBI', quietly = TRUE)) {
  library("RSQLite")
  con <- dbConnect(drv=SQLite(), dbname=":memory:")
  # create table
  dbExecute(con, 'create table if not exists treatments
(key varchar(200) primary key,
treatment blob)')
  # wrap data: one row whose `treatment` cell is the serialized plan (a raw
  # vector) held in a list column, so it can be bound to the blob column
  df <- data.frame(
    key='treatmentsC',
    treatment = I(list(serialize(treatmentsC, NULL))))
  # Clear any previous version
  dbExecute(con,
            "delete from treatments where key='treatmentsC'")
  # insert treatment plan
  # deprecated
  # dbGetPreparedQuery(con,
  # 'insert into treatments (key, treatment) values (:key, :treatment)',
  # bind.data=df)
  dbExecute(con,
            'insert into treatments (key, treatment) values (:key, :treatment)',
            params=df)
  # Describe the connection; this value is the result of the `if` expression.
  constr <- paste(capture.output(print(con)),collapse='\n')
  paste('saved to db: ', constr)
}
## [1] "saved to db: <SQLiteConnection>\n Path: :memory:\n Extensions: TRUE"
# Remove the in-memory copies; the connection `con` is deliberately left open.
rm(list= c('treatmentsC', 'dTestCTreated'))
And we can read the treatment back in as follows.
# Read the serialized treatment plan back from the database, rebuild it,
# and apply it to dTestC. Skipped entirely when no connection was opened.
if(!is.null(con)) {
  # Unserialize each value in the `treatment` column of the matching rows.
  treatmentsList <- lapply(
    dbGetQuery(con,
               "select * from treatments where key='treatmentsC'")$treatment,
    unserialize)
  # Use the first plan returned by the query.
  treatmentsC <- treatmentsList[[1]]
  dbDisconnect(con)
  dTestCTreated <- prepare(treatmentsC, dTestC, pruneSig= c())
  print(dTestCTreated)
}
## x_catP x_catB z z_isBAD x_lev_NA x_lev_x_a x_lev_x_b
## 1 0.42857143 -0.9807709 10.0 0 0 1 0
## 2 0.28571429 -0.2876737 20.0 0 0 0 1
## 3 0.07142857 0.0000000 30.0 0 0 0 0
## 4 0.28571429 9.6158638 3.2 1 1 0 0