Last updated on 2022-09-11 12:56:15 CEST.
Flavor | Version | Tinstall | Tcheck | Ttotal | Status | Flags |
---|---|---|---|---|---|---|
r-devel-linux-x86_64-debian-clang | 0.1.1 | 11.10 | 143.20 | 154.30 | ERROR | |
r-devel-linux-x86_64-debian-gcc | 0.1.1 | 10.08 | 104.34 | 114.42 | ERROR | |
r-devel-linux-x86_64-fedora-clang | 0.1.1 | | | 198.76 | ERROR | |
r-devel-linux-x86_64-fedora-gcc | 0.1.1 | | | 196.36 | ERROR | |
r-devel-windows-x86_64 | 0.1.1 | 20.00 | 182.00 | 202.00 | ERROR | |
r-patched-linux-x86_64 | 0.1.1 | 9.69 | 134.18 | 143.87 | ERROR | |
r-release-linux-x86_64 | 0.1.1 | 10.90 | 133.10 | 144.00 | ERROR | |
r-release-macos-arm64 | 0.1.1 | | | 54.00 | NOTE | |
r-release-macos-x86_64 | 0.1.1 | | | 97.00 | NOTE | |
r-release-windows-x86_64 | 0.1.1 | 49.00 | 173.00 | 222.00 | ERROR | |
r-oldrel-macos-arm64 | 0.1.1 | | | 89.00 | NOTE | |
r-oldrel-macos-x86_64 | 0.1.1 | | | 89.00 | NOTE | |
r-oldrel-windows-ix86+x86_64 | 0.1.1 | 25.00 | 165.00 | 190.00 | ERROR | |
Version: 0.1.1
Check: LazyData
Result: NOTE
'LazyData' is specified without a 'data' directory
Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc, r-devel-windows-x86_64, r-patched-linux-x86_64, r-release-linux-x86_64, r-release-macos-arm64, r-release-macos-x86_64, r-release-windows-x86_64, r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-ix86+x86_64
Version: 0.1.1
Check: tests
Result: ERROR
Running 'testthat.R' [26s/30s]
Running the tests in 'tests/testthat.R' failed.
Complete output:
> library(testthat)
> library(condvis2)
>
> test_check("condvis2")
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
== Failed tests ================================================================
-- Error (test-CVpredict.R:485:3): CVpredict bartMachine factor ----------------
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)`: object 'iris1' not found
Backtrace:
x
1. \-bartMachine::bartMachine(iris1[, -5], iris1[, 5], verbose = FALSE) at test-CVpredict.R:485:2
2. +-base::do.call(build_bart_machine, as.list(match.call())[-1])
3. \-bartMachine (local) `<fn>`(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)
-- Error (test-CVpredict.R:502:3): CVpredict bartMachine numeric ---------------
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)`: object 'iris1' not found
Backtrace:
x
1. \-bartMachine::bartMachine(iris1[, -1], iris1[, 1], verbose = FALSE) at test-CVpredict.R:502:2
2. +-base::do.call(build_bart_machine, as.list(match.call())[-1])
3. \-bartMachine (local) `<fn>`(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
Error: Test failures
Execution halted
Flavor: r-devel-linux-x86_64-debian-clang
Version: 0.1.1
Check: tests
Result: ERROR
Running ‘testthat.R’ [19s/25s]
Running the tests in ‘tests/testthat.R’ failed.
Complete output:
> library(testthat)
> library(condvis2)
>
> test_check("condvis2")
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
══ Failed tests ════════════════════════════════════════════════════════════════
── Error (test-CVpredict.R:485:3): CVpredict bartMachine factor ────────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -5], iris1[, 5], verbose = FALSE) at test-CVpredict.R:485:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)
── Error (test-CVpredict.R:502:3): CVpredict bartMachine numeric ───────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -1], iris1[, 1], verbose = FALSE) at test-CVpredict.R:502:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
Error: Test failures
Execution halted
Flavor: r-devel-linux-x86_64-debian-gcc
Version: 0.1.1
Check: tests
Result: ERROR
Running ‘testthat.R’ [31s/74s]
Running the tests in ‘tests/testthat.R’ failed.
Complete output:
> library(testthat)
> library(condvis2)
>
> test_check("condvis2")
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
══ Failed tests ════════════════════════════════════════════════════════════════
── Error (test-CVpredict.R:485:3): CVpredict bartMachine factor ────────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -5], iris1[, 5], verbose = FALSE) at test-CVpredict.R:485:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)
── Error (test-CVpredict.R:502:3): CVpredict bartMachine numeric ───────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -1], iris1[, 1], verbose = FALSE) at test-CVpredict.R:502:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
Error: Test failures
Execution halted
Flavor: r-devel-linux-x86_64-fedora-clang
Version: 0.1.1
Check: tests
Result: ERROR
Running ‘testthat.R’ [31s/83s]
Running the tests in ‘tests/testthat.R’ failed.
Complete output:
> library(testthat)
> library(condvis2)
>
> test_check("condvis2")
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
══ Failed tests ════════════════════════════════════════════════════════════════
── Error (test-CVpredict.R:485:3): CVpredict bartMachine factor ────────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -5], iris1[, 5], verbose = FALSE) at test-CVpredict.R:485:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)
── Error (test-CVpredict.R:502:3): CVpredict bartMachine numeric ───────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -1], iris1[, 1], verbose = FALSE) at test-CVpredict.R:502:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
Error: Test failures
Execution halted
Flavor: r-devel-linux-x86_64-fedora-gcc
Version: 0.1.1
Check: tests
Result: ERROR
Running 'testthat.R' [27s]
Running the tests in 'tests/testthat.R' failed.
Complete output:
> library(testthat)
> library(condvis2)
>
> test_check("condvis2")
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
══ Failed tests ════════════════════════════════════════════════════════════════
── Error (test-CVpredict.R:485:3): CVpredict bartMachine factor ────────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -5], iris1[, 5], verbose = FALSE) at test-CVpredict.R:485:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)
── Error (test-CVpredict.R:502:3): CVpredict bartMachine numeric ───────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -1], iris1[, 1], verbose = FALSE) at test-CVpredict.R:502:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
Error: Test failures
Execution halted
Flavor: r-devel-windows-x86_64
Version: 0.1.1
Check: tests
Result: ERROR
Running ‘testthat.R’ [24s/29s]
Running the tests in ‘tests/testthat.R’ failed.
Complete output:
> library(testthat)
> library(condvis2)
>
> test_check("condvis2")
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
══ Failed tests ════════════════════════════════════════════════════════════════
── Error (test-CVpredict.R:485:3): CVpredict bartMachine factor ────────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -5], iris1[, 5], verbose = FALSE) at test-CVpredict.R:485:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)
── Error (test-CVpredict.R:502:3): CVpredict bartMachine numeric ───────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -1], iris1[, 1], verbose = FALSE) at test-CVpredict.R:502:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
Error: Test failures
Execution halted
Flavors: r-patched-linux-x86_64, r-release-linux-x86_64
Version: 0.1.1
Check: package dependencies
Result: NOTE
Packages suggested but not available for checking:
'scagnostics', 'bartMachine'
Flavor: r-release-macos-x86_64
Version: 0.1.1
Check: tests
Result: ERROR
Running 'testthat.R' [26s]
Running the tests in 'tests/testthat.R' failed.
Complete output:
> library(testthat)
> library(condvis2)
>
> test_check("condvis2")
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
══ Failed tests ════════════════════════════════════════════════════════════════
── Error (test-CVpredict.R:485:3): CVpredict bartMachine factor ────────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -5], iris1[, 5], verbose = FALSE) at test-CVpredict.R:485:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)
── Error (test-CVpredict.R:502:3): CVpredict bartMachine numeric ───────────────
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)`: object 'iris1' not found
Backtrace:
▆
1. └─bartMachine::bartMachine(iris1[, -1], iris1[, 1], verbose = FALSE) at test-CVpredict.R:502:2
2. ├─base::do.call(build_bart_machine, as.list(match.call())[-1])
3. └─bartMachine (local) `<fn>`(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
Error: Test failures
Execution halted
Flavor: r-release-windows-x86_64
Version: 0.1.1
Check: tests
Result: ERROR
Running 'testthat.R' [27s]
Running the tests in 'tests/testthat.R' failed.
Complete output:
> library(testthat)
> library(condvis2)
>
> test_check("condvis2")
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
== Failed tests ================================================================
-- Error (test-CVpredict.R:485:3): CVpredict bartMachine factor ----------------
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)`: object 'iris1' not found
Backtrace:
x
1. \-bartMachine::bartMachine(iris1[, -5], iris1[, 5], verbose = FALSE) at test-CVpredict.R:485:2
2. +-base::do.call(build_bart_machine, as.list(match.call())[-1])
3. \-bartMachine (local) `<fn>`(X = iris1[, -5], y = iris1[, 5], verbose = FALSE)
-- Error (test-CVpredict.R:502:3): CVpredict bartMachine numeric ---------------
Error in `(function (X = NULL, y = NULL, Xy = NULL, num_trees = 50, num_burn_in = 250,
num_iterations_after_burn_in = 1000, alpha = 0.95, beta = 2,
k = 2, q = 0.9, nu = 3, prob_rule_class = 0.5, mh_prob_steps = c(2.5,
2.5, 4)/9, debug_log = FALSE, run_in_sample = TRUE, s_sq_y = "mse",
sig_sq_est = NULL, cov_prior_vec = NULL, interaction_constraints = NULL,
use_missing_data = FALSE, covariates_to_permute = NULL, num_rand_samps_in_library = 10000,
use_missing_data_dummies_as_covars = FALSE, replace_missing_data_with_x_j_bar = FALSE,
impute_missingness_with_rf_impute = FALSE, impute_missingness_with_x_j_bar_for_lm = TRUE,
mem_cache_for_speed = TRUE, flush_indices_to_save_RAM = TRUE,
serialize = FALSE, seed = NULL, verbose = TRUE)
{
if (verbose) {
cat("bartMachine initializing with", num_trees, "trees...\n")
}
t0 = Sys.time()
if (use_missing_data_dummies_as_covars && replace_missing_data_with_x_j_bar) {
stop("You cannot impute by averages and use missing data as dummies simultaneously.")
}
if ((is.null(X) && is.null(Xy)) || is.null(y) && is.null(Xy)) {
stop("You need to give bartMachine a training set either by specifying X and y or by specifying a matrix Xy which contains the response named \"y.\"\n")
}
else if (!is.null(X) && !is.null(y) && !is.null(Xy)) {
stop("You cannot specify both X,y and Xy simultaneously.")
}
else if (is.null(X) && is.null(y)) {
if (!inherits(Xy, "data.frame")) {
stop(paste("The training data Xy must be a data frame."),
call. = FALSE)
}
y = Xy[, ncol(Xy)]
for (cov in 1:(ncol(Xy) - 1)) {
if (colnames(Xy)[cov] == "") {
colnames(Xy)[cov] = paste("V", cov, sep = "")
}
}
X = as.data.frame(Xy[, 1:(ncol(Xy) - 1)])
colnames(X) = colnames(Xy)[1:(ncol(Xy) - 1)]
}
if (!inherits(X, "data.frame")) {
stop(paste("The training data X must be a data frame."),
call. = FALSE)
}
if (verbose) {
cat("bartMachine vars checked...\n")
}
gc()
y_levels = levels(y)
if (inherits(y, "numeric") || inherits(y, "integer")) {
if (inherits(y, "integer")) {
y = as.numeric(y)
}
java_bart_machine = .jnew("bartMachine.bartMachineRegressionMultThread")
y_remaining = y
pred_type = "regression"
if (inherits(y, "integer")) {
cat("Warning: The response y is integer, bartMachine will run regression.\n")
}
}
else if (inherits(y, "factor") & length(y_levels) == 2) {
java_bart_machine = .jnew("bartMachine.bartMachineClassificationMultThread")
y_remaining = ifelse(y == y_levels[1], 1, 0)
pred_type = "classification"
}
else {
stop("Your response must be either numeric, an integer or a factor with two levels.\n")
}
num_gibbs = num_burn_in + num_iterations_after_burn_in
if (ncol(X) == 0) {
stop("Your data matrix must have at least one attribute.")
}
if (nrow(X) == 0) {
stop("Your data matrix must have at least one observation.")
}
if (length(y) != nrow(X)) {
stop("The number of responses must be equal to the number of observations in the training data.")
}
if (verbose) {
cat("bartMachine java init...\n")
}
if (is.null(colnames(X))) {
colnames(X) = paste("V", seq(from = 1, to = ncol(X),
by = 1), sep = "")
}
if (any(mh_prob_steps < 0)) {
stop("The grow, prune, change ratio parameter vector must all be greater than 0.")
}
predictors_which_are_factors = names(which(sapply(X, is.factor)))
for (predictor in predictors_which_are_factors) {
X[, predictor] = factor(X[, predictor])
}
if (verbose) {
cat("bartMachine factors created...\n")
}
if (sum(is.na(y_remaining)) > 0) {
stop("You cannot have any missing data in your response vector.")
}
rf_imputations_for_missing = NULL
if (impute_missingness_with_rf_impute) {
if (nrow(na.omit(X)) == nrow(X)) {
warning("No missing entries in the training data to impute.")
rf_imputations_for_missing = X
}
else {
predictor_colnums_with_missingness = names(which(colSums(is.na(X)) >
0))
rf_imputations_for_missing = rfImpute(X, y)
rf_imputations_for_missing = rf_imputations_for_missing[,
2:ncol(rf_imputations_for_missing)]
rf_imputations_for_missing = rf_imputations_for_missing[,
predictor_colnums_with_missingness]
}
colnames(rf_imputations_for_missing) = paste(colnames(rf_imputations_for_missing),
"_imp", sep = "")
if (verbose) {
cat("bartMachine after rf imputations...\n")
}
}
if (!use_missing_data && !replace_missing_data_with_x_j_bar) {
rows_before = nrow(X)
X = na.omit(X)
rows_after = nrow(X)
if (rows_before - rows_after > 0) {
stop("You have ", rows_before - rows_after, " observations with missing data. \nYou must either omit your missing data using \"na.omit()\" or turn on the\n\"use_missing_data\" or \"replace_missing_data_with_x_j_bar\" feature in order to use bartMachine.\n")
}
}
else if (replace_missing_data_with_x_j_bar) {
X = imputeMatrixByXbarjContinuousOrModalForBinary(X,
X)
if (verbose) {
cat("Imputed missing data using attribute averages.\n")
}
}
if (verbose) {
cat("bartMachine before preprocess...\n")
}
pre_process_obj = pre_process_training_data(X, use_missing_data_dummies_as_covars,
rf_imputations_for_missing)
model_matrix_training_data = cbind(pre_process_obj$data,
y_remaining)
p = ncol(model_matrix_training_data) - 1
factor_lengths = pre_process_obj$factor_lengths
if (verbose) {
cat("bartMachine after preprocess...", p, "total features...\n")
}
null_cov_prior_vec = is.null(cov_prior_vec)
if (null_cov_prior_vec && length(factor_lengths) > 0) {
cov_prior_vec = rep(1, p)
j_factor_begin = p - sum(factor_lengths) + 1
for (l in 1:length(factor_lengths)) {
factor_length = factor_lengths[l]
cov_prior_vec[j_factor_begin:(j_factor_begin + factor_length -
1)] = 1/factor_length
j_factor_begin = j_factor_begin + factor_length
}
}
if (!is.null(interaction_constraints)) {
if (!mem_cache_for_speed) {
stop("In order to use interaction constraints, \"mem_cache_for_speed\" must be set to TRUE.")
}
if (!inherits(interaction_constraints, "list")) {
stop("specified parameter \"interaction_constraints\" must be a list")
}
else if (length(interaction_constraints) == 0) {
stop("interaction_constraints list cannot be empty")
}
for (a in 1:length(interaction_constraints)) {
vars_a = interaction_constraints[[a]]
for (b in 1:length(vars_a)) {
var = vars_a[b]
if ((inherits(var, "numeric") | inherits(var,
"integer")) & !(var %in% (1:p))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is numeric but not one of 1, ...,", p,
"where", p, "is the number of columns in X."))
}
if (inherits(var, "factor")) {
var = as.character(var)
}
if (inherits(var, "character") & !(var %in% colnames(X))) {
stop(paste("Element", var, "in interaction_constraints vector number",
a, "is a string but not one of the column names of X."))
}
if (inherits(var, "integer") | inherits(var,
"numeric")) {
vars_a[b] = var - 1
}
else if (inherits(var, "character")) {
vars_a[b] = which(colnames(X) == var) - 1
}
}
interaction_constraints[[a]] = as.integer(vars_a)
}
}
if (!is.null(covariates_to_permute)) {
for (cov in covariates_to_permute) {
if (!(cov %in% colnames(model_matrix_training_data)) &&
inherits(cov, "character")) {
stop("Covariate \"", cov, "\" not found in design matrix.")
}
}
permuted_order = sample(1:nrow(model_matrix_training_data),
nrow(model_matrix_training_data))
model_matrix_training_data[, covariates_to_permute] = model_matrix_training_data[permuted_order,
covariates_to_permute]
}
if (debug_log & verbose) {
cat("warning: printing out the log file will slow down the runtime significantly.\n")
.jcall(java_bart_machine, "V", "writeStdOutToLogFile")
}
if (ncol(model_matrix_training_data) - 1 >= nrow(model_matrix_training_data)) {
if (verbose) {
cat("warning: cannot use MSE of linear model for s_sq_y if p > n. bartMachine will use sample var(y) instead.\n")
}
s_sq_y = "var"
}
if (is.null(sig_sq_est)) {
if (pred_type == "regression") {
y_range = max(y) - min(y)
y_trans = (y - min(y))/y_range - 0.5
if (s_sq_y == "mse") {
X_for_lm = as.data.frame(model_matrix_training_data)[1:(ncol(model_matrix_training_data) -
1)]
if (impute_missingness_with_x_j_bar_for_lm) {
X_for_lm = imputeMatrixByXbarjContinuousOrModalForBinary(X_for_lm,
X_for_lm)
}
else if (nrow(na.omit(X_for_lm)) == 0) {
stop("The data does not have enough full records to estimate a naive prediction error. Please rerun with \"impute_missingness_with_x_j_bar_for_lm\" set to true.")
}
mod = lm(y_trans ~ ., X_for_lm)
mse = var(mod$residuals)
sig_sq_est = as.numeric(mse)
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else if (s_sq_y == "var") {
sig_sq_est = as.numeric(var(y_trans))
.jcall(java_bart_machine, "V", "setSampleVarY",
sig_sq_est)
}
else {
stop("s_sq_y must be \"mse\" or \"var\"", call. = FALSE)
}
sig_sq_est = sig_sq_est * y_range^2
}
if (verbose) {
cat("bartMachine sigsq estimated...\n")
}
}
else {
if (verbose) {
cat("bartMachine using previous sigsq estimated...\n")
}
}
if (!exists("BART_NUM_CORES", envir = bartMachine_globals)) {
assign("BART_NUM_CORES", BART_NUM_CORES_DEFAULT, bartMachine_globals)
}
num_cores = get("BART_NUM_CORES", bartMachine_globals)
.jcall(java_bart_machine, "V", "setNumCores", as.integer(num_cores))
.jcall(java_bart_machine, "V", "setNumTrees", as.integer(num_trees))
.jcall(java_bart_machine, "V", "setNumGibbsBurnIn", as.integer(num_burn_in))
.jcall(java_bart_machine, "V", "setNumGibbsTotalIterations",
as.integer(num_gibbs))
.jcall(java_bart_machine, "V", "setAlpha", alpha)
.jcall(java_bart_machine, "V", "setBeta", beta)
.jcall(java_bart_machine, "V", "setK", k)
.jcall(java_bart_machine, "V", "setQ", q)
.jcall(java_bart_machine, "V", "setNU", nu)
mh_prob_steps = mh_prob_steps/sum(mh_prob_steps)
.jcall(java_bart_machine, "V", "setProbGrow", mh_prob_steps[1])
.jcall(java_bart_machine, "V", "setProbPrune", mh_prob_steps[2])
.jcall(java_bart_machine, "V", "setVerbose", verbose)
.jcall(java_bart_machine, "V", "setMemCacheForSpeed", mem_cache_for_speed)
.jcall(java_bart_machine, "V", "setFlushIndicesToSaveRAM",
flush_indices_to_save_RAM)
if (!is.null(seed)) {
.jcall(java_bart_machine, "V", "setSeed", as.integer(seed))
if (num_cores > 1) {
warning("Setting the seed when using parallelization does not result in deterministic output.\nIf you need deterministic output, you must run \"set_bart_machine_num_cores(1)\" and then build the BART model with the set seed.")
}
}
.jcall(java_bart_machine, "V", "setNormSamples", rnorm(num_rand_samps_in_library))
n_plus_hyper_nu = nrow(model_matrix_training_data) + nu
.jcall(java_bart_machine, "V", "setGammaSamples", rchisq(num_rand_samps_in_library,
n_plus_hyper_nu))
if (length(cov_prior_vec) != 0) {
offset = length(cov_prior_vec) - (ncol(model_matrix_training_data) -
1)
if (offset < 0) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was lengthened with 1's)"))
cov_prior_vec = c(cov_prior_vec, rep(1, -offset))
}
if (length(cov_prior_vec) != ncol(model_matrix_training_data) -
1) {
warning(paste("covariate prior vector length =",
length(cov_prior_vec), "has to be equal to p =",
ncol(model_matrix_training_data) - 1, "(the vector was shortened)"))
cov_prior_vec = cov_prior_vec[1:(ncol(model_matrix_training_data) -
1)]
}
if (sum(cov_prior_vec > 0) != ncol(model_matrix_training_data) -
1) {
stop("covariate prior vector has to have all its elements be positive",
call. = FALSE)
return(TRUE)
}
.jcall(java_bart_machine, "V", "setCovSplitPrior", .jarray(as.numeric(cov_prior_vec)))
}
if (!is.null(interaction_constraints)) {
.jcall(java_bart_machine, "V", "intializeInteractionConstraints",
length(interaction_constraints))
for (interaction_constraint_vector in interaction_constraints) {
for (b in 1:length(interaction_constraint_vector)) {
.jcall(java_bart_machine, "V", "addInteractionConstraint",
as.integer(interaction_constraint_vector[b]),
.jarray(as.integer(interaction_constraint_vector[-b])))
}
}
}
for (i in 1:nrow(model_matrix_training_data)) {
row_as_char = as.character(model_matrix_training_data[i,
])
row_as_char = replace(row_as_char, is.na(row_as_char),
"NA")
.jcall(java_bart_machine, "V", "addTrainingDataRow",
row_as_char)
}
.jcall(java_bart_machine, "V", "finalizeTrainingData")
if (verbose) {
cat("bartMachine training data finalized...\n")
}
if (verbose) {
cat("Now building bartMachine for", pred_type)
if (pred_type == "classification") {
cat(" where \"", y_levels[1], "\" is considered the target level",
sep = "")
}
cat("...")
if (length(cov_prior_vec) != 0) {
cat("Covariate importance prior ON. ")
}
if (use_missing_data) {
cat("Missing data feature ON. ")
}
if (use_missing_data_dummies_as_covars) {
cat("Missingness used as covariates. ")
}
if (impute_missingness_with_rf_impute) {
cat("Missing values imputed via rfImpute. ")
}
cat("\n")
}
.jcall(java_bart_machine, "V", "Build")
bart_machine = list(java_bart_machine = java_bart_machine,
training_data_features = colnames(model_matrix_training_data)[1:ifelse(use_missing_data &&
use_missing_data_dummies_as_covars, (p/2), p)], training_data_features_with_missing_features = colnames(model_matrix_training_data)[1:p],
X = X, y = y, y_levels = y_levels, pred_type = pred_type,
model_matrix_training_data = model_matrix_training_data,
n = nrow(model_matrix_training_data), p = p, num_cores = num_cores,
num_trees = num_trees, num_burn_in = num_burn_in, num_iterations_after_burn_in = num_iterations_after_burn_in,
num_gibbs = num_gibbs, alpha = alpha, beta = beta, k = k,
q = q, nu = nu, prob_rule_class = prob_rule_class, mh_prob_steps = mh_prob_steps,
s_sq_y = s_sq_y, run_in_sample = run_in_sample, sig_sq_est = sig_sq_est,
time_to_build = Sys.time() - t0, cov_prior_vec = cov_prior_vec,
interaction_constraints = interaction_constraints, use_missing_data = use_missing_data,
use_missing_data_dummies_as_covars = use_missing_data_dummies_as_covars,
replace_missing_data_with_x_j_bar = replace_missing_data_with_x_j_bar,
impute_missingness_with_rf_impute = impute_missingness_with_rf_impute,
impute_missingness_with_x_j_bar_for_lm = impute_missingness_with_x_j_bar_for_lm,
verbose = verbose, serialize = serialize, mem_cache_for_speed = mem_cache_for_speed,
flush_indices_to_save_RAM = flush_indices_to_save_RAM,
debug_log = debug_log, seed = seed, num_rand_samps_in_library = num_rand_samps_in_library)
if (!null_cov_prior_vec) {
bart_machine$cov_prior_vec = cov_prior_vec
}
if (run_in_sample) {
if (verbose) {
cat("evaluating in sample data...")
}
if (pred_type == "regression") {
y_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
y_hat_train = rowMeans(y_hat_posterior_samples)
bart_machine$y_hat_train = y_hat_train
bart_machine$residuals = y_remaining - bart_machine$y_hat_train
bart_machine$L1_err_train = sum(abs(bart_machine$residuals))
bart_machine$L2_err_train = sum(bart_machine$residuals^2)
bart_machine$PseudoRsq = 1 - bart_machine$L2_err_train/sum((y_remaining -
mean(y_remaining))^2)
bart_machine$rmse_train = sqrt(bart_machine$L2_err_train/bart_machine$n)
}
else if (pred_type == "classification") {
p_hat_posterior_samples = .jcall(bart_machine$java_bart_machine,
"[[D", "getGibbsSamplesForPrediction", .jarray(model_matrix_training_data,
dispatch = TRUE), as.integer(num_cores), simplify = TRUE)
p_hat_train = rowMeans(p_hat_posterior_samples)
y_hat_train = labels_to_y_levels(bart_machine, p_hat_train >
prob_rule_class)
bart_machine$p_hat_train = p_hat_train
bart_machine$y_hat_train = y_hat_train
confusion_matrix = as.data.frame(matrix(NA, nrow = 3,
ncol = 3))
rownames(confusion_matrix) = c(paste("actual", y_levels),
"use errors")
colnames(confusion_matrix) = c(paste("predicted",
y_levels), "model errors")
confusion_matrix[1:2, 1:2] = as.integer(table(y,
y_hat_train))
confusion_matrix[3, 1] = round(confusion_matrix[2,
1]/(confusion_matrix[1, 1] + confusion_matrix[2,
1]), 3)
confusion_matrix[3, 2] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 2] + confusion_matrix[2,
2]), 3)
confusion_matrix[1, 3] = round(confusion_matrix[1,
2]/(confusion_matrix[1, 1] + confusion_matrix[1,
2]), 3)
confusion_matrix[2, 3] = round(confusion_matrix[2,
1]/(confusion_matrix[2, 1] + confusion_matrix[2,
2]), 3)
confusion_matrix[3, 3] = round((confusion_matrix[1,
2] + confusion_matrix[2, 1])/sum(confusion_matrix[1:2,
1:2]), 3)
bart_machine$confusion_matrix = confusion_matrix
bart_machine$misclassification_error = confusion_matrix[3,
3]
}
if (verbose) {
cat("done\n")
}
}
if (serialize) {
cat("serializing in order to be saved for future R sessions...")
.jcache(bart_machine$java_bart_machine)
cat("done\n")
}
class(bart_machine) = "bartMachine"
bart_machine
})(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)`: object 'iris1' not found
Backtrace:
x
1. \-bartMachine::bartMachine(iris1[, -1], iris1[, 1], verbose = FALSE) at test-CVpredict.R:502:2
2. +-base::do.call(build_bart_machine, as.list(match.call())[-1])
3. \-bartMachine (local) `<fn>`(X = iris1[, -1], y = iris1[, 1], verbose = FALSE)
[ FAIL 2 | WARN 2 | SKIP 0 | PASS 205 ]
Error: Test failures
Execution halted
Flavor: r-oldrel-windows-ix86+x86_64