Below, we provide an example in which we apply Diagnostic 3 to an artificial data set to illustrate covariate balance for a time-varying exposure without censoring.
# Load the example_sml data set.
data("example_sml")

# Columns removed from the example data before exposure history is
# constructed with makehistory.one() under the name prefix "h".
drops <- c("h_0", "h_1", "h_2")
mydata <- example_sml[, setdiff(names(example_sml), drops)]

mydata.history <- makehistory.one(
  input = mydata,
  id = "id",
  times = c(0, 1, 2),
  exposure = "a",
  name.history = "h"
)
# Produce the tidy data for Diagnostic 3 (no censoring), carrying the
# exposure weights stored in "wax".
# NOTE(review): the input is example_sml, not the mydata.history object built
# just above (mydata.history was left here as a commented-out alternative) --
# confirm which input is intended.
mydata.tidy <- lengthen(
  input = example_sml,
  diagnostic = 3,
  censoring = "no",
  id = "id",
  exposure = "a",
  history = "h",
  weight.exposure = "wax",
  times.exposure = c(0, 1, 2),
  times.covariate = c(0, 1, 2),
  temporal.covariate = c("l", "m", "o"),
  static.covariate = c("n", "p")
)
# Apply omit.history() to covariates l, m and o with a "relative" omission
# rule at distance 1 (see ?omit.history for the exact rule).
mydata.tidy.omit <- omit.history(
  input = mydata.tidy,
  covariate.name = c("l", "m", "o"),
  omission = "relative",
  distance = 1
)
# Build the Diagnostic 3 balance table from the trimmed tidy data, using the
# "weight" approach with weights "wax", no censoring, and scope "all".
mytable <- balance(
  input = mydata.tidy.omit,
  diagnostic = 3,
  approach = "weight",
  censoring = "no",
  scope = "all",
  exposure = "a",
  history = "h",
  weight.exposure = "wax",
  times.exposure = c(0, 1, 2),
  times.covariate = c(0, 1, 2),
  sort.order = c("l", "m", "o", "n", "p"),
  ignore.missing.metric = "no"
)
# Plot the "SMD" metric from the balance table for Diagnostic 3.
myplot <- makeplot(
  input = mytable,
  metric = "SMD",
  diagnostic = 3,
  approach = "weight",
  scope = "all"
)

# Auto-print the plot object.
myplot
# Write the balance table and the plot to files whose names are prefixed by
# `path`.
# NOTE(review): `path` is not defined in this section; it is assumed to be a
# directory prefix that already ends in a path separator -- confirm.
write.csv(mytable, paste0(path, "mytable.csv"))

# Pass the plot explicitly instead of relying on ggsave()'s default of the
# last plot, so the saved figure cannot silently become a different plot.
ggsave(filename = paste0(path, "myplot.pdf"), plot = myplot)
Now, we illustrate how Diagnostic 1 can be implemented using a regression approach.
# Rebuild the tidy data for Diagnostic 1 (no censoring) from the
# exposure-history data, passing l, m, n, o and p all as temporal covariates.
mydata.tidy <- lengthen(
  input = mydata.history,
  diagnostic = 1,
  censoring = "no",
  id = "id",
  exposure = "a",
  history = "h",
  times.exposure = c(0, 1, 2),
  times.covariate = c(0, 1, 2),
  temporal.covariate = c("l", "m", "n", "o", "p")
)

# Show the first rows of the tidy data.
head(mydata.tidy)
#> id name.cov time.exposure time.covariate h a value.cov
#> 1 1 l 0 0 H 1 1
#> 2 1 l 1 0 H1 0 1
#> 3 1 l 1 1 H1 0 1
#> 4 1 l 2 0 H10 0 1
#> 5 1 l 2 1 H10 0 1
#> 6 1 l 2 2 H10 0 1
Notice that lengthen() has already subset the data appropriately for Diagnostic 1, in which, at any exposure time \(t\), covariates are measured at or before exposure. We could use omit.history() to further restrict to the same measurement times for exposures and covariates.
lengthen() and regression models
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(broom)
# Derive the model terms: exposure time, the gap between exposure time and
# covariate time, and exposure history.
mydata.tidy.reg <- mydata.tidy %>%
  mutate(
    time = time.exposure,
    distance = time.exposure - time.covariate,
    history = h
  )

# Linear model of the covariate value on exposure, time, distance and history;
# the same model form is used for every covariate.
fit_covariate_model <- function(d) {
  tidy(lm(value.cov ~ a + time + distance + history, data = d))
}

# Fit one model per covariate and keep only the exposure ("a") coefficient.
# Note: other stratifying variables can be included in group_by() or in the
# model itself.
output <- mydata.tidy.reg %>%
  group_by(name.cov) %>%
  # lengthen() already took care of this restriction; repeated for clarity
  filter(time.exposure >= time.covariate) %>%
  do(fit_covariate_model(.)) %>%
  filter(term == "a") %>%
  ungroup()

# Keep the covariate name and report the exposure coefficient as D.
table.reg <- select(output, name.cov, D = estimate)
print(table.reg)
#> # A tibble: 5 x 2
#> name.cov D
#> <chr> <dbl>
#> 1 l 0.102
#> 2 m -0.149
#> 3 n 0.0462
#> 4 o 0.0832
#> 5 p 0.0368
# Compute the Diagnostic 1 balance table with balance() directly: approach
# "none", no censoring, scope "average" averaged over "distance".
table.std <- balance(
  input = mydata.tidy,
  diagnostic = 1,
  approach = "none",
  censoring = "no",
  scope = "average",
  average.over = "distance",
  exposure = "a",
  history = "h",
  times.exposure = c(0, 1, 2),
  times.covariate = c(0, 1, 2),
  ignore.missing.metric = "no"
)

print(table.std)
#> period.id period.start period.end name.cov D SMD N
#> 1 1 0 2 l 0.10203234 0.21905115 60000
#> 2 1 0 2 m -0.15051552 -0.33489936 60000
#> 3 1 0 2 n 0.04648841 0.09452689 60000
#> 4 1 0 2 o 0.08324444 0.18305066 60000
#> 5 1 0 2 p 0.03766586 0.08452254 60000