library(data.table)
viz.data <- function(get.seg.means, data.per.seg=10, penalty=1){
no.labels <- data.table(
start=integer(), end=integer(), changes=integer())
atime.list <- atime::atime(
N=2^seq(1, 20),
setup={
seg.means <- get.seg.means(N)
mean.vec <- rep(seg.means, each=data.per.seg)
set.seed(1)
data.vec <- rnorm(data.per.seg*N, mean.vec, 0.2)
},
"changepoint::cpt.mean"={
changepoint::cpt.mean(
data.vec, method="PELT", penalty="Manual", pen.value=penalty)
},
"binsegRcpp::binseg_normal"={
binsegRcpp::binseg_normal(data.vec, N)
},
"fpop::Fpop"={
fpop::Fpop(data.vec, penalty)
},
"LOPART::LOPART"={
LOPART::LOPART(data.vec, no.labels, penalty)
},
times=5)
best.list <- atime::references_best(atime.list)
if(require(ggplot2)){
hline.df <- with(atime.list, data.frame(seconds.limit, unit="seconds"))
gg <- ggplot()+
theme_bw()+
facet_grid(unit ~ ., scales="free")+
geom_hline(aes(
yintercept=seconds.limit),
color="grey",
data=hline.df)+
geom_line(aes(
N, empirical, color=expr.name),
data=best.list$meas)+
geom_ribbon(aes(
N, ymin=min, ymax=max, fill=expr.name),
data=best.list$meas[unit=="seconds"],
alpha=0.5)+
scale_x_log10()+
scale_y_log10("median line, min/max band")
if(require(directlabels)){
gg+
directlabels::geom_dl(aes(
N, empirical, color=expr.name, label=expr.class),
method="right.polygons",
data=best.list$meas)+
theme(legend.position="none")+
coord_cartesian(xlim=c(2,1e7))
}else{
gg
}
}
}
viz.data(function(N.segs)rep(0:1,l=N.segs))
The plots above show some speed differences between segmentation algorithms. It is clear that LOPART and binseg algorithms are slow/quadratic in these data (ten points up, ten points down, ten points up, …), whereas FPOP and PELT (changepoint pkg) are fast/log-linear. This is the worst case for binseg.
viz.data(function(N.segs)1:N.segs)
Results above show a more typical result: LOPART is slow/quadratic whereas others are fast/log-linear.
viz.data(function(N.segs)1:N.segs, data.per.seg=1, penalty=1e10)
#> Warning: Transformation introduced infinite values in continuous y-axis
#> Transformation introduced infinite values in continuous y-axis
Results above show a highly unusual/pathological result: FPOP and PELT are quadratic time for a data sequence which is always increasing with a large penalty.