This document serves as an overview for measuring the performance of
RcppAlgos
against other tools for generating combinations,
permutations, and partitions. This stackoverflow post: Permutations and
combinations with/without replacement and for distinct/non-distinct
items/multiset has some benchmarks. You will note that the examples
in that post were relatively small. The benchmarks below will focus on
larger examples where performance really matters and for this reason we
only consider the packages arrangements,
partitions,
and RcppAlgos.
For the benchmarks below, we used a Macbook Pro i7 16Gb
machine. We also tested on a Windows and Linux machine with similar
specs and obtained similar results.
library(RcppAlgos)
library(partitions)
library(arrangements)
library(microbenchmark)
<- capture.output(sessionInfo())
pertinent_output
options(digits = 4)
cat(paste(pertinent_output[1:3], collapse = "\n"))
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Monterey 12.3
cat(pertinent_output[which(pertinent_output == "other attached packages:") + 1L])
#> [1] microbenchmark_1.4-7 arrangements_1.1.9 partitions_1.10-4 RcppAlgos_2.6.0
<- as.integer(RcppAlgos::stdThreadMax() / 2)
numThreads print(numThreads)
#> [1] 4
set.seed(13)
<- sort(sample(100, 30))
v1 <- 21
m <- comboGeneral(v1, m, Parallel = T)
t1 <- combinations(v1, m)
t2 stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14307150 21
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v1, m),
cbArrangements = combinations(v1, m),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.00 1.000 1.000 15
#> cbRcppAlgosSer 3.560 2.493 2.517 2.47 2.466 2.394 15
#> cbArrangements 3.798 2.628 2.662 2.63 2.609 2.508 15
<- v1[1:10]
v2 <- 20
m <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads)
t1 <- combinations(v2, m, replace = TRUE)
t2 stopifnot(identical(t1, t2))
dim(t1)
#> [1] 10015005 20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v2, m, TRUE),
cbArrangements = combinations(v2, m, replace = TRUE),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15
#> cbRcppAlgosSer 2.423 2.394 2.265 2.294 2.130 2.143 15
#> cbArrangements 2.116 2.322 2.186 2.228 2.084 2.075 15
<- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5)
myFreqs <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610))
v3 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads)
t1 <- combinations(freq = myFreqs, k = 20, x = v3)
t2 stopifnot(identical(t1, t2))
dim(t1)
#> [1] 14594082 20
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads),
cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs),
cbArrangements = combinations(freq = myFreqs, k = 20, x = v3),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.00 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 2.647 2.52 2.381 2.342 2.247 2.266 10
#> cbArrangements 3.535 3.59 3.343 3.326 3.147 3.151 10
<- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59))
v4 <- permuteGeneral(v4, 6, nThreads = numThreads)
t1 <- permutations(v4, 6)
t2 stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8910720 6
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v4, 6),
cbArrangements = permutations(v4, 6),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15
#> cbRcppAlgosSer 2.394 2.390 1.917 2.390 1.984 1.584 15
#> cbArrangements 3.076 3.079 2.696 3.112 3.108 2.071 15
## Indexing permutation example with the partitions package
<- permuteGeneral(11, nThreads = 4)
t1 <- permutations(11)
t2 <- perms(11)
t3
dim(t1)
#> [1] 39916800 11
stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3))))
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4),
cbRcppAlgosSer = permuteGeneral(11),
cbArrangements = permutations(11),
cbPartitions = perms(11),
times = 5, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 5
#> cbRcppAlgosSer 3.315 3.345 2.871 3.190 3.930 1.837 5
#> cbArrangements 3.595 3.776 3.336 3.781 4.396 2.339 5
#> cbPartitions 9.950 9.982 8.074 9.558 9.862 4.926 5
<- v3[1:5]
v5 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads)
t1 <- permutations(v5, 10, replace = TRUE)
t2 stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9765625 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE),
cbArrangements = permutations(x = v5, k = 10, replace = TRUE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 2.948 2.955 1.958 2.853 2.347 0.604 10
#> cbArrangements 2.833 2.844 2.175 2.754 2.330 1.374 10
<- sort(runif(12))
v6 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads)
t1 <- permutations(freq = rep(1:3, 4), k = 7, x = v6)
t2 stopifnot(identical(t1, t2))
dim(t1)
#> [1] 19520760 7
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads),
cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)),
cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.355 3.332 2.535 3.138 1.509 1.983 10
#> cbArrangements 3.531 3.503 2.824 3.313 2.078 2.065 10
<- comboGeneral(0:140, freqs=c(140, rep(1, 140)),
t1 constraintFun = "sum", comparisonFun = "==",
limitConstraints = 140)
<- partitions(140, distinct = TRUE)
t2 <- diffparts(140)
t3
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 140
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 140), all(rowSums(t2) == 140),
all(colSums(t3) == 140))
dim(t1)
#> [1] 9617150 16
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))),
cbArrangements = partitions(140, distinct = TRUE),
cbPartitions = diffparts(140),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.032 3.061 2.810 3.428 2.432 2.595 10
#> cbArrangements 2.814 2.911 2.406 2.843 1.943 2.175 10
#> cbPartitions 20.374 21.172 16.409 20.305 13.367 11.729 10
<- comboGeneral(160, 10,
t1 constraintFun = "sum", comparisonFun = "==",
limitConstraints = 160)
<- partitions(160, 10, distinct = TRUE)
t2 stopifnot(identical(t1, t2))
dim(t1)
#> [1] 8942920 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(160, 10),
cbArrangements = partitions(160, 10, distinct = TRUE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.072 3.056 2.896 3.040 3.017 1.806 10
#> cbArrangements 3.387 3.361 3.010 3.329 3.345 1.892 10
<- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum",
t1 comparisonFun = "==", limitConstraints = 65)
<- partitions(65)
t2 <- parts(65)
t3
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 65
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 65), all(rowSums(t2) == 65),
all(colSums(t3) == 65))
dim(t1)
#> [1] 2012558 65
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE),
cbArrangements = partitions(65),
cbPartitions = parts(65),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 2.952 3.050 2.635 3.331 2.353 2.158 20
#> cbArrangements 2.840 2.825 2.400 2.852 2.136 2.101 20
#> cbPartitions 11.037 10.916 8.431 10.695 6.768 5.907 20
<- comboGeneral(100, 15, TRUE, constraintFun = "sum",
t1 comparisonFun = "==", limitConstraints = 100)
<- partitions(100, 15)
t2 stopifnot(identical(t1, t2))
dim(t1)
#> [1] 9921212 15
rm(t1, t2)
# This takes a really long time... not because of restrictedparts,
# but because apply is not that fast. This transformation is
# needed for proper comparisons. As a result, we will compare
# a smaller example
# t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort))
<- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort))
t3 stopifnot(identical(partitions(50, 15), t3))
rm(t3)
invisible(gc())
microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE,
nThreads = numThreads),
cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE),
cbArrangements = partitions(100, 15),
cbPartitions = restrictedparts(100, 15,
include.zero = FALSE),
times = 10, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10
#> cbRcppAlgosSer 3.022 3.014 2.532 2.527 2.403 2.210 10
#> cbArrangements 3.031 3.037 2.652 2.796 2.373 2.359 10
#> cbPartitions 15.093 15.061 11.792 12.642 9.781 8.815 10
Currenlty, RcppAlgos
is the only package capable of
efficiently generating partitions of multisets. Therefore, we will only
time RcppAlgos
and use this as a reference for future
improvements.
<- comboGeneral(120, 10, freqs=rep(1:8, 15),
t1 constraintFun = "sum", comparisonFun = "==",
limitConstraints = 120)
dim(t1)
#> [1] 7340225 10
stopifnot(all(rowSums(t1) == 120))
microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)),
times = 10)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> cbRcppAlgos 725.9 730.3 747.5 732.4 760.8 797.9 10
### In RcppAlgos 2.3.6 - 2.4.3
#> Unit: seconds
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1172.9 1175.1 1223.5 1193.3 1200.5 1482.0 10 10
<- compositionsGeneral(0:15, repetition = TRUE)
t1 <- arrangements::compositions(15)
t2 <- partitions::compositions(15)
t3
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 15
stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))),
all(rowSums(t1) == 15), all(rowSums(t2) == 15),
all(colSums(t3) == 15))
dim(t1)
#> [1] 16384 15
rm(t1, t2, t3)
invisible(gc())
microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE),
cbArrangements = arrangements::compositions(15),
cbPartitions = partitions::compositions(15),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosSer 0.6558 0.9888 0.9716 1.003 0.9939 0.9327 20
#> cbArrangements 1.0000 1.0000 1.0000 1.000 1.0000 1.0000 20
#> cbPartitions 167.5308 177.1624 203.1560 188.783 241.1307 225.0345 20
For the next two examples, we will exclude the
partitions
package for efficiency reasons.
<- compositionsGeneral(0:23, repetition = TRUE)
t1 <- arrangements::compositions(23)
t2
# Each package has different output formats... we only examine dimensions
# and that each result is a partition of 23
stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23),
all(rowSums(t2) == 23))
dim(t1)
#> [1] 4194304 23
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE),
cbArrangements = arrangements::compositions(23),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 3.094 3.072 3.009 3.052 2.975 2.828 20
#> cbArrangements 3.106 3.088 3.021 3.059 2.967 2.876 20
<- compositionsGeneral(30, 10, repetition = TRUE)
t1 <- arrangements::compositions(30, 10)
t2
stopifnot(identical(t1, t2), all(rowSums(t1) == 30))
dim(t1)
#> [1] 10015005 10
rm(t1, t2)
invisible(gc())
microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE,
nThreads = numThreads),
cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE),
cbArrangements = arrangements::compositions(30, 10),
times = 20, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20
#> cbRcppAlgosSer 2.310 2.525 2.304 2.453 2.196 1.833 20
#> cbArrangements 2.357 2.360 2.157 2.294 2.050 1.694 20
We will show one example from each category to demonstrate the
efficiency of the iterators in RcppAlgos
. The results are
similar for the rest of the cases not shown.
<- function(n, total) {
pkg_arrangements <- icombinations(n, as.integer(n / 2))
a for (i in 1:total) a$getnext()
}
<- function(n, total) {
pkg_RcppAlgos <- comboIter(n, as.integer(n / 2))
a for (i in 1:total) a@nextIter()
}
<- comboCount(18, 9)
total
total#> [1] 48620
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(18, total),
cbArrangements = pkg_arrangements(18, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15
#> cbArrangements 22.93 22.28 21.66 21.71 21.31 20.09 15
<- function(n, total) {
pkg_arrangements <- ipermutations(n)
a for (i in 1:total) a$getnext()
}
<- function(n, total) {
pkg_RcppAlgos <- permuteIter(n)
a for (i in 1:total) a@nextIter()
}
<- permuteCount(8)
total
total#> [1] 40320
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(8, total),
cbArrangements = pkg_arrangements(8, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.0 1.00 1.00 15
#> cbArrangements 21.91 22.04 21.35 21.4 20.94 20.05 15
<- function(n, total) {
pkg_partitions <- firstpart(n)
a for (i in 1:(total - 1)) a <- nextpart(a)
}
<- function(n, total) {
pkg_arrangements <- ipartitions(n)
a for (i in 1:total) a$getnext()
}
<- function(n, total) {
pkg_RcppAlgos <- partitionsIter(0:n, repetition = TRUE)
a for (i in 1:total) a@nextIter()
}
<- partitionsCount(0:40, repetition = TRUE)
total
total#> [1] 37338
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(40, total),
cbArrangements = pkg_arrangements(40, total),
cbPartitions = pkg_partitions(40, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15
#> cbArrangements 18.03 17.86 16.30 16.03 15.94 12.91 15
#> cbPartitions 36.25 36.09 32.75 32.36 32.00 25.49 15
<- function(n, total) {
pkg_partitions <- firstcomposition(n)
a for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE)
}
<- function(n, total) {
pkg_arrangements <- icompositions(n)
a for (i in 1:total) a$getnext()
}
<- function(n, total) {
pkg_RcppAlgos <- compositionsIter(0:n, repetition = TRUE)
a for (i in 1:total) a@nextIter()
}
<- compositionsCount(0:15, repetition = TRUE)
total
total#> [1] 16384
microbenchmark(cbRcppAlgos = pkg_RcppAlgos(15, total),
cbArrangements = pkg_arrangements(15, total),
cbPartitions = pkg_partitions(15, total),
times = 15, unit = "relative")
#> Unit: relative
#> expr min lq mean median uq max neval
#> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 10
#> cbArrangements 16.50 16.05 15.04 15.59 15.11 12.98 10
#> cbPartitions 70.29 69.42 63.21 65.71 63.40 50.01 10