2021-06-25 @Atsushi Kawaguchi

The msma package provides functions for a matrix decomposition method incorporating sparse and supervised modeling for a multiblock multivariable data analysis.

Preparation

Install package (as necessary)

if(!require("msma")) install.packages("msma")

Load package

library(msma)

Getting started

Simulated multiblock data (list data) by using the function simdata. Sample size is 50. The correlation coeficient is 0.8. The numbers of columns for response and predictor can be specified by the argument Yps and Xps, respectively. The length of vecor represents the number of blocks. That is, response has three blocks with the numbers of columns being 3, 4, and 5 and predictor has one block with the number of columns being 3.

dataset0 = simdata(n = 50, rho = 0.8, Yps = c(3, 4, 5), Xps = 3, seed=1)
X0 = dataset0$X; Y0 = dataset0$Y 

The argument comp can specify the number of components. The arguments lambdaX and lambdaY can specify the regularization parameters for X and Y, respectively.

  • One Component
fit01 = msma(X0, Y0, comp=1, lambdaX=0.05, lambdaY=1:3)
fit01
## Call:
## msma.default(X = X0, Y = Y0, comp = 1, lambdaX = 0.05, lambdaY = 1:3)
## 
## Numbers of non-zeros for X: 
##        comp1
## block1     3
## 
## Numbers of non-zeros for X super: 
## comp1 
##     1 
## 
## Numbers of non-zeros for Y: 
##        comp1
## block1     1
## block2     1
## block3     1
## 
## Numbers of non-zeros for Y super: 
## comp1 
##     3

The plot function is available. In default setting, the block weights are displayed as a barplot.

plot(fit01)

  • Two Component
fit02 = msma(X0, Y0, comp=2, lambdaX=0.03, lambdaY=0.01*(1:3))
fit02
## Call:
## msma.default(X = X0, Y = Y0, comp = 2, lambdaX = 0.03, lambdaY = 0.01 * 
##     (1:3))
## 
## Numbers of non-zeros for X: 
##        comp1 comp2
## block1     3     3
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 
##     1     1 
## 
## Numbers of non-zeros for Y: 
##        comp1 comp2
## block1     3     3
## block2     4     4
## block3     5     5
## 
## Numbers of non-zeros for Y super: 
## comp1 comp2 
##     3     3

Single Block

Two matrics are prepared by specifying arguments Yps and Xps.

dataset1 = simdata(n = 50, rho = 0.8, Yps = 5, Xps = 5, seed=1)
X1 = dataset1$X[[1]]; Y1 = dataset1$Y 

Principal Component Analysis (PCA)

If input is a matrix, a principal component analysis is implemented.

(fit111 = msma(X1, comp=5))
## Call:
## msma.default(X = X1, comp = 5)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5
## block1     5     5     5     5     5
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 comp3 comp4 comp5 
##     1     1     1     1     1

The weight (loading) vectors can be obtained as follows.

fit111$wbX
## [[1]]
##           comp1       comp2       comp3         comp4       comp5
## X.1.1 0.4309622 -0.74170223 -0.03672379  0.1325580413 -0.49520613
## X.1.2 0.4483196  0.31188303  0.63228246  0.5490205405  0.02310504
## X.1.3 0.4601597 -0.19547078 -0.38567734  0.1474129336  0.76129277
## X.1.4 0.4392794  0.55811865 -0.57117598 -0.0006449093 -0.41145448
## X.1.5 0.4566923  0.05386584  0.35196769 -0.8119567864  0.07331836

The bar plots of weight vectors are provided by the function plot. The component number is specified by the argument axes. The plot type is selected by the argument plottype.

par(mfrow=c(1,2))
plot(fit111, axes = 1, plottype="bar")
plot(fit111, axes = 2, plottype="bar")

The score vectors for first six subjects.

lapply(fit111$sbX, head)
## [[1]]
##            [,1]          [,2]        [,3]        [,4]        [,5]
## [1,]  0.7097369  0.0487564120  0.10746733 -0.02462727 -0.00598565
## [2,] -0.6976955 -0.5423072581 -0.98211121 -0.23652145 -0.16120137
## [3,]  2.4367362 -0.0238218850 -0.32403419 -0.44206969 -0.47004393
## [4,] -2.4460385 -0.0007036966  0.08112764  0.14263545 -0.45584684
## [5,]  1.7708133  0.9741849574 -0.64716134  0.09377875 -0.78085072
## [6,] -0.8043943 -0.9469205017 -0.34705994 -0.62641753  0.26617649

The scatter plots for the score vectors specified by the argument v. The argument axes is specified by the two length vector represents which components are displayed.

plot(fit111, v="score", axes = 1:2, plottype="scatter")

plot(fit111, v="score", axes = 2:3, plottype="scatter")

When the argument v was specified as “cpev”, the cummulative eigenvalues are plotted.

par(mfrow=c(1,1))
plot(fit111, v="cpev")

Other functions in R for PCA

There is the R function prcomp to implement PCA.

(fit1112 = prcomp(X1, scale=TRUE))
## Standard deviations (1, .., p=5):
## [1] 2.0446732 0.5899513 0.4458638 0.3926788 0.3439156
## 
## Rotation (n x k) = (5 x 5):
##             PC1         PC2         PC3           PC4         PC5
## [1,] -0.4309746  0.74172462 -0.03722419  1.351296e-01 -0.49442882
## [2,] -0.4483141 -0.31171881  0.63044575  5.510830e-01  0.02629751
## [3,] -0.4601629  0.19547669 -0.38616901  1.416349e-01  0.76213651
## [4,] -0.4392701 -0.55816074 -0.57114566 -4.296727e-05 -0.41144993
## [5,] -0.4566918 -0.05405032  0.35470924 -8.111640e-01  0.06859643
summary(fit1112)
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5
## Standard deviation     2.0447 0.58995 0.44586 0.39268 0.34392
## Proportion of Variance 0.8361 0.06961 0.03976 0.03084 0.02366
## Cumulative Proportion  0.8361 0.90575 0.94551 0.97634 1.00000
biplot(fit1112)

The ggfortify package is also available for the PCA plot.

Sparse PCA

If lambdaX (>0) is specified, a sparse principal component analysis is implemented.

(fit112 = msma(X1, comp=5, lambdaX=0.1))
## Call:
## msma.default(X = X1, comp = 5, lambdaX = 0.1)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5
## block1     5     4     4     5     4
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 comp3 comp4 comp5 
##     1     1     1     1     1
par(mfrow=c(1,2))
plot(fit112, axes = 1, plottype="bar")
plot(fit112, axes = 2, plottype="bar")

Supervised Sparse PCA

The outcome Z is generated.

set.seed(1); Z = rbinom(50, 1, 0.5)

If the outcome Z is specified, a supervised sparse principal component analysis is implemented.

(fit113 = msma(X1, Z=Z, comp=5, lambdaX=0.02))
## Call:
## msma.default(X = X1, Z = Z, comp = 5, lambdaX = 0.02)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5
## block1     5     5     5     4     5
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 comp3 comp4 comp5 
##     1     1     1     1     1
par(mfrow=c(1,2))
plot(fit113, axes = 1, plottype="bar")
plot(fit113, axes = 2, plottype="bar")

Partial Least Squres (PLS)

If the another input Y1 is specified, a partial least squres is implemented.

(fit121 = msma(X1, Y1, comp=2))
## Call:
## msma.default(X = X1, Y = Y1, comp = 2)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2
## block1     5     5
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 
##     1     1 
## 
## Numbers of non-zeros for Y: 
##        comp1 comp2
## block1     5     5
## 
## Numbers of non-zeros for Y super: 
## comp1 comp2 
##     1     1

The component number is specified by the argument axes. When the argument XY was specified as “XY”, the scatter plots for Y score against X score are plotted.

par(mfrow=c(1,2))
plot(fit121, axes = 1, XY="XY")
plot(fit121, axes = 2, XY="XY")

Sparse PLS

If lambdaX and lambdaY are specified, a sparse PLS is implemented.

(fit122 = msma(X1, Y1, comp=2, lambdaX=0.5, lambdaY=0.5))
## Call:
## msma.default(X = X1, Y = Y1, comp = 2, lambdaX = 0.5, lambdaY = 0.5)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2
## block1     2     2
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 
##     1     1 
## 
## Numbers of non-zeros for Y: 
##        comp1 comp2
## block1     2     2
## 
## Numbers of non-zeros for Y super: 
## comp1 comp2 
##     1     1
par(mfrow=c(1,2))
plot(fit122, axes = 1, XY="XY")
plot(fit122, axes = 2, XY="XY")

Supervised Sparse PLS

If the outcome Z is specified, a supervised sparse PLS is implemented.

(fit123 = msma(X1, Y1, Z, comp=2, lambdaX=0.5, lambdaY=0.5))
## Call:
## msma.default(X = X1, Y = Y1, Z = Z, comp = 2, lambdaX = 0.5, 
##     lambdaY = 0.5)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2
## block1     2     2
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 
##     1     1 
## 
## Numbers of non-zeros for Y: 
##        comp1 comp2
## block1     2     2
## 
## Numbers of non-zeros for Y super: 
## comp1 comp2 
##     1     1
par(mfrow=c(1,2))
plot(fit123, axes = 1, XY="XY")
plot(fit123, axes = 2, XY="XY")

Multi Block

Multiblock data is a list of data matrix.

dataset2 = simdata(n = 50, rho = 0.8, Yps = c(2, 3), Xps = c(3, 4), seed=1)
X2 = dataset2$X; Y2 = dataset2$Y 

PCA

The input class is list.

class(X2)
## [1] "list"

The list length is 2 for 2 blocks.

length(X2)
## [1] 2

list of data matrix structure.

lapply(X2, dim)
## [[1]]
## [1] 50  3
## 
## [[2]]
## [1] 50  4

The function msma is applied to this list X2 as follows.

(fit211 = msma(X2, comp=1))
## Call:
## msma.default(X = X2, comp = 1)
## 
## Numbers of non-zeros for X: 
##        comp1
## block1     3
## block2     4
## 
## Numbers of non-zeros for X super: 
## comp1 
##     2

The bar plots for the block and super weights (loadings) specified the argument block.

par(mfrow=c(1,2))
plot(fit211, axes = 1, plottype="bar", block="block")
plot(fit211, axes = 1, plottype="bar", block="super")

Sparse PCA

If lambdaX with the length of 2 (same as the length of blocks) are specified, a multiblock sparse PCA is implemented.

(fit212 = msma(X2, comp=1, lambdaX=c(0.5, 0.5)))
## Call:
## msma.default(X = X2, comp = 1, lambdaX = c(0.5, 0.5))
## 
## Numbers of non-zeros for X: 
##        comp1
## block1     3
## block2     2
## 
## Numbers of non-zeros for X super: 
## comp1 
##     2

The bar plots for the block and super weights (loadings).

par(mfrow=c(1,2))
plot(fit212, axes = 1, plottype="bar", block="block")
plot(fit212, axes = 1, plottype="bar", block="super")

Supervised Sparse PCA

If the outcome Z is specified, a supervised analysis is implemented.

(fit213 = msma(X2, Z=Z, comp=1, lambdaX=c(0.5, 0.5)))
## Call:
## msma.default(X = X2, Z = Z, comp = 1, lambdaX = c(0.5, 0.5))
## 
## Numbers of non-zeros for X: 
##        comp1
## block1     3
## block2     2
## 
## Numbers of non-zeros for X super: 
## comp1 
##     2
par(mfrow=c(1,2))
plot(fit213, axes = 1, plottype="bar", block="block")
plot(fit213, axes = 1, plottype="bar", block="super")

PLS

If the another input (list) Y2 is specified, the partial least squared is implemented.

(fit221 = msma(X2, Y2, comp=1))
## Call:
## msma.default(X = X2, Y = Y2, comp = 1)
## 
## Numbers of non-zeros for X: 
##        comp1
## block1     3
## block2     4
## 
## Numbers of non-zeros for X super: 
## comp1 
##     2 
## 
## Numbers of non-zeros for Y: 
##        comp1
## block1     2
## block2     3
## 
## Numbers of non-zeros for Y super: 
## comp1 
##     2
par(mfrow=c(1,2))
plot(fit221, axes = 1, plottype="bar", block="block", XY="X")
plot(fit221, axes = 1, plottype="bar", block="super", XY="X")

par(mfrow=c(1,2))
plot(fit221, axes = 1, plottype="bar", block="block", XY="Y")
plot(fit221, axes = 1, plottype="bar", block="super", XY="Y")

Sparse PLS

The regularized parameters lambdaX and lambdaY are specified vectors with same length with the length of lists X2 and Y2, respectively.

(fit222 = msma(X2, Y2, comp=1, lambdaX=c(0.5, 0.5), lambdaY=c(0.5, 0.5)))
## Call:
## msma.default(X = X2, Y = Y2, comp = 1, lambdaX = c(0.5, 0.5), 
##     lambdaY = c(0.5, 0.5))
## 
## Numbers of non-zeros for X: 
##        comp1
## block1     2
## block2     2
## 
## Numbers of non-zeros for X super: 
## comp1 
##     2 
## 
## Numbers of non-zeros for Y: 
##        comp1
## block1     1
## block2     3
## 
## Numbers of non-zeros for Y super: 
## comp1 
##     2
par(mfrow=c(1,2))
plot(fit222, axes = 1, plottype="bar", block="block", XY="X")
plot(fit222, axes = 1, plottype="bar", block="super", XY="X")

par(mfrow=c(1,2))
plot(fit222, axes = 1, plottype="bar", block="block", XY="Y")
plot(fit222, axes = 1, plottype="bar", block="super", XY="Y")

Supervised Sparse PLS

(fit223 = msma(X2, Y2, Z, comp=1, lambdaX=c(0.5, 0.5), lambdaY=c(0.5, 0.5)))
## Call:
## msma.default(X = X2, Y = Y2, Z = Z, comp = 1, lambdaX = c(0.5, 
##     0.5), lambdaY = c(0.5, 0.5))
## 
## Numbers of non-zeros for X: 
##        comp1
## block1     2
## block2     2
## 
## Numbers of non-zeros for X super: 
## comp1 
##     2 
## 
## Numbers of non-zeros for Y: 
##        comp1
## block1     1
## block2     3
## 
## Numbers of non-zeros for Y super: 
## comp1 
##     2
par(mfrow=c(1,2))
plot(fit223, axes = 1, plottype="bar", block="block", XY="X")
plot(fit223, axes = 1, plottype="bar", block="super", XY="X")

par(mfrow=c(1,2))
plot(fit223, axes = 1, plottype="bar", block="block", XY="Y")
plot(fit223, axes = 1, plottype="bar", block="super", XY="Y")

Parameter Selection

The number of components and regularized parameters can be selected by the function optparasearch. The following options are available.

criteria = c("BIC", "CV")
search.methods = c("regparaonly", "regpara1st", "ncomp1st", "simultaneous")

Single Block

PCA

  • The regparaonly method searches for the regularized parameters with a fixed number of components.
(opt11 = optparasearch(X1, search.method = "regparaonly", criterion="BIC"))
## $optncomp
## [1] 10
## 
## $optlambdaX
## [1] 0.2578646
## 
## $search.method
## [1] "regparaonly"
## 
## $criterion
## [1] "BIC"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit311 = msma(X1, comp=opt11$optncomp, lambdaX=opt11$optlambdaX))
## Call:
## msma.default(X = X1, comp = opt11$optncomp, lambdaX = opt11$optlambdaX)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     5     2     3     2     3     4     2     2     2      3
## 
## Numbers of non-zeros for X super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      1      1      1      1      1      1      1      1      1      1
  • The regpara1st identifies the regularized parameters by fixing the number of components, then searching for the number of components with the selected regularized parameters.
(opt12 = optparasearch(X1, search.method = "regpara1st", criterion="BIC"))
## $criterion
## [1] "BIC"
## 
## $comps
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $mincriterion
## [1] -71.49842
## 
## $criterions
##  [1]  -1.718325  -2.202584  -2.657239  -3.466906 -71.498415 -65.545077
##  [7] -65.498245 -65.454849 -65.421685 -65.368019
## 
## $optncomp
## [1] 5
## 
## $optlambdaX
## [1] 0.2578646
## 
## $optlambdaY
## NULL
## 
## $search.method
## [1] "regpara1st"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit312 = msma(X1, comp=opt12$optncomp, lambdaX=opt12$optlambdaX))
## Call:
## msma.default(X = X1, comp = opt12$optncomp, lambdaX = opt12$optlambdaX)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5
## block1     5     2     3     2     3
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 comp3 comp4 comp5 
##     1     1     1     1     1
  • The ncomp1st method identifies the number of components with a regularized parameter of 0, then searches for the regularized parameters with the selected number of components.
(opt13 = optparasearch(X1, search.method = "ncomp1st", criterion="BIC"))
## $criterion
## [1] "BIC"
## 
## $comps
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $mincriterion
## [1] -71.83271
## 
## $criterions
##  [1]  -1.718502  -2.161108  -2.598561  -3.322634 -71.832711 -65.445201
##  [7] -65.335728 -65.249299 -65.127189 -65.018710
## 
## $optncomp
## [1] 5
## 
## $optlambdaX
## [1] 0.1719097
## 
## $search.method
## [1] "ncomp1st"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit313 = msma(X1, comp=opt13$optncomp, lambdaX=opt13$optlambdaX))
## Call:
## msma.default(X = X1, comp = opt13$optncomp, lambdaX = opt13$optlambdaX)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5
## block1     5     3     3     4     4
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 comp3 comp4 comp5 
##     1     1     1     1     1
  • The simultaneous method identifies the number of components by searching the regularized parameters in each component.
(opt14 = optparasearch(X1, search.method = "simultaneous", criterion="BIC"))
## $criterion
## [1] "BIC"
## 
## $comps
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $mincriterion
## criterion 
## -71.86316 
## 
## $criterions
##  criterion  criterion  criterion  criterion  criterion  criterion  criterion 
##  -1.718502  -2.202584  -2.677000  -3.497959 -71.863165 -65.586209 -65.498245 
##  criterion  criterion  criterion 
## -65.454849 -65.421685 -65.368019 
## 
## $optncomp
## [1] 5
## 
## $optlambdaX
## [1] 0.1719097
## 
## $optlambdaY
## NULL
## 
## $search.method
## [1] "simultaneous"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit314 = msma(X1, comp=opt14$optncomp, lambdaX=opt14$optlambdaX))
## Call:
## msma.default(X = X1, comp = opt14$optncomp, lambdaX = opt14$optlambdaX)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5
## block1     5     3     3     4     4
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 comp3 comp4 comp5 
##     1     1     1     1     1

The argument maxpct4ncomp=0.5 means that 0.5\(\lambda\) is used as the regularized parameter when the number of components is searched and where \(\lambda\) is the maximum of the regularized parameters among the possible candidates.

(opt132 = optparasearch(X1, search.method = "ncomp1st", criterion="BIC", maxpct4ncomp=0.5))
## $criterion
## [1] "BIC"
## 
## $comps
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $mincriterion
## criterion 
## -71.86316 
## 
## $criterions
##  criterion  criterion  criterion  criterion  criterion  criterion  criterion 
##  -1.718502  -2.190527  -2.649988  -3.450188 -71.863165 -65.586209 -65.462913 
##  criterion  criterion  criterion 
## -65.364646 -65.297389 -65.199647 
## 
## $optncomp
## [1] 5
## 
## $optlambdaX
## [1] 0.1719097
## 
## $search.method
## [1] "ncomp1st"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit3132 = msma(X1, comp=opt132$optncomp, lambdaX=opt132$optlambdaX))
## Call:
## msma.default(X = X1, comp = opt132$optncomp, lambdaX = opt132$optlambdaX)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5
## block1     5     3     3     4     4
## 
## Numbers of non-zeros for X super: 
## comp1 comp2 comp3 comp4 comp5 
##     1     1     1     1     1

PLS

For PLS, two parameters \(\lambda_X\) and \(\lambda_Y\) are used in arguments lambdaX and lambdaY to control sparseness for data X and Y, respectively.

(opt21 = optparasearch(X2, Y2, search.method = "regparaonly", criterion="BIC"))
## $optncomp
## [1] 10
## 
## $optlambdaX
##   lambdaX1   lambdaX2 
## 0.08245431 0.48828296 
## 
## $optlambdaY
##  lambdaY1  lambdaY2 
## 0.4318262 0.2256272 
## 
## $optlambdaXsup
## lambdaXsup1 
##   0.4273084 
## 
## $optlambdaYsup
## lambdaYsup1 
##   0.6187171 
## 
## $search.method
## [1] "regparaonly"
## 
## $criterion
## [1] "BIC"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit321 = msma(X2, Y2, comp=opt21$optncomp, lambdaX=opt21$optlambdaX, lambdaY=opt21$optlambdaY))
## Call:
## msma.default(X = X2, Y = Y2, comp = opt21$optncomp, lambdaX = opt21$optlambdaX, 
##     lambdaY = opt21$optlambdaY)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     3     3     3     2     3     3     2     3     3      3
## block2     2     2     1     2     2     1     1     1     2      1
## 
## Numbers of non-zeros for X super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      2      2      2      2      2      2      2      2      2      2 
## 
## Numbers of non-zeros for Y: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     1     1     2     1     1     1     1     1     1      1
## block2     3     2     3     2     2     2     3     3     2      2
## 
## Numbers of non-zeros for Y super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      2      2      2      2      2      2      2      2      2      2

Multi Block

The multi block structure has

dataset3 = simdata(n = 50, rho = 0.8, Yps = rep(4, 5), Xps = rep(4, 5), seed=1)
X3 = dataset3$X; Y3 = dataset3$Y 

PCA

(opt31 = optparasearch(X3, search.method = "regparaonly", criterion="BIC"))
## $optncomp
## [1] 10
## 
## $optlambdaX
##  lambdaX1  lambdaX2  lambdaX3  lambdaX4  lambdaX5 
## 0.1310995 0.1513608 0.1590729 0.1300171 0.3984990 
## 
## $optlambdaXsup
## lambdaXsup1 
##   0.1493833 
## 
## $search.method
## [1] "regparaonly"
## 
## $criterion
## [1] "BIC"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit331 = msma(X3, comp=opt31$optncomp, lambdaX=opt31$optlambdaX, lambdaXsup=opt31$optlambdaXsup))
## Call:
## msma.default(X = X3, comp = opt31$optncomp, lambdaX = opt31$optlambdaX, 
##     lambdaXsup = opt31$optlambdaXsup)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     0     4     0     2     3     0     0     3     0      3
## block2     0     0     2     2     0     0     0     2     2      0
## block3     4     0     2     0     0     4     0     0     4      4
## block4     0     2     2     3     0     2     0     0     0      0
## block5     0     0     2     2     0     2     1     0     0      1
## 
## Numbers of non-zeros for X super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      1      2      4      4      1      3      1      2      2      3
(opt32 = optparasearch(X3, search.method = "regparaonly", criterion="BIC", whichselect="X"))
## $optncomp
## [1] 10
## 
## $optlambdaX
##   lambdaX1   lambdaX2   lambdaX3   lambdaX4   lambdaX5 
## 0.42607353 0.56760297 0.11930469 0.48756415 0.09962475 
## 
## $search.method
## [1] "regparaonly"
## 
## $criterion
## [1] "BIC"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit332 = msma(X3, comp=opt32$optncomp, lambdaX=opt32$optlambdaX, lambdaXsup=opt32$optlambdaXsup))
## Call:
## msma.default(X = X3, comp = opt32$optncomp, lambdaX = opt32$optlambdaX, 
##     lambdaXsup = opt32$optlambdaXsup)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     2     2     1     1     2     1     1     1     1      1
## block2     2     1     1     1     1     1     1     1     1      1
## block3     4     4     3     3     3     4     4     4     4      4
## block4     1     2     1     2     1     1     1     1     1      1
## block5     4     3     3     4     2     3     2     2     3      3
## 
## Numbers of non-zeros for X super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      5      5      5      5      5      5      5      5      5      5
(opt33 = optparasearch(X3, search.method = "regparaonly", criterion="BIC", whichselect="Xsup"))
## $optncomp
## [1] 10
## 
## $optlambdaXsup
## [1] 0.1493833
## 
## $search.method
## [1] "regparaonly"
## 
## $criterion
## [1] "BIC"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit333 = msma(X3, comp=opt33$optncomp, lambdaX=opt33$optlambdaX, lambdaXsup=opt33$optlambdaXsup))
## Call:
## msma.default(X = X3, comp = opt33$optncomp, lambdaX = opt33$optlambdaX, 
##     lambdaXsup = opt33$optlambdaXsup)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     0     0     3     0     2     0     0     1     2      0
## block2     0     0     4     3     0     4     0     0     0      2
## block3     4     0     0     4     3     0     2     0     0      0
## block4     0     4     3     3     4     0     0     0     0      0
## block5     4     0     4     2     0     0     2     0     0      0
## 
## Numbers of non-zeros for X super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      2      1      4      4      3      1      2      1      1      1

PLS

This is computationally expensive and takes much longer to execute due to the large number of blocks.

(opt41 = optparasearch(X3, Y3, search.method = "regparaonly", criterion="BIC"))
(fit341 = msma(X3, Y3, comp=opt41$optncomp, lambdaX=opt41$optlambdaX, lambdaY=opt41$optlambdaY, lambdaXsup=opt41$optlambdaXsup, lambdaYsup=opt41$optlambdaYsup))

In this example, it works by narrowing down the parameters as follows.

(opt42 = optparasearch(X3, Y3, search.method = "regparaonly", criterion="BIC", whichselect=c("Xsup","Ysup")))
## $optncomp
## [1] 10
## 
## $optlambdaXsup
## lambdaXsup1 
##   0.2459844 
## 
## $optlambdaYsup
## lambdaYsup1 
##   0.2072225 
## 
## $search.method
## [1] "regparaonly"
## 
## $criterion
## [1] "BIC"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit342 = msma(X3, Y3, comp=opt42$optncomp, lambdaX=opt42$optlambdaX, lambdaY=opt42$optlambdaY, lambdaXsup=opt42$optlambdaXsup, lambdaYsup=opt42$optlambdaYsup))
## Call:
## msma.default(X = X3, Y = Y3, comp = opt42$optncomp, lambdaX = opt42$optlambdaX, 
##     lambdaY = opt42$optlambdaY, lambdaXsup = opt42$optlambdaXsup, 
##     lambdaYsup = opt42$optlambdaYsup)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     4     2     0     2     0     0     0     0     3      0
## block2     0     0     4     2     0     0     0     2     3      0
## block3     0     4     0     0     3     0     0     3     0      3
## block4     0     3     0     0     4     4     3     0     0      0
## block5     4     0     0     4     3     2     0     0     0      0
## 
## Numbers of non-zeros for X super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      2      3      1      3      3      1      1      2      2      1 
## 
## Numbers of non-zeros for Y: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     0     4     0     0     0     4     1     3     3      4
## block2     0     4     4     2     0     3     0     0     2      2
## block3     2     0     2     3     3     0     0     0     0      0
## block4     4     4     3     0     0     0     2     0     2      2
## block5     2     3     0     3     0     3     0     0     0      2
## 
## Numbers of non-zeros for Y super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      3      4      3      3      1      3      2      1      3      4

Another example dataset is generated.

dataset4 = simdata(n = 50, rho = 0.8, Yps = rep(4, 2), Xps = rep(4, 3), seed=1)
X4 = dataset4$X; Y4 = dataset4$Y 

With this number of blocks, the calculation can be performed in a relatively short time.

(opt43 = optparasearch(X4, Y4, search.method = "regparaonly", criterion="BIC"))
## $optncomp
## [1] 10
## 
## $optlambdaX
##  lambdaX1  lambdaX2  lambdaX3 
## 0.1659782 0.4483126 0.1461557 
## 
## $optlambdaY
##  lambdaY1  lambdaY2 
## 0.5138342 0.4073084 
## 
## $optlambdaXsup
## lambdaXsup1 
##   0.1571942 
## 
## $optlambdaYsup
## lambdaYsup1 
##   0.1728372 
## 
## $search.method
## [1] "regparaonly"
## 
## $criterion
## [1] "BIC"
## 
## $criterion4ncomp
## [1] "BIC"
## 
## attr(,"class")
## [1] "optparasearch"
(fit343 = msma(X4, Y4, comp=opt43$optncomp, lambdaX=opt43$optlambdaX, lambdaY=opt43$optlambdaY, lambdaXsup=opt43$optlambdaXsup, lambdaYsup=opt43$optlambdaYsup))
## Call:
## msma.default(X = X4, Y = Y4, comp = opt43$optncomp, lambdaX = opt43$optlambdaX, 
##     lambdaY = opt43$optlambdaY, lambdaXsup = opt43$optlambdaXsup, 
##     lambdaYsup = opt43$optlambdaYsup)
## 
## Numbers of non-zeros for X: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     2     3     3     2     0     2     2     2     2      2
## block2     2     1     1     2     0     2     3     3     2      3
## block3     3     3     3     0     3     2     2     2     2      2
## 
## Numbers of non-zeros for X super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      3      3      3      2      1      3      3      3      3      3 
## 
## Numbers of non-zeros for Y: 
##        comp1 comp2 comp3 comp4 comp5 comp6 comp7 comp8 comp9 comp10
## block1     2     1     1     1     0     0     0     0     0      0
## block2     3     2     2     0     2     1     1     1     1      2
## 
## Numbers of non-zeros for Y super: 
##  comp1  comp2  comp3  comp4  comp5  comp6  comp7  comp8  comp9 comp10 
##      2      2      2      1      1      1      1      1      1      1