prWarp: Papionin example

Anne Le Maitre, Silvester Bartsch, Nicole Grunstra, Philipp Mitteroecker

2020-10-22

This document corresponds to the worked example of the paper Detecting phylogenetic signal and adaptation in papionin cranial shape by decomposing variation at different spatial scales (Grunstra et al., in press). The dataset is a set of 70 2D landmarks on the midsagittal plane of the skull in 67 primates from 18 species (16 papionin taxa, 2 outgroups). The example below describes how to decompose shape variation into total, outline, and residual shape (approach 1) or into large-scale and small-scale shape (approach 2). For further details, please read the associated paper.

Data preparation

Install and load packages in R.

library("prWarp")
library("geomorph")

Load the dataset papionin. This dataset comprises the coordinates of 70 two-dimensional landmarks for the 67 specimens as a 3D array.

data("papionin")  # load dataset
species <- papionin$species  # species
papionin_ar <- papionin$coords  # landmark coordinates
k <- dim(papionin_ar)[1]  # number of landmarks
n_spec <- dim(papionin_ar)[3]  # number of specimens
n_species <- length(levels(species))  # number of species

Approach 1: Total, outline, and residual shape variation

In this approach the total shape variation is decomposed into the variation of outline shape and residual shape. The outline shape corresponds to a subset of the full landmark set, in which all landmarks that demarcate the boarders between bones (i.e., landmarks on sutures and synchondroses) are free to slide. The residual shape corresponds to the slid landmark coordinates of the full dataset after warping them to the mean outline shape using TPS interpolation.

Total shape

For the full landmark set (70 landmarks), define semilandmarks and curves for landmark sliding as well as links between landmarks for visualization.

# Semilandmarks
all_semi_lm <- papionin$semi_lm
# Curves for the sliding process
all_curves <- papionin$curves
# Links between landmarks for visualization
all_links <- papionin$links

Visualize fixed (red) vs. sliding (black) landmarks for one specimen before sliding.

plot(papionin_ar[,,1], pch = 16, col = "black", cex = 0.7, asp = 1)
points(papionin_ar[-all_semi_lm,,1], pch = 16, col = "red")
text(papionin_ar[-all_semi_lm,,1], label = c(1:k)[-all_semi_lm], pos = 1, col = "red", cex = 0.8)
text(papionin_ar[all_semi_lm,,1], label = all_semi_lm, pos = 1, col = "black", cex = 0.6)

Slide the 70 landmarks by minimizing bending energy. Note that the procSym function of the package Morpho gives as output both slid but not Procrustes aligned outline landmarks (i.e., slid landmarks in the original coordinate system) and slid and Procrustes aligned outline landmarks (i.e., slid landmarks superimposed for all specimens in a new coordinate system).

papionin_gpa <- Morpho::procSym(papionin_ar, SMvector = all_semi_lm, outlines = all_curves, pcAlign = FALSE)  # sliding + GPA
## reflection involved
## Iteration 1 ..
## squared distance between means: 3.83893855864639 
##  ------------------------------------------- 
## Iteration 2 ..
## squared distance between means: 7.72205177923358e-05 
##  ------------------------------------------- 
## Iteration 3 ..
## squared distance between means: 3.33008920865188e-06 
##  -------------------------------------------
papionin_ar_slid <- papionin_gpa$dataslide  # slid landmarks in the original space
total_shape <- papionin_gpa$rotated  # Procrustes coordinates
total_shape_average <- papionin_gpa$mshape  # average shape

Visualize fixed (red) vs. sliding (black) landmarks for one specimen after sliding, for comparisons to the position of the semilandmarks before sliding.

plot(papionin_ar_slid[,,1], pch = 16, col = "black", cex = 0.7, asp = 1)
points(papionin_ar_slid[-all_semi_lm,,1], pch = 16, col = "red")
text(papionin_ar_slid[-all_semi_lm,,1], label = c(1:k)[-all_semi_lm], pos = 1, col = "red", cex = 0.8)
text(papionin_ar_slid[all_semi_lm,,1], label = all_semi_lm, pos = 1, col = "black", cex = 0.6)

For the analyses of total shape variation use slid landmarks after superimposition (generalized Procrustes analysis, GPA).

Total shape variation for all specimens.

plotAllSpecimens(total_shape, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_spec) {
  segments(total_shape[all_links[,1],1,i], 
           total_shape[all_links[,1],2,i], 
           total_shape[all_links[,2],1,i], 
           total_shape[all_links[,2],2,i])
}

Total shape variation between species.

# Computation of the average total shape for each species
total_shape_between <- array(NA, dim = c(k, 2, n_species))
for (i in 1:n_species) {
  spi <- which(species == levels(species)[i])
  total_shape_between[,,i] <- mshape(total_shape[, , spi])
}
# Plot total shape between species
plotAllSpecimens(total_shape_between, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_species) {
  segments(total_shape_between[all_links[,1],1,i], 
           total_shape_between[all_links[,1],2,i], 
           total_shape_between[all_links[,2],1,i], 
           total_shape_between[all_links[,2],2,i])
}

Total shape variation within species.

# Computation of the pooled individual within-species total shape
total_shape_within <- array(NA, dim = c(k, 2, n_spec))
for (i in 1:n_species) {
  spi <- which(species == levels(species)[i])
  for (j in 1:length(spi)) {
    total_shape_within[,,spi[j]] <- total_shape[, , spi[j]] - total_shape_between[,,i] + mshape(total_shape_between)
  }
}
# Plot the pooled individual within-species total shape
plotAllSpecimens(total_shape_within, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_spec) {
  segments(total_shape_within[all_links[,1],1,i], 
           total_shape_within[all_links[,1],2,i], 
           total_shape_within[all_links[,2],1,i], 
           total_shape_within[all_links[,2],2,i])
}

Outline shape

The 'outline' shape corresponds to a subset of 54 landmarks, of which most are defined as semilandmarks and hence free to slide.

outline_lm <- papionin$outline$subset  # subset
k_out <- length(outline_lm)  # number of landmarks in the subset
outline_ar <- papionin_ar[outline_lm, , ]  # select the subset of landmark coordinates

Visualize the landmarks selected to define the subset (in blue).

plot(papionin_ar[outline_lm,,1], pch = 16, col = "blue", asp = 1)
points(papionin_ar[-outline_lm,,1], pch = 4, col = "black", cex = 0.7)
text(papionin_ar[outline_lm,,1], label = outline_lm, pos = 1, col = "blue", cex = 0.8)
text(papionin_ar[-outline_lm,,1], label = c(1:k_out)[-outline_lm], pos = 1, col = "black", cex = 0.6)

For the outline landmark set (54 landmarks), define semilandmarks and curves for landmark sliding as well as links between landmarks for visualization. Note that the landmark numbers refer to the order in the 54-landmark set and not in the initial 70-landmarks set.

# Semilandmarks
outline_semi_lm <- papionin$outline$semi_lm
# Curves
outline_curves <- papionin$outline$curves
# Links between landmarks (for visualization)
outline_links <- papionin$outline$links

Visualize fixed (red) vs. sliding (black) landmarks for the outline of one specimen before sliding.

plot(outline_ar[,,1], pch = 16, col = "black", cex = 0.7, asp = 1)
points(outline_ar[-outline_semi_lm,,1], pch = 16, col = "red")
text(outline_ar[outline_semi_lm,,1], label = outline_semi_lm, pos = 1, col = "black", cex = 0.6)
text(outline_ar[-outline_semi_lm,,1], label = c(1:k_out)[-outline_semi_lm], pos = 1, col = "red", cex = 0.8)

Slide the 54 landmarks by minimizing bending energy. Here both slid but not Procrustes aligned outline landmarks and slid and Procrustes aligned outline landmarks are obtained.

outline_gpa <- Morpho::procSym(outline_ar, SMvector = outline_semi_lm, outlines = outline_curves, pcAlign = FALSE)  # sliding + GPA
## reflection involved
## Iteration 1 ..
## squared distance between means: 3.85834719994358 
##  ------------------------------------------- 
## Iteration 2 ..
## squared distance between means: 0.00016856396081853 
##  ------------------------------------------- 
## Iteration 3 ..
## squared distance between means: 1.89543101565867e-05 
##  ------------------------------------------- 
## Iteration 4 ..
## squared distance between means: 1.67580177683664e-05 
##  ------------------------------------------- 
## Iteration 5 ..
## squared distance between means: 1.58558336641451e-05 
##  ------------------------------------------- 
## Iteration 6 ..
## squared distance between means: 1.49900912797382e-05 
##  ------------------------------------------- 
## Iteration 7 ..
## squared distance between means: 1.41683205965697e-05 
##  ------------------------------------------- 
## Iteration 8 ..
## squared distance between means: 1.34271059429379e-05 
##  ------------------------------------------- 
## Iteration 9 ..
## squared distance between means: 1.27806248456679e-05 
##  ------------------------------------------- 
## Iteration 10 ..
## squared distance between means: 1.22256520950769e-05 
##  ------------------------------------------- 
## Iteration 11 ..
## squared distance between means: 1.17525231794803e-05 
##  ------------------------------------------- 
## Iteration 12 ..
## squared distance between means: 1.13500380739102e-05 
##  ------------------------------------------- 
## Iteration 13 ..
## squared distance between means: 1.10077043774118e-05 
##  ------------------------------------------- 
## Iteration 14 ..
## squared distance between means: 1.07164268267304e-05 
##  ------------------------------------------- 
## Iteration 15 ..
## squared distance between means: 1.0468634359219e-05 
##  ------------------------------------------- 
## Iteration 16 ..
## squared distance between means: 1.02582100738881e-05 
##  ------------------------------------------- 
## Iteration 17 ..
## squared distance between means: 1.0080379123197e-05 
##  ------------------------------------------- 
## Iteration 18 ..
## squared distance between means: 9.93159848867143e-06 
##  -------------------------------------------
outline_ar_slid <- outline_gpa$dataslide  # slid landmarks in the original space
outline_shape <- outline_gpa$rotated  # Procrustes coordinates
outline_shape_average <- outline_gpa$mshape  # average shape

Visualize fixed (red) vs. sliding (black) landmarks for one specimen after sliding, for comparisons to the position of the semilandmarks before sliding.

plot(outline_ar_slid[,,1], pch = 16, col = "black", cex = 0.7, asp = 1)
points(outline_ar_slid[-outline_semi_lm,,1], pch = 16, col = "red")
text(outline_ar_slid[outline_semi_lm,,1], label = outline_semi_lm, pos = 1, col = "black", cex = 0.6)
text(outline_ar_slid[-outline_semi_lm,,1], label = c(1:k_out)[-outline_semi_lm], pos = 1, col = "red", cex = 0.8)

For the analyses of the outline shape variation, use slid landmarks after superimposition (generalized Procrustes analysis, GPA).

Outline shape for all specimens.

plotAllSpecimens(outline_shape, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_spec) {
  segments(outline_shape[outline_links[,1],1,i], 
           outline_shape[outline_links[,1],2,i], 
           outline_shape[outline_links[,2],1,i], 
           outline_shape[outline_links[,2],2,i])
}

Outline shape variation between species.

# Computation of the average outline shape for each species
outline_shape_between <- array(NA, dim = c(k_out, 2, n_species))
for (i in 1:n_species) {
  spi <- which(species == levels(species)[i])
  outline_shape_between[,,i] <- mshape(outline_shape[, , spi])
}
# Plot outline shape between species
plotAllSpecimens(outline_shape_between, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_species) {
  segments(outline_shape_between[outline_links[,1],1,i], 
           outline_shape_between[outline_links[,1],2,i], 
           outline_shape_between[outline_links[,2],1,i], 
           outline_shape_between[outline_links[,2],2,i])
}

Outline shape variation within species.

# Computation of the pooled individual within-species outline shape
outline_shape_within <- array(NA, dim = c(k_out, 2, n_spec))
for (i in 1:n_species) {
  spi <- which(species == levels(species)[i])
  for (j in 1:length(spi)) {
    outline_shape_within[,,spi[j]] <- outline_shape[, , spi[j]] - outline_shape_between[,,i] + mshape(outline_shape_between)
  }
}
# Plot the pooled individual within-species total shape
plotAllSpecimens(outline_shape_within, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_spec) {
  segments(outline_shape_within[outline_links[,1],1,i], 
           outline_shape_within[outline_links[,1],2,i], 
           outline_shape_within[outline_links[,2],1,i], 
           outline_shape_within[outline_links[,2],2,i])
}

For all statistical analyses of the outline shape, use slid and Procrustes aligned landmarks. Here a principal component analysis is performed.

# PCA
outline_shape_pca <- prcomp(two.d.array(outline_shape_between))
# Plot PC1 and PC2
col.sp <- c(rep("red", 3), rep("black", 2), rep("green", 2), rep ("blue", 8), "red", rep("green", 2))  # color vector
pch.sp <- c(rep(16, 4), 1, rep(16, 9), rep(1, 4))  # symbol vector
plot(outline_shape_pca$x[,1:2], las = 1, pch = pch.sp, col = col.sp, asp = 1, main = "Outline shape")
text(outline_shape_pca$x[,1:2], labels = levels(species), pos = 1, cex = 0.8)

Residual shape

The 'residual' shape corresponds to the result of the warping of the 70 slid landmarks to the mean of the Procrustes aligned outline landmarks using TPS interpolation. Note that here:
- the landmark configuration to be mapped is the slid but not Procrustes aligned full landmark set (70 landmarks);
- the reference is the slid but not Procrustes aligned outline landmark subset (54 landmarks);
- the target is the mean of the slid and Procrustes aligned outline landmark subset (54 landmarks).

residual_shape <- tps.all(papionin_ar_slid, outline_ar_slid, outline_shape_average)  # warping to the average outline shape

For the analyses of the residual shape variation no superimposition is needed because the landmarks were already registered by the warping to the mean outline shape (in contrast to GPA, this is a non-rigid registration).

Residual shape for all specimens.

plotAllSpecimens(residual_shape, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_spec) {
  segments(residual_shape[all_links[,1],1,i], 
           residual_shape[all_links[,1],2,i], 
           residual_shape[all_links[,2],1,i], 
           residual_shape[all_links[,2],2,i])
}

Residual shape variation between species

# Computation of the average residual shape for each species
residual_shape_between <- array(NA, dim = c(k, 2, n_species))
for (i in 1:n_species) {
  spi <- which(species == levels(species)[i])
  residual_shape_between[,,i] <- mshape(residual_shape[, ,spi])
}
# Plot residual shape between species
plotAllSpecimens(residual_shape_between, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_species) {
  segments(residual_shape_between[all_links[,1],1,i], 
           residual_shape_between[all_links[,1],2,i], 
           residual_shape_between[all_links[,2],1,i], 
           residual_shape_between[all_links[,2],2,i])
}

Residual shape variation within species.

# Computation of the pooled individual within-species residual shape
residual_shape_within <- array(NA, dim = c(k, 2, n_spec))
for (i in 1:n_species) {
  spi <- which(species == levels(species)[i])
  for (j in 1:length(spi)) {
    residual_shape_within[,,spi[j]] <- residual_shape[, , spi[j]] - residual_shape_between[,,i] + mshape(residual_shape_between)
  }
}
# Plot the pooled individual within-species residual shape
plotAllSpecimens(residual_shape_within, mean = FALSE, plot.param = list(pt.cex = 0.5))
for (i in 1:n_spec) {
  segments(residual_shape_within[all_links[,1],1,i], 
           residual_shape_within[all_links[,1],2,i], 
           residual_shape_within[all_links[,2],1,i], 
           residual_shape_within[all_links[,2],2,i])
}

For statistical analyses of the residual shape, no further Procrustes alignment is needed. Here a principal component analysis is performed.

# PCA
residual_shape_pca <- prcomp(two.d.array(residual_shape_between))
# Plot PC1 and PC2
plot(residual_shape_pca$x[,1:2], las = 1, pch = pch.sp, col = col.sp, asp = 1, main = "Residual shape")
text(residual_shape_pca$x[,1:2], labels = levels(species), pos = 1, cex = 0.8)

Approach 2: Small-scale vs. large-scale shape variation

In this approach, shape variation is decomposed into a small-scale shape component and a large-scale shape component. For this, the slid and Procrustes aligned 70-landmark set is decomposed into partial warps. The small-scale component is based on the first pairs of partial warps, and the large-scale shape component is based on the last pairs of partial warps. In this example, the first 50 pairs are taken for the small-scale shape component.

For all specimens

Decompose the 70 Procrustes-aligned landmarks into partial warps. Note that the function create.pw.be returns principal warps, partial warp scores, and associated bending energies, but also small-scale and large-scale components of shape variation when a threshold value (d) is provided.

d <- 50  # threshold for small-scale vs. large-scale components
papionin_be_pw <- create.pw.be(total_shape, total_shape_average, d)

Small-scale component of shape variation for all specimens.

small_shape_all <- xxyy.to.array(papionin_be_pw$Xsmall, p = k, k = 2) 
plot(small_shape_all[,1,], small_shape_all[,2,], asp = 1, las = 1, cex = 0.5, pch = 16, main = "Small-scale shape component", xlab = "X", ylab = "Y")
for (i in 1:n_spec) {
  segments(small_shape_all[all_links[,1],1,i], 
           small_shape_all[all_links[,1],2,i], 
           small_shape_all[all_links[,2],1,i], 
           small_shape_all[all_links[,2],2,i])
}

Large-scale component of shape variation for all specimens.

large_shape_all <- xxyy.to.array(papionin_be_pw$Xlarge, p = k, k = 2) 
plot(large_shape_all[,1,], large_shape_all[,2,], asp = 1, las = 1, cex = 0.5, pch = 16, main = "Large-scale shape component", xlab = "X", ylab = "Y")
for (i in 1:n_spec) {
  segments(large_shape_all[all_links[,1],1,i], 
           large_shape_all[all_links[,1],2,i], 
           large_shape_all[all_links[,2],1,i], 
           large_shape_all[all_links[,2],2,i])
}

Partial warp variance as a function of bending energy for all specimens.

# Computation of log BE^-1 for the (k-3) partial warps
logInvBE <- log((papionin_be_pw$bendingEnergy)^(-1))
# Computation of log PW variance for the (k-3) partial warps
logPWvar <- log(papionin_be_pw$variancePW)
# Linear regression of the log PW variance on the log BE^-1
mod <- lm(logPWvar ~ logInvBE)
# Plot log PW variance on log BE^-1 with regression line
plot(logInvBE, logPWvar, col = "white", asp = 1, main = "PW variance against inverse BE", xlab = "log 1/BE", ylab = "log PW variance", sub = paste("slope =", round(mod$coefficients[2], 2)))
text(logInvBE, logPWvar, labels = 1:(k-3), cex = 0.5)
abline(mod, col = "blue")

Between species

Decompose the 70 Procrustes aligned landmarks, averaged for each species, into partial warps.

d <- 50  # threshold for small-scale vs. large-scale components
papionin_be_pw_between <- create.pw.be(total_shape_between, mshape(total_shape_between), d)

Small-scale component of shape variation between species.

small_shape_between <- xxyy.to.array(papionin_be_pw_between$Xsmall, p = k, k = 2) 
plot(small_shape_between[,1,], small_shape_between[,2,], asp = 1, las = 1, cex = 0.5, pch = 16, main = "Small-scale shape component", xlab = "X", ylab = "Y")
for (i in 1:n_species) {
  segments(small_shape_between[all_links[,1],1,i], 
           small_shape_between[all_links[,1],2,i], 
           small_shape_between[all_links[,2],1,i], 
           small_shape_between[all_links[,2],2,i])
}

Large-scale component of shape variation between species.

large_shape_between <- xxyy.to.array(papionin_be_pw_between$Xlarge, p = k, k = 2) 
plot(large_shape_between[,1,], large_shape_between[,2,], asp = 1, las = 1, cex = 0.5, pch = 16, main = "Large-scale shape component", xlab = "X", ylab = "Y")
for (i in 1:n_species) {
  segments(large_shape_between[all_links[,1],1,i], 
           large_shape_between[all_links[,1],2,i], 
           large_shape_between[all_links[,2],1,i], 
           large_shape_between[all_links[,2],2,i])
}

Partial warp variance as a function of bending energy between species.

# Computation of log BE^-1 for the (k-3) partial warps
logInvBE_between <- log((papionin_be_pw_between$bendingEnergy)^(-1))
# Computation of log PW variance for the (k-3) partial warps
logPWvar_between <- log(papionin_be_pw_between$variancePW)
# Linear regression of the log PW variance on the log BE^-1
mod_between <- lm(logPWvar_between ~ logInvBE_between)
# Plot log PW variance on log BE^-1 with regression line
plot(logInvBE_between, logPWvar_between, col = "white", asp = 1, main = "PW variance against inverse BE", xlab = "log 1/BE", ylab = "log PW variance", sub = paste("slope =", round(mod_between$coefficients[2], 2)))
text(logInvBE_between, logPWvar_between, labels = 1:(k-3), cex = 0.5)
abline(mod_between, col = "blue")

Within species

Decompose the 70 Procrustes aligned landmarks, pooled within species, into partial warps.

d <- 50  # threshold for small-scale vs. large-scale components
papionin_be_pw_within <- create.pw.be(total_shape_within, mshape(total_shape_within), d)

Small-scale component of shape variation within species.

small_shape_within <- xxyy.to.array(papionin_be_pw_within$Xsmall, p = k, k = 2) 
plot(small_shape_within[,1,], small_shape_within[,2,], asp = 1, las = 1, cex = 0.5, pch = 16, main = "Small-scale shape component", xlab = "X", ylab = "Y")
for (i in 1:n_spec) {
  segments(small_shape_within[all_links[,1],1,i], 
           small_shape_within[all_links[,1],2,i], 
           small_shape_within[all_links[,2],1,i], 
           small_shape_within[all_links[,2],2,i])
}

Large-scale component of shape variation within species.

large_shape_within <- xxyy.to.array(papionin_be_pw_within$Xlarge, p = k, k = 2) 
plot(large_shape_within[,1,], large_shape_within[,2,], asp = 1, las = 1, cex = 0.5, pch = 16, main = "Large-scale shape component", xlab = "X", ylab = "Y")
for (i in 1:n_spec) {
  segments(large_shape_within[all_links[,1],1,i], 
           large_shape_within[all_links[,1],2,i], 
           large_shape_within[all_links[,2],1,i], 
           large_shape_within[all_links[,2],2,i])
}

Partial warp variance as a function of bending energy within species.

# Computation of log BE^-1 for the (k-3) partial warps
logInvBE_within <- log((papionin_be_pw_within$bendingEnergy)^(-1))
# Computation of log PW variance for the (k-3) partial warps
logPWvar_within <- log(papionin_be_pw_within$variancePW)
# Linear regression of the log PW variance on the log BE^-1
mod_within <- lm(logPWvar_within ~ logInvBE_within)
# Plot log PW variance on log BE^-1 with regression line
plot(logInvBE_within, logPWvar_within, col = "white", asp = 1, main = "PW variance against inverse BE", xlab = "log 1/BE", ylab = "log PW variance", sub = paste("slope =", round(mod_within$coefficients[2], 2)))
text(logInvBE_within, logPWvar_within, labels = 1:(k-3), cex = 0.5)
abline(mod_within, col = "blue")

References

Bookstein FL (1989). Principal Warps: Thin-plate splines and the decomposition of deformations. IEEE Transactions on pattern analysis and machine intelligence 11(6): 567--585. https://ieeexplore.ieee.org/abstract/document/24792

Grunstra NDS et al. (in press). Detecting phylogenetic signal and adaptation in papionin cranial shape by decomposing variation at different spatial scales. Systematic Biology

Grunstra, NDS et al. (2020). Data form: Detecting phylogenetic signal and adaptation in papionin cranial shape by decomposing variation at different spatial scales. Dryad Digital Repository https://doi.org/10.5061/dryad.zkh189373

Mitteroecker P et al. (2020). Morphometric variation at different spatial scales: coordination and compensation in the emergence of organismal form. Systematic Biology 69(5): 913--926. https://doi.org/10.1093/sysbio/syaa007