babette: Step by Step

Richèl J.C. Bilderbeek

2022-08-12

Introduction

This step-by-step demo shows how to run the babette pipeline in detail.

First, load babette:

library(babette)

In all cases, this is done for a short MCMC chain length of 10K:

inference_model <- create_inference_model()
inference_model$mcmc$chain_length <- 10000
inference_model$mcmc$tracelog$filename <- normalizePath(
  get_beautier_tempfilename(
    pattern = "tracelog_", fileext = ".log"
  ),
  mustWork = FALSE
)
inference_model$mcmc$treelog$filename <- normalizePath(
  get_beautier_tempfilename(
    pattern = "treelog_",
    fileext = ".trees"
  ),
  mustWork = FALSE
)

Create a ‘BEAST2’ input file

This step is commonly done using BEAUti. With babette, this can be done as follows:

beast2_input_file <- tempfile(pattern = "beast2_", fileext = ".xml")
create_beast2_input_file_from_model(
  input_filename = get_babette_path("anthus_aco.fas"),
  inference_model = inference_model,
  output_filename = beast2_input_file
)

Display (part of) the ‘BEAST2’ input file

print(head(readLines(beast2_input_file)))
#> [1] "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><beast beautitemplate='Standard' beautistatus='' namespace=\"beast.core:beast.evolution.alignment:beast.evolution.tree.coalescent:beast.core.util:beast.evolution.nuc:beast.evolution.operators:beast.evolution.sitemodel:beast.evolution.substitutionmodel:beast.evolution.likelihood\" required=\"\" version=\"2.4\">"
#> [2] ""                                                                                                                                                                                                                                                                                                                                                                                   
#> [3] ""                                                                                                                                                                                                                                                                                                                                                                                   
#> [4] "    <data"                                                                                                                                                                                                                                                                                                                                                                          
#> [5] "id=\"anthus_aco\""                                                                                                                                                                                                                                                                                                                                                                  
#> [6] "name=\"alignment\">"

This file can both be loaded by BEAUti and be used by ‘BEAST2’.

The file can be checked if it is indeed a valid input file:

if (is_beast2_installed()) {
  is_beast2_input_file(beast2_input_file)
}
#> [1] TRUE

Run MCMC

This step is commonly done using ‘BEAST2’ from the command-line or using its GUI. With babette, this can be done as follows:

if (is_beast2_installed()) {
  beast2_options <- create_beast2_options(
    input_filename = beast2_input_file
  )
  beastier::check_can_create_file(beast2_options$output_state_filename)
  beastier::check_can_create_treelog_file(beast2_options)
  run_beast2_from_options(
    beast2_options = beast2_options
  )
  testthat::expect_true(file.exists(beast2_options$output_state_filename))
}

Display (part of) the ‘BEAST2’ output files

The .log file contains the model parameters and parameter estimates:

if (is_beast2_installed()) {
  print(head(readLines(inference_model$mcmc$tracelog$filename)))
  print(tail(readLines(inference_model$mcmc$tracelog$filename)))
}
#> [1] "#"                                                                                                                                  
#> [2] "#model:"                                                                                                                            
#> [3] "#"                                                                                                                                  
#> [4] "#<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?><input id=\"posterior\" spec=\"beast.core.util.CompoundDistribution\">"
#> [5] "#    <distribution id=\"prior\" spec=\"beast.core.util.CompoundDistribution\">"                                                     
#> [6] "#        <distribution id=\"YuleModel.t:anthus_aco\" spec=\"beast.evolution.speciation.YuleModel\">"                                
#> [1] "5000\t-1777.289360939297\t-1871.337648229884\t94.048287290587\t-1871.337648229884\t0.015287096987004855\t94.048287290587\t299.98252714435995"       
#> [2] "6000\t-1759.260467381365\t-1856.1460326781632\t96.88556529679812\t-1856.1460326781632\t0.013513438766792315\t96.88556529679812\t360.468983684849"   
#> [3] "7000\t-1758.625197648984\t-1857.946408966308\t99.32121131732369\t-1857.946408966308\t0.011287724401933026\t99.32121131732369\t248.76268180481668"   
#> [4] "8000\t-1766.8963828502401\t-1857.4077824667643\t90.51139961652405\t-1857.4077824667643\t0.014801789546695511\t90.51139961652405\t250.51165893178478"
#> [5] "9000\t-1760.1229850965979\t-1854.6695109377547\t94.54652584115674\t-1854.6695109377547\t0.012535468411928272\t94.54652584115674\t252.0177808118283" 
#> [6] "10000\t-1765.8228214042595\t-1862.1708548409642\t96.34803343670482\t-1862.1708548409642\t0.01217436432253529\t96.34803343670482\t287.4042804280882"

The .trees file contains the alignment, taxa and posterior trees:

if (is_beast2_installed()) {
  print(head(readLines(inference_model$mcmc$treelog$filename)))
  print(tail(readLines(inference_model$mcmc$treelog$filename)))
}
#> Warning in readLines(inference_model$mcmc$treelog$filename): incomplete final
#> line found on '/home/richel/.cache/beautier/treelog_3dfaf2836be40.trees'
#> [1] "#NEXUS"                ""                      "Begin taxa;"          
#> [4] "\tDimensions ntax=22;" "\t\tTaxlabels"         "\t\t\t61430_aco"
#> Warning in readLines(inference_model$mcmc$treelog$filename): incomplete final
#> line found on '/home/richel/.cache/beautier/treelog_3dfaf2836be40.trees'
#> [1] "tree STATE_6000 = ((((12:0.0014611070447328916,18:0.0014611070447328916):0.00467634516865566,(19:0.005072273327261571,((2:0.0018006366328986564,7:0.0018006366328986564):0.0015053128488877598,10:0.003305949481786416):0.0017663238454751545):0.0010651788861269808):0.001197293096307177,(6:5.582317107554632E-4,20:5.582317107554632E-4):0.006776513598940265):0.006178693457096586,(((((((1:1.6190092190516026E-4,5:1.6190092190516026E-4):1.8507791390075273E-4,((3:1.0511575065052365E-5,17:1.0511575065052365E-5):1.0912223358669175E-4,22:1.1963380865174412E-4):2.273450271541689E-4):4.0740351642374136E-4,11:7.543823522296544E-4):7.67262373931031E-4,21:0.0015216447261606853):0.0019518938768610635,9:0.003473538603021749):0.0013650047035681243,(((8:6.91298573985358E-4,13:6.91298573985358E-4):3.8426981200404724E-4,16:0.0010755683859894052):0.0023954418316506913,(15:1.9543687594530685E-4,4:1.9543687594530685E-4):0.0032755733416947896):0.0013675330889497766):1.2502589388370747E-5,14:0.004851045895978244):0.008662392870814072):0.0;"
#> [2] "tree STATE_7000 = ((((((19:4.4993918983386646E-4,18:4.4993918983386646E-4):6.195155461618707E-4,20:0.001069454735995737):0.0017412720335493806,12:0.0028107267695451177):0.0015081430883031421,((2:0.001563560153609172,7:0.001563560153609172):4.491707723194465E-4,10:0.0020127309259286184):0.0023061389319196414):0.0013176553579948794,6:0.005636525215843139):0.0056511991860898865,((9:0.004285079481436113,(((8:5.035914597180151E-4,13:5.035914597180151E-4):6.159780639087498E-4,16:0.001119569523626765):0.002182384740543929,(15:1.85019759934382E-4,4:1.85019759934382E-4):0.0031169345042363116):9.831252172654193E-4):1.474232968032103E-4,(((22:0.001263571527988838,14:0.001263571527988838):0.002869897638412216,21:0.004133469166401054):1.302892515088177E-4,(((3:1.8460068829044391E-4,17:1.8460068829044391E-4):5.012057526447496E-5,(1:1.1360757645751779E-4,5:1.1360757645751779E-4):1.2111368709740109E-4):0.0017364574823811776,11:0.0019711787459360964):0.0022925796719737756):1.6874436032945136E-4):0.0068552216236937025):0.0;"    
#> [3] "tree STATE_8000 = ((((((2:0.0020147811992749683,7:0.0020147811992749683):3.6106478972108055E-4,10:0.002375845988996049):0.0026008960270122684,6:0.004976742016008317):7.102536513573761E-4,(12:0.00203822631610251,18:0.00203822631610251):0.0036487693512631835):5.446374128219083E-4,(19:0.004376743672346278,20:0.004376743672346278):0.0018548894078413234):0.00857015646650791,((((21:0.003383536091227346,((((5:3.872219275640954E-4,1:3.872219275640954E-4):1.9094790619553356E-4,3:5.78169833759629E-4):3.095163926266671E-4,17:8.87686226386296E-4):4.0772026359232503E-4,11:0.001295406489978621):0.0020881296012487246):0.0037904318294529533,14:0.007173967920680299):1.2168134058662336E-4,(((8:3.2100233002511604E-4,13:3.2100233002511604E-4):0.002329796677164054,16:0.00265079900718917):0.0029295173646250875,(15:2.9473198528593255E-4,4:2.9473198528593255E-4):0.005285584386528325):0.0017153328894526651):1.6349540462757585E-5,(9:0.00725564190034091,22:0.00725564190034091):5.635690138877051E-5):0.007489790744965831):0.0;"            
#> [4] "tree STATE_9000 = (((((((17:4.532611922924326E-4,1:4.532611922924326E-4):2.0108952460885804E-4,(5:4.086913244267104E-4,3:4.086913244267104E-4):2.4565939247458025E-4):6.245621194765042E-4,22:0.0012789128363777948):5.434545218497349E-4,11:0.0018223673582275298):0.004469309066399561,((19:0.005888631899952468,(21:0.001308959036283853,9:0.001308959036283853):0.004579672863668615):1.9515090892518235E-4,14:0.00608378280887765):2.0789361574943994E-4):7.157924548329376E-4,((20:0.0018015026518208193,(15:1.761261689209434E-4,4:1.761261689209434E-4):0.001625376482899876):0.0026786413478679105,(16:8.179622123290174E-4,(8:3.1026570547953877E-4,13:3.1026570547953877E-4):5.076965068494786E-4):0.0036621817873597126):0.0025273248797712975):0.005527999532468244,((((2:0.00213584563118325,7:0.00213584563118325):8.465999229327072E-4,10:0.0029824455541159573):0.004199149075184159,6:0.007181594629300117):1.2278015115313717E-4,(12:0.002141670095096065,18:0.002141670095096065):0.005162704685357188):0.005231093631475018):0.0;"           
#> [5] "tree STATE_10000 = (((((12:1.6011346322821626E-4,18:1.6011346322821626E-4):0.005386615863220685,(10:0.0019638543745241735,(2:0.0012467574182748991,7:0.0012467574182748991):7.170969562492744E-4):0.0035828749519247276):4.133549308251525E-4,6:0.005960084257274054):0.002573050980354338,21:0.008533135237628392):0.0036412290849068985,(((9:0.00483535947789837,14:0.00483535947789837):5.242881942012006E-4,(11:0.0025554191226571953,((19:0.0012021521889915268,((17:1.8623512971811195E-5,1:1.8623512971811195E-5):4.662526839969658E-5,3:6.524878137150778E-5):0.001136903407620019):1.7153728170087447E-4,(22:8.344847787275512E-4,5:8.344847787275512E-4):5.3920469196485E-4):0.001181729651964794):0.0028042285494423755):4.3797523161992653E-4,(((4:7.479471471440021E-5,15:7.479471471440021E-5):0.002595789689587407,20:0.002670584404301807):0.0011920440720867525,(16:0.0014271694885671477,(13:5.56763848549922E-4,8:5.56763848549922E-4):8.704056400172257E-4):0.002435458987821412):0.0019349944273309378):0.006376741418815793):0.0;"          
#> [6] "End;"

The .xml.state file contains the final state of the MCMC run and the MCMC operator acceptances thus far:

if (is_beast2_installed()) {
  print(head(readLines(beast2_options$output_state_filename)))
  print(tail(readLines(beast2_options$output_state_filename)))
}
#> [1] "<itsabeastystatewerein version='2.0' sample='10000'>"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
#> [2] "<statenode id='Tree.t:anthus_aco'>(((((11:1.6011346322821626E-4,17:1.6011346322821626E-4)40:0.005386615863220685,(9:0.0019638543745241735,(1:0.0012467574182748991,6:0.0012467574182748991)31:7.170969562492744E-4)29:0.0035828749519247276)25:4.133549308251525E-4,5:0.005960084257274054)41:0.002573050980354338,20:0.008533135237628392)30:0.0036412290849068985,(((8:0.00483535947789837,13:0.00483535947789837)33:5.242881942012006E-4,(10:0.0025554191226571953,((18:0.0012021521889915268,((16:1.8623512971811195E-5,0:1.8623512971811195E-5)39:4.662526839969658E-5,2:6.524878137150778E-5)37:0.001136903407620019)26:1.7153728170087447E-4,(21:8.344847787275512E-4,4:8.344847787275512E-4)24:5.3920469196485E-4)28:0.001181729651964794)34:0.0028042285494423755)22:4.3797523161992653E-4,(((3:7.479471471440021E-5,14:7.479471471440021E-5)35:0.002595789689587407,19:0.002670584404301807)36:0.0011920440720867525,(15:0.0014271694885671477,(12:5.56763848549922E-4,7:5.56763848549922E-4)23:8.704056400172257E-4)27:0.002435458987821412)38:0.0019349944273309378)32:0.006376741418815793)42:0.0</statenode>"
#> [3] "<statenode id='birthRate.t:anthus_aco'>birthRate.t:anthus_aco[1 1] (-Infinity,Infinity): 287.4042804280882 </statenode>"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   
#> [4] "</itsabeastystatewerein>"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
#> [5] "<!--"                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      
#> [6] "{\"operators\":["                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
#> [1] "{\"id\":\"YuleModelSubtreeSlide.t:anthus_aco\",\"p\":1,\"accept\":4,\"reject\":2034,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":1022,\"rejectOp\":1022},"  
#> [2] "{\"id\":\"YuleModelNarrow.t:anthus_aco\",\"p\":\"NaN\",\"accept\":860,\"reject\":1080,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":0,\"rejectOp\":0},"      
#> [3] "{\"id\":\"YuleModelWide.t:anthus_aco\",\"p\":\"NaN\",\"accept\":15,\"reject\":357,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":188,\"rejectOp\":188},"      
#> [4] "{\"id\":\"YuleModelWilsonBalding.t:anthus_aco\",\"p\":\"NaN\",\"accept\":29,\"reject\":355,\"acceptFC\":0,\"rejectFC\":0,\"rejectIv\":60,\"rejectOp\":60}"
#> [5] "]}"                                                                                                                                                       
#> [6] "-->"

Parse output

This step is commonly done using Tracer. With babette, this can be done as follows.

Parsing .log file to obtain the parameter estimates:

if (is_beast2_installed()) {
  knitr::kable(
    head(parse_beast_tracelog_file(inference_model$mcmc$tracelog$filename))
  )
}
Sample posterior likelihood prior treeLikelihood TreeHeight YuleModel birthRate
0 -6553.355 -6547.606 -5.748615 -6547.606 1.1956036 -5.748615 1.00000
1000 -1952.488 -2015.620 63.132001 -2015.620 0.0576393 63.132001 28.37087
2000 -1770.872 -1864.843 93.971001 -1864.843 0.0144106 93.971001 246.95951
3000 -1762.877 -1862.221 99.344279 -1862.221 0.0108172 99.344279 446.87514
4000 -1769.121 -1862.042 92.921038 -1862.042 0.0134218 92.921038 209.07644
5000 -1777.289 -1871.338 94.048287 -1871.338 0.0152871 94.048287 299.98253

Parsing .trees file to obtain the posterior phylogenies:

if (is_beast2_installed()) {
  plot_densitree(parse_beast_trees(inference_model$mcmc$treelog$filename))
}

Parsing .xml.state file to obtain the MCMC operator acceptances:

if (is_beast2_installed()) {
  knitr::kable(
    head(parse_beast_state_operators(beast2_options$output_state_filename))
  )
}
operator p accept reject acceptFC rejectFC rejectIv rejectOp
YuleBirthRateScaler.t 0.75 309 128 0 0 0 0
YuleModelTreeScaler.t 0.50 106 330 0 0 0 0
YuleModelTreeRootScaler.t 0.50 98 322 0 0 27 27
YuleModelUniformOperator.t NaN 2327 1647 0 0 0 0
YuleModelSubtreeSlide.t 1.00 4 2034 0 0 1022 1022
YuleModelNarrow.t NaN 860 1080 0 0 0 0