library(dplyr)
library(magrittr)
library(purrr)
library(nlaR)
dt <- nla_load(2012)
tp <- select(dt$waterchem_wide, PTL_RESULT, UID) %>%
left_join(select(dt$wide_siteinfo, AREA_HA, SITE_ID, UID)) %>%
group_by(SITE_ID) %>%
summarize(tp = median(PTL_RESULT), area = median(AREA_HA) / 100)
## Joining, by = "UID"
chl <- select(dt$chla_wide, CHLX_RESULT, UID) %>%
left_join(select(dt$wide_siteinfo, SITE_ID, UID)) %>%
group_by(SITE_ID) %>%
summarize(chl = median(CHLX_RESULT))
## Joining, by = "UID"
secchi <- select(dt$secchi, SECCHI, SITE_ID) %>%
group_by(SITE_ID) %>%
summarize(secchi = median(SECCHI))
res <- reduce(list(tp, chl, secchi), left_join)
## Joining, by = "SITE_ID"
## Joining, by = "SITE_ID"
res <- left_join(res, select(dt$wide_phab, depth = DEPTH_AT_STATION, SITE_ID))
## Joining, by = "SITE_ID"
skimr::skim(res)
## Skim summary statistics
## n obs: 20838
## n variables: 6
##
## Variable type: character
## variable missing complete n min max empty n_unique
## SITE_ID 0 20838 20838 12 13 0 1130
##
## Variable type: numeric
## variable missing complete n mean sd p0 p25 p50 p75
## area 0 20838 20838 4.62 39.54 0.01 0.11 0.3 1.05
## chl 90 20748 20838 26.75 53.16 0 3.03 8 26.8
## depth 9051 11787 20838 1.6 1.71 0 0.7 1.1 1.9
## secchi 1238 19600 20838 2.08 2.25 0.019 0.64 1.39 2.83
## tp 0 20838 20838 120.94 286.04 4 21 41 100
## p100 hist
## 1674.9 ▇▁▁▁▁▁▁▁
## 764.64 ▇▁▁▁▁▁▁▁
## 34.1 ▇▁▁▁▁▁▁▁
## 28 ▇▂▁▁▁▁▁▁
## 3636 ▇▁▁▁▁▁▁▁