-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathfetchPedonPC.R
117 lines (93 loc) · 4.41 KB
/
fetchPedonPC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# horizon checking may be too strict
#' Get a SoilProfileCollection from a PedonPC v.5 database
#'
#' Fetch commonly used site/horizon data from a version 5.x PedonPC database, return as a SoilProfileCollection object.
#' @aliases fetchPedonPC getHzErrorsPedonPC
#' @param dsn The path to a PedonPC version 6.x database
#' @param strict Use "strict" horizon error checking? Default: `TRUE`
#' @return a SoilProfileCollection class object
#' @note This function attempts to do most of the boilerplate work when
#' extracting site/horizon data from a PedonPC or local NASIS database. Pedons
#' that have errors in their horizonation are excluded from the returned
#' object, however, their IDs are printed on the console. See
#' \code{\link{getHzErrorsPedonPC}} for a simple approach to identifying pedons
#' with problematic horizonation. Records from the 'taxhistory' table are
#' selected based on 1) most recent record, or 2) record with the least amount
#' of missing data.
#' @author D. E. Beaudette and J. M. Skovlin
#' @seealso \code{\link{get_hz_data_from_pedon_db}}
#' @keywords manip
#' @export fetchPedonPC
fetchPedonPC <- function(dsn) {
# not in parity with NASIS functions
warning("Loading data from PedonPC will return slightly different data structures than fetchNASIS().", call. = FALSE)
# load data in pieces
site_data <- get_site_data_from_pedon_db(dsn)
hz_data <- get_hz_data_from_pedon_db(dsn)
color_data <- get_colors_from_pedon_db(dsn)
extended_data <- get_extended_data_from_pedon_db(dsn)
# join pieces
# horizon + hz color: all horizons
h <- merge(hz_data, color_data, by='phiid', all.x = TRUE, sort = FALSE)
# convert colors... in the presence of missing color data
if(nrow(h) > 0) {
h$soil_color <- NA
idx <- complete.cases(h$m_r)
h$soil_color[idx] <- with(h[idx, ], rgb(m_r, m_g, m_b)) # moist colors
}
# replace horizons with hz + fragment summary
h <- merge(h, extended_data$frag_summary, by='phiid', all.x = TRUE, sort = FALSE)
# fix some common problems
# replace missing lower boundaries
message('replacing missing lower horizon boundaries ...')
missing.lower.depth.idx <- which(!is.na(h$hzdept) & is.na(h$hzdepb))
h$hzdepb[missing.lower.depth.idx] <- h$hzdept[missing.lower.depth.idx] + 1
# test for bad horizonation... flag, and remove
cat('finding horizonation errors ...\n')
h.test <- aqp::checkHzDepthLogic(h, c('hzdept', 'hzdepb'), idname = 'peiid', fast = TRUE)
# which are the good (valid) ones?
good.pedon.ids <- as.character(h.test$peiid[which(h.test$valid)])
bad.pedon.ids <- as.character(h.test$pedon_id[which(!h.test$valid)])
# keep the good ones
h <- h[which(h$peiid %in% good.pedon.ids), ]
if (nrow(h) == 0)
stop("no horizon data in PedonPC database", call. = FALSE)
# upgrade to SoilProfilecollection
depths(h) <- peiid ~ hzdept + hzdepb
## TODO: this is slow
# move pedon_id into @site, this will be used to join full table of site data
site(h) <- ~ pedon_id
## TODO: this will fail in the presence of duplicates
# add site data to object
site_data$pedon_id <- NULL # remove 'pedon_id' column from site_data
site(h) <- site_data # left-join via peiid
# load diagnostic horizons into @diagnostic
diagnostic_hz(h) <- extended_data$diagnostic
# add diagnostic boolean data into @site
site(h) <- extended_data$diagHzBoolean
### TODO: consider moving this into the extended data function ###
# load best-guess optimal records from taxhistory
# method is added to the new field called 'selection_method'
.BY <- NULL
.SD <- NULL
# .pickBestTaxHistory <- soilDB:::.pickBestTaxHistory
# .pickBestEcosite <- soilDB:::.pickBestEcosite
ed.tax <- data.table::as.data.table(extended_data$taxhistory)
best.tax.data <- ed.tax[, .pickBestTaxHistory(.SD),
by = list(peiid = ed.tax$peiid)]
site(h) <- as.data.frame(best.tax.data)
# join-in landform string
ed.lf <- data.table::as.data.table(extended_data$geomorph)
lf <- ed.lf[, .formatLandformString(.SD, uid = .BY$peiid, name.sep = '|'),
by = list(peiid = ed.lf$peiid)]
if (ncol(lf) > 1)
site(h) <- as.data.frame(lf[,c("peiid","landform_string")])
# set PedonPC/NASIS-specific horizon identifier
hzidname(h) <- 'phiid'
# 7. save and mention bad pedons
assign('bad.pedon.ids', value=bad.pedon.ids, envir=get_soilDB_env())
if(length(bad.pedon.ids) > 0)
message("horizon errors detected, use `get('bad.pedon.ids', envir=get_soilDB_env())` for a list of pedon IDs")
# done
return(h)
}