Use data-raw folder for building datasets from scratch
Showing 30 changed files with 3,257 additions and 217 deletions.
.Rbuildignore
@@ -14,3 +14,4 @@ README.Rmd
^_pkgdown\.yml$
^pkgdown$
^CRAN-SUBMISSION$
^data-raw$
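For context (not part of this commit): the data-raw/ layout and the ^data-raw$ ignore entry are conventionally scaffolded with usethis, e.g.

# creates data-raw/, adds ^data-raw$ to .Rbuildignore, and opens a build script for editing
usethis::use_data_raw("loafercreek")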
@@ -0,0 +1,55 @@
# make SPC data sets from NASIS
library(aqp)
library(soilDB)

# load current data sets to fetch peiid
data("loafercreek", package = "soilDB")
data("gopheridge", package = "soilDB")
data("mineralKing", package = "soilDB")

# # create CSVs (requires NASIS setup)
# # query CA630 and CA792 w/ R08 PEDON/SITE by SSA ID or similar
# # load source data sets (CA630 and CA792 pedons)
# nasis_pedons <- fetchNASIS(rmHzErrors = FALSE, SS = FALSE)
#
# p <- rebuildSPC(subset(nasis_pedons, siteiid %in% as.double(c(loafercreek, gopheridge, mineralKing)$siteiid)))
# write.csv(horizons(p), "data-raw/spc-horizons.csv", row.names = FALSE)
# write.csv(site(p), "data-raw/spc-site.csv", row.names = FALSE)
# write.csv(diagnostic_hz(p), "data-raw/spc-diagnostic_hz.csv", row.names = FALSE)
# write.csv(restrictions(p), "data-raw/spc-restrictions.csv", row.names = FALSE)

recent1822a <- read.csv("data-raw/spc-horizons.csv")
depths(recent1822a) <- peiid ~ hzdept + hzdepb
site(recent1822a) <- read.csv("data-raw/spc-site.csv")
diagnostic_hz(recent1822a) <- read.csv("data-raw/spc-diagnostic_hz.csv")
restrictions(recent1822a) <- read.csv("data-raw/spc-restrictions.csv")

# ensure that phiid is set as hzID; set horizon designation and texture class columns
hzidname(recent1822a) <- "phiid"
hzdesgnname(recent1822a) <- "hzname"
hztexclname(recent1822a) <- "texcl"

# subset
loafercreek2 <- rebuildSPC(subset(recent1822a, profile_id(recent1822a) %in% profile_id(loafercreek)))
gopheridge2 <- rebuildSPC(subset(recent1822a, profile_id(recent1822a) %in% profile_id(gopheridge)))
mineralKing2 <- rebuildSPC(subset(recent1822a, profile_id(recent1822a) %in% profile_id(mineralKing)))

# ensure that phiid is set as hzID
hzidname(loafercreek2) <- "phiid"
hzidname(gopheridge2) <- "phiid"
hzidname(mineralKing2) <- "phiid"

# verify completeness
if (all(profile_id(loafercreek) %in% profile_id(loafercreek2)))
  loafercreek <- loafercreek2

if (all(profile_id(gopheridge) %in% profile_id(gopheridge2)))
  gopheridge <- gopheridge2

if (all(profile_id(mineralKing) %in% profile_id(mineralKing2)))
  mineralKing <- mineralKing2

# save to .rda
usethis::use_data(loafercreek, overwrite = TRUE, compress = 'xz')
usethis::use_data(gopheridge, overwrite = TRUE, compress = 'xz')
usethis::use_data(mineralKing, overwrite = TRUE, compress = 'xz')
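A quick sanity check of the rebuilt objects, as a sketch (assumes the .rda files written by use_data() above, with the package root as the working directory):

library(aqp)
load("data/loafercreek.rda")     # written by usethis::use_data() above
length(loafercreek)              # number of profiles
hzidname(loafercreek)            # should report "phiid"
plotSPC(loafercreek[1:10, ])     # quick look at the first 10 profiles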
@@ -1,7 +1,5 @@
library(soilDB)

## code to prepare `metadata` dataset goes here
# make data/metadata.rda (used by uncode() when NASIS not available)
metadata <- soilDB:::.get_NASIS_metadata()
save(metadata, file = "data/metadata.rda")

head(metadata)
metadata <- soilDB:::.get_NASIS_metadata()
usethis::use_data(metadata, overwrite = TRUE, compress = 'xz')
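The metadata table is what uncode() falls back on when a NASIS connection is not available; a rough sketch of inspecting it (column layout may differ across soilDB versions):

data("metadata", package = "soilDB")
str(metadata)    # NASIS domain / choice-list definitions
# uncode() uses this table to translate coded NASIS columns into labels, e.g.
# uncode(coded_nasis_df)    # 'coded_nasis_df' is a hypothetical data frame with coded columns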
@@ -0,0 +1,4 @@
## code to prepare `SCAN_SNOTEL_metadata` dataset goes here
SCAN_SNOTEL_metadata <- read.csv("data-raw/station-metadata.csv")

usethis::use_data(SCAN_SNOTEL_metadata, overwrite = TRUE, compress = 'xz')
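As a usage sketch, the packaged metadata can be browsed directly and paired with fetchSCAN() for station data (the site code below is a placeholder):

data("SCAN_SNOTEL_metadata", package = "soilDB")
head(SCAN_SNOTEL_metadata)
# x <- fetchSCAN(site.code = 2218, year = 2020)    # requires network access; placeholder site code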
@@ -0,0 +1,9 @@
# library(soilDB)
# x <- SDA_query("SELECT DISTINCT attributekey FROM sdvattribute")
# res <- get_SDV_legend_elements(paste0("attributekey = ", x$attributekey))
# resall <- data.table::rbindlist(res, fill = TRUE)
# sdvmaplegend <- resall
#
# # including attributedescription makes the rda 10x bigger
# sdvmaplegend$attributedescription <- NULL
# save(sdvmaplegend, file = "misc/sdvmaplegend.rda")
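For orientation, the sdvattribute table referenced above lives in Soil Data Access and can be inspected directly; a small sketch, assuming network access:

library(soilDB)
SDA_query("SELECT TOP 5 attributekey, attributename FROM sdvattribute")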
@@ -0,0 +1,153 @@
# source data for this and its output can be found in the scan-snotel-data folder.
# See SCAN_SNOTEL_metadata.R for the current build routine, which is abbreviated/derived from this one.

# library(soilDB)
# library(rvest)
# library(xml2)
# library(plyr)
#
# # https://github.com/ncss-tech/soilDB/issues/61
#
# # attempt to cross-reference a lab ID via pedon ID
# # using a LIMS report and HTML scraping
# # about 5 seconds per request
# getLabPedon <- function(pedonID) {
#   url <- sprintf('https://nasis.sc.egov.usda.gov/NasisReportsWebSite/limsreport.aspx?report_name=Pedon+Description+html+(userpedid)&pedon_id=%s', pedonID)
#
#   rpt <- read_html(url)
#   n <- html_node(rpt, xpath = "//*/table/tr[10]/td[1]/*")
#   n <- xml_text(n)
#   lab.id <- gsub(' ', '', strsplit(n, ':')[[1]][2])
#
#   return(lab.id)
# }
#
# getLabPedon <- Vectorize(getLabPedon)
#
#
#
# ##
# ## station list / site information
# ##
#
# # 2021-02-25 DEB update site data from www map
#
# # get these data from SCAN/SNOTEL www map, zoom all the way out and then click on export to CSV
# # there are some trash data in here, trailing tabs
# x <- read.csv('scan-snotel-data/scan-snotel-site-data.csv', stringsAsFactors = FALSE, colClasses = 'character')
#
# # fix formatting
# x$Name <- trimws(x$Name)
# x$ID <- as.numeric(trimws(x$ID))
# x$State <- trimws(x$State)
# x$Network <- trimws(x$Network)
# x$County <- trimws(x$County)
# x$Elevation_ft <- as.numeric(trimws(x$Elevation_ft))
# x$Latitude <- as.numeric(trimws(x$Latitude))
# x$Longitude <- as.numeric(trimws(x$Longitude))
# x$HUC <- trimws(x$HUC)
#
# # re-name ID
# names(x)[2] <- 'Site'
#
# # check: OK
# nrow(x)
# str(x)
#
#
# ##
# ## pedon / lab IDs
# ##
#
# ## SCAN / SNOTEL sites from western US
# # most of these files are maintained by regional staff
# # naming convention from NASIS site table
# p.west <- read.csv('scan-snotel-data/Utah_DCO_Soil_Lab_Data.csv', stringsAsFactors = FALSE)
#
# # what's in here:
# # many sites from several states!
# str(p.west)
# table(p.west$state)
#
# # keep subset of columns
# p.west <- p.west[, c('climstaid', 'climstanm', 'upedonid', 'pedlabsampnum')]
#
# # re-name ID
# names(p.west)[1] <- 'Site'
#
# # re-name for mixing
# names(p.west)[-1] <- paste0(names(p.west)[-1], '-WEST')
#
# # check: ok
# str(p.west)
#
#
#
# ## SCAN data via Steve Campbell / soil climate center
# # missing lab IDs
# # missing SNOTEL sites
# p.scan <- read.csv('scan-snotel-data/SCAN_Pedon_Master.csv', stringsAsFactors = FALSE)
#
# str(p.scan)
# table(p.scan$State)
#
# # re-name to match other metadata
# names(p.scan) <- c('Site', 'climstanm', 'state', 'upedonid')
#
# # look-up lab ID via LIMS report
# # takes a couple of minutes
# # some pedon IDs won't map to a lab ID (not linked in NASIS)
# p.scan$pedlabsampnum <- getLabPedon(p.scan$upedonid)
#
# # replace missing values with NA
# p.scan$pedlabsampnum[which(p.scan$pedlabsampnum == '')] <- NA
#
# # re-name and subset columns
# p.scan <- p.scan[, c('Site', 'climstanm', 'upedonid', 'pedlabsampnum')]
# names(p.scan)[-1] <- paste0(names(p.scan)[-1], '-SCAN')
#
#
# ##
# ## merge metadata from various sources, filling in the missing values with best available data
# ##
#
# # unique set of site IDs
# m <- data.frame(Site=unique(c(p.west$Site, p.scan$Site)), stringsAsFactors = FALSE)
#
# # western data
# m <- join(m, p.west, by='Site', type='left')
#
# # SCAN master list
# m <- join(m, p.scan, by='Site', type='left')
#
# # new columns for best-available
# m$climstanm <- NA
# m$upedonid <- NA
# m$pedlabsampnum <- NA
#
# ### TODO: double check logic
# # select best available
# m$climstanm <- ifelse(! is.na(m$`climstanm-WEST`), m$`climstanm-WEST`, m$`climstanm-SCAN`)
# m$upedonid <- ifelse(! is.na(m$`upedonid-WEST`), m$`upedonid-WEST`, m$`upedonid-SCAN`)
# m$pedlabsampnum <- ifelse(! is.na(m$`pedlabsampnum-WEST`), m$`pedlabsampnum-WEST`, m$`pedlabsampnum-SCAN`)
#
#
# ##
# ## combine site metadata and pedon links
# ##
#
# SCAN_SNOTEL_metadata <- join(x, m[, c('Site', 'climstanm', 'upedonid', 'pedlabsampnum')], by='Site', type='left')
#
# # check for possible errors via station name comparison
# idx <- which( ! SCAN_SNOTEL_metadata$Name == SCAN_SNOTEL_metadata$climstanm )
# SCAN_SNOTEL_metadata[idx, c('Site', 'Name', 'climstanm')]
#
# # hmm... mostly abbreviations and spelling
#
# # save as R data file
# save(SCAN_SNOTEL_metadata, file='../data/SCAN_SNOTEL_metadata.rda')
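The "select best available" step near the end of that script is a coalesce-style fill: prefer the -WEST value, fall back to the -SCAN value. A standalone illustration with made-up station names:

a <- c('Rogers Farm', NA, 'Crater Meadows')    # hypothetical WEST names
b <- c('ROGERS FARM #1', 'Abbey #1', NA)       # hypothetical SCAN names
ifelse(!is.na(a), a, b)                        # keep a where present, otherwise fall back to b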
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.