Skip to content

Commit

Permalink
Merge pull request #74 from bcgov/mdl
Browse files Browse the repository at this point in the history
Functionality for dealing with minimum detection limits
  • Loading branch information
ateucher authored May 30, 2017
2 parents ccaf700 + f44ee5f commit 4d18be2
Show file tree
Hide file tree
Showing 8 changed files with 218 additions and 8 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: wqbc
Type: Package
Title: Water Quality Thresholds and Index Calculation for British Columbia
Version: 0.2.1
Version: 0.3.0
Date: 2016-10-21
Authors@R: c(person("Joe", "Thorley", email = "joe@poissonconsulting.ca",
role = c("aut", "ctr"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export(plot_map)
export(plot_map_wqis)
export(plot_timeseries)
export(plot_wqis)
export(set_non_detects)
export(standardize_wqdata)
export(substitute_units)
export(substitute_variables)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# wqbc 0.3.0

- Added function `set_non_detects()` to flexibily deal with values below the
detection limit (#73)
- Added flexibility in dealing with non-detects to `tidy_ems_data()` and `tidy_ec_data()` (#73)

# wqbc 0.2.1

- Fixed a bug where `substitute_variables()` matched too many variables (#72)
Expand Down
81 changes: 76 additions & 5 deletions R/tidy-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@
#'
#' @param x The rems data to tidy.
#' @param cols additional columns from the EMS data to retain.
#' @param mdl_action What to do with results that are below the detection limit.
#' Can be set to \code{zero} (the default), set at the detection limit (\code{mdl}),
#' or set to half the detection limit (\code{half}).
#'
#' @return A tibble of the tidied rems data.
#' @export
tidy_ems_data <- function(x, cols = character(0)) {
tidy_ems_data <- function(x, cols = character(0), mdl_action = "zero") {
check_cols(x, c("EMS_ID", "MONITORING_LOCATION", "COLLECTION_START", "PARAMETER_CODE",
"RESULT", "UNIT", "METHOD_DETECTION_LIMIT", "PARAMETER", "RESULT_LETTER", cols))

Expand All @@ -27,7 +31,9 @@ tidy_ems_data <- function(x, cols = character(0)) {
DetectionLimit = ~METHOD_DETECTION_LIMIT,
ResultLetter = ~RESULT_LETTER)

x$Value[!is.na(x$Value) & !is.na(x$ResultLetter) & x$ResultLetter == "<"] <- 0
x$Value = set_non_detects(value = x$Value,
mdl_flag = x$ResultLetter,
mdl_action = mdl_action)

x
}
Expand All @@ -38,9 +44,12 @@ tidy_ems_data <- function(x, cols = character(0)) {
#' It retains and renames required columns and sets the timezone to PST.
#'
#' @param x The rems data to tidy.
#' @param mdl_action What to do with results that are below the detection limit.
#' Can be set to \code{zero} (the default), set at the detection limit (\code{mdl}),
#' or set to half the detection limit (\code{half}).
#' @return A tibble of the tidied rems data.
#' @export
tidy_ec_data <- function(x) {
tidy_ec_data <- function(x, mdl_action = "zero") {
check_cols(x, c("SITE_NO", "DATE_TIME_HEURE", "VALUE_VALEUR", "SDL_LDE", "VMV_CODE", "VARIABLE"))

unit <- dplyr::select_(x, ~starts_with("UNIT_UNIT"))
Expand All @@ -58,9 +67,71 @@ tidy_ec_data <- function(x) {

x %<>% dplyr::select_(~dplyr::everything(), ~DetectionLimit)

x$Value[!is.na(x$Value) & !is.na(x$DetectionLimit) & x$DetectionLimit > 0 & x$Value <= x$DetectionLimit] <- 0

x %<>% dplyr::mutate_(DateTime = ~lubridate::dmy_hm(DateTime, tz = "Etc/GMT+8"))

x$Value = set_non_detects(value = x$Value,
mdl_value = x$DetectionLimit,
mdl_action = mdl_action)

x
}

#' Set value for 'non-detects'
#'
#' Set a value where the actual value of a measurement
#' is less than the method detection limit (MDL)
#'
#' @param value a numeric vector of measured values
#' @param mdl_flag a character vector the same length as \code{value} that has a "flag" (assumed to be \code{"<"}) for values that are below the MDL
#' @param mdl_value a numeric vector the same length as \code{value} that contains the MDL values.
#' @param mdl_action What to do with values below the detection limit. Options are
#' \code{"zero"} (set the value to \code{0}), \code{"half"} (set the value to half the MDL), or \code{"mdl"} (set the value to equal to the MDL).
#'
#' @details You must only supply either \code{mdl_flag} or \code{mdl_value}.
#' When \code{mdl_flag} is supplied, it is assumed that the original \code{value} has
#' been set to the MDL.
#'
#' @return a numeric vector the same length as value with non-detects adjusted accordingly
#' @export
set_non_detects <- function(value, mdl_flag = NULL, mdl_value = NULL, mdl_action = c("zero", "half", "mdl")) {

mdl_action <- match.arg(mdl_action)

if (!is.null(mdl_flag)) {

if (!is.null(mdl_value)) stop ("You must supply only one of mdl_flag or mdl_value")
if (length(value) != length(mdl_flag)) {
stop("value and mdl_flag must be the same length")
}

replacements <- !is.na(value) & !is.na(mdl_flag) & mdl_flag == "<"

} else if (!is.null(mdl_value)) {

if (length(value) != length(mdl_value)) {
stop("value and mdl_value must be the same length")
}

replacements <- !is.na(value) & !is.na(mdl_value) & mdl_value > 0 & value <= mdl_value

} else {
stop("You must supply either a mdl_flag vector, or a mdl_value vector")
}

if (mdl_action == "zero") {
value[replacements] <- 0
} else {

multiplier <- ifelse(mdl_action == "half", 0.5, 1)

if (!is.null(mdl_flag)) {
value[replacements] <- value[replacements] * multiplier
} else {
value[replacements] <- mdl_value[replacements] * multiplier
}

}

value

}
31 changes: 31 additions & 0 deletions man/set_non_detects.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/tidy_ec_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/tidy_ems_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

93 changes: 93 additions & 0 deletions tests/testthat/test-tidy-data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
context("tidy data")
library(dplyr)

test_ec <- structure(list(
SITE_NO = c("BC08NL0001", "BC08NL0001", "BC08NL0001"),
DATE_TIME_HEURE = c("11/01/2000 9:15", "11/01/2000 9:15", "11/01/2000 9:15"),
FLAG_MARQUEUR = c(NA, NA, "<"),
VALUE_VALEUR = c(19.6, NA, 0.002),
SDL_LDE = c(0.01, 2e-04, 0.002),
MDL_LDM = c(0.01, 2e-04, 0.002),
VMV_CODE = c(100493L, 100217L, 100474L),
UNIT_UNITÉ = c("UG/L", "MG/L", "UG/L"),
VARIABLE = c("BARIUM EXTRACTABLE", "BARIUM TOTAL", "BERYLLIUM EXTRACTABLE"),
VARIABLE_FR = c("BARYUM EXTRACTIBLE", "BARYUM TOTAL", "BÉRYLLIUM EXTRACTIBL"),
STATUS_STATUT = c("P", "P", "P")),
class = "data.frame",
.Names = c("SITE_NO", "DATE_TIME_HEURE", "FLAG_MARQUEUR",
"VALUE_VALEUR", "SDL_LDE", "MDL_LDM", "VMV_CODE","UNIT_UNITÉ", "VARIABLE",
"VARIABLE_FR", "STATUS_STATUT"), row.names = c(NA, -3L))

test_ems <- select(test_ec,
EMS_ID = SITE_NO,
COLLECTION_START = DATE_TIME_HEURE,
RESULT_LETTER = FLAG_MARQUEUR,
PARAMETER = VARIABLE,
RESULT = VALUE_VALEUR,
UNIT = UNIT_UNITÉ,
METHOD_DETECTION_LIMIT = SDL_LDE) %>%
mutate(MONITORING_LOCATION = letters[1:3],
PARAMETER_CODE = LETTERS[1:3],
COLLECTION_START = as.POSIXct(COLLECTION_START, format = "%d/%m/%Y %k"))

test_that("set_non_detects works with zeros", {
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_flag = c("<", NA, NA), mdl_action = "zero"),
c(0, 2.0, 2.5))
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_value = c(1.9, 1.9, 1.9),
mdl_action = "zero"),
c(0, 2.0, 2.5))
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_value = c(2.0, 2.0, 2.0),
mdl_action = "zero"),
c(0, 0, 2.5))
expect_equal(set_non_detects(c(NA, 2.0, 2.5), mdl_flag = c("<", "<", NA), mdl_action = "zero"),
c(NA, 0, 2.5))
expect_equal(set_non_detects(c(NA, 2.0, 2.5), mdl_value = c(2.0, 2.0, 2.0),
mdl_action = "zero"),
c(NA, 0, 2.5))
})

test_that("set_non_detects works with half mdl", {
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_flag = c("<", NA, NA), mdl_action = "half"),
c(0.75, 2.0, 2.5))
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_value = c(1.9, 1.9, 1.9),
mdl_action = "half"),
c(0.95, 2.0, 2.5))
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_value = c(2.0, 2.0, 2.0),
mdl_action = "half"),
c(1.0, 1.0, 2.5))
expect_equal(set_non_detects(c(NA, 2.0, 2.5), mdl_flag = c("<", "<", NA), mdl_action = "half"),
c(NA, 1.0, 2.5))
expect_equal(set_non_detects(c(NA, 2.0, 2.5), mdl_value = c(2.0, 2.0, 2.0),
mdl_action = "half"),
c(NA, 1.0, 2.5))
})

test_that("set_non_detects works with mdl", {
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_flag = c("<", NA, NA), mdl_action = "mdl"),
c(1.5, 2.0, 2.5))
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_value = c(1.9, 1.9, 1.9),
mdl_action = "mdl"),
c(1.9, 2.0, 2.5))
expect_equal(set_non_detects(c(1.5, 2.0, 2.5), mdl_value = c(2.0, 2.0, 2.0),
mdl_action = "mdl"),
c(2.0, 2.0, 2.5))
expect_equal(set_non_detects(c(NA, 2.0, 2.5), mdl_flag = c("<", "<", NA), mdl_action = "mdl"),
c(NA, 2.0, 2.5))
expect_equal(set_non_detects(c(NA, 2.0, 2.5), mdl_value = c(2.0, 2.0, 2.0),
mdl_action = "mdl"),
c(NA, 2.0, 2.5))
})

test_that("tidy_ems_data works", {
tidied_ems <- tidy_ems_data(test_ems)
expect_equal(names(tidied_ems),
c("EMS_ID", "Station", "DateTime", "Variable", "Code", "Value",
"Units", "DetectionLimit", "ResultLetter"))
})

test_that("tidy_ec_data works", {
tidied_ec <- tidy_ec_data(test_ec)
expect_equal(names(tidied_ec),
c("Station", "DateTime", "Variable", "Code", "Value",
"DetectionLimit", "Units"))
})

0 comments on commit 4d18be2

Please sign in to comment.