Skip to content

Commit

Permalink
better function org, add_r_stage
Browse files Browse the repository at this point in the history
  • Loading branch information
João Santiago committed Jul 3, 2021
1 parent c2a0280 commit 39bc83e
Show file tree
Hide file tree
Showing 19 changed files with 326 additions and 115 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: dvthis
Type: Package
Title: Utilities for DVC (Data Version Control) Pipelines
Version: 0.2.3.9000
Version: 0.3.0.9000
Authors@R: person("Joao", "Santiago", , "me@jcpsantiago.xyz", role = c("aut", "cre"))
Description: DVC enables the creation of data pipelines and tracking data and models via git.
This package contains an RStudio project template to boostrap development, and
Expand All @@ -22,7 +22,8 @@ Imports:
qs,
rstudioapi,
purrr,
yaml
yaml,
yesno
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Generated by roxygen2: do not edit by hand

export(add_r_stage)
export(dvc_repro)
export(log_stage_step)
export(read_intermediate_result)
export(read_raw_data)
export(save_intermediate_result)
export(stage_footer)
export(stage_header)
2 changes: 1 addition & 1 deletion R/create_dvc_project.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Create a new DVC project
#'
#' @param path user input; path to the new project
#' @param ... for future expansion, currenly unused
#' @param ... for future expansion, currently unused
#'
create_dvc_project_gui <- function(path, ...) {
tryCatch(
Expand Down
86 changes: 86 additions & 0 deletions R/input_output.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#' Saves data to intermediate
#' @param r_object the R object to be saved.
#' @param rel_path optional path (relative to `data/intermediate`) to pass on to serializer. Defaults to the object's name without an extension added to the file on disk.
#' @param serializer function used to serialize the data to disk. Defaults to `qs::save`.
#' @param ... extra arguments passed to the serializer.
#' @return serializes an R object to the `data/intermediate` directory.
#' @author João Santiago
#' @export
save_intermediate_result <- function(r_object, rel_path = NULL, serializer = qs::qsave, ...) {
path_prefix <- here::here("data/intermediate")

if (!fs::dir_exists(path_prefix)) {
if (!yesno::yesno(
glue::glue("This will create {path_prefix}. Continue?")
)) {
stop(
glue::glue("Aborting creation of {path_prefix}."),
call. = FALSE
)
}

message(glue::glue("Creating dir {path_prefix}"))
fs::dir_create(path_prefix)
}

if (!is.null(rel_path)) {
r_object_name <- rel_path
} else {
r_object_name <- substitute(r_object)
}

message(
glue::glue("Saving {path_prefix}/{r_object_name}")
)
serializer(r_object, glue::glue("{path_prefix}/{r_object_name}"), ...)
}


#' Create function to read data level
#' @param data_level the level of data to read from. Example `intermediate` or `raw`.
#' @param .reader a reader function to be used as default. Optional.
#' @return a function.
#' @author João Santiago
make_read_data_function <- function(data_level, .reader = NULL) {
function(rel_path, reader = .reader, ...) {
if (is.null(reader) | class(reader) != "function") {
stop(
"Please provide a reader function. Example `qs::qread` or `readr::read_rds`",
call. = FALSE
)
}

path_prefix <- here::here(
glue::glue("data/{data_level}")
)
full_path <- glue::glue("{path_prefix}/{rel_path}")

if (!fs::file_exists(full_path)) {
stop(
glue::glue("Can't find {full_path}! Is there a typo?")
)
}

reader(full_path, ...)
}
}


#' Read an intermediate result
#' @param rel_path file path relative to `data/intermediate`.
#' @param reader function used to read the serialized data. Defaults to `qs::qread`.
#' @param ... extra arguments passed to the reader.
#' @return an R object.
#' @author João Santiago
#' @export
read_intermediate_result <- make_read_data_function("intermediate", qs::qread)


#' Read raw data
#' @param rel_path file path relative to `data/raw`.
#' @param reader function used to read the serialized data.
#' @param ... extra arguments passed to the reader.
#' @return an R object.
#' @author João Santiago
#' @export
read_raw_data <- make_read_data_function("raw")
51 changes: 51 additions & 0 deletions R/logging.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#' Header to separate stages
#'
#' @param this_stage name of the stage.
#' @param .right additional information to show on the right side of the header. Defaults to NULL.
#'
#' @return string with name of stage
#' @author João Santiago
#' @export
stage_header <- function(this_stage, .right = NULL) {
rendered_stage <- glue::glue(this_stage, .envir = .GlobalEnv)

cli::cat_line()
cli::cli_rule(
crayon::yellow(
rendered_stage
),
right = .right
)

as.character(rendered_stage)
}


#' Message to finish stage
#'
#' @param .stage the name of the stage as a string. Default behavior is to search the global environment for a `this_stage` object containing the return string of `dvthis::stage_header`.
#'
#' @return prints a closing footer
#' @export
stage_footer <- function(.stage = this_stage) {
message(
crayon::green(cli::symbol$tick),
glue::glue(" Stage {cli::style_italic(.stage)} is done!")
)
cli::cat_line()
}



#' Log a stage step
#'
#' @param msg log message.
#'
#' @return prints message to stdout
#' @author João Santiago
#' @export
log_stage_step <- function(msg) {
cli::cat_bullet(
glue::glue(msg)
)
}
75 changes: 74 additions & 1 deletion R/project_utilities.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,76 @@
add_stage <- function(name, open) {
add_stage_to_dvc_yaml <- function(current_dvc_yaml, new_stage_name, .deps = NULL, .outs = NULL) {
cmd <- c(glue::glue("Rscript stages/{new_stage_name}.R"))
deps <- c(glue::glue("stages/{new_stage_name}.R"), .deps)
outs <- .outs

new_stage_list_start <- list(
stages = list(
new_stage = list("cmd" = cmd, "deps" = deps, "outs" = outs)
)
)
names(new_stage_list_start$stages) <- new_stage_name
new_stage_list <- modifyList(new_stage_list_start, new_stage_list_start)

new_dvc_yaml <- modifyList(current_dvc_yaml, new_stage_list)

return(new_dvc_yaml)
}


#' Title
#'
#' @param stage_name
#' @param .deps
#' @param .outs
#' @param dvc_yaml_path
#'
#' @return
#' @export
#'
#' @examples
add_r_stage <- function(stage_name, .deps = NULL, .outs = NULL, dvc_yaml_path = NULL) {
dvc_yaml_path <- here::here("dvc.yaml")
if (!fs::file_exists(dvc_yaml_path)) {
stop(
glue::glue("Can't find {dvc_yaml_path}. Did you run `dvc init` in the project root?")
)
}

# this is a requirement because dvc.yaml is yaml and the stage names are top-level
# objects which cannot have spaces
if (grepl("\\s", stage_name)) {
no_white_spaces <- gsub("\\s", "_", stage_name)
if (!yesno::yesno(glue::glue("`{stage_name}` cannot contain spaces. Can I call the stage `{no_white_spaces}` instead?"))) {
stop("Aborting adding new stage.", call. = FALSE)
}
name <- no_white_spaces
} else {
name <- stage_name
}

current_dvc_yaml <- yaml::read_yaml(dvc_yaml_path)
if (name %in% names(current_dvc_yaml$stages)) {
stop(
glue::glue("`{name}` is already a stage in {dvc_yaml_path}, please use a different name."),
call. = FALSE
)
}

if (!yesno::yesno(
glue::glue("This will write to {dvc_yaml_path}. Continue?")
)) {
stop("Aborting writing `dvc.yaml`", call. = FALSE)
}

new_dvc_yaml <- add_stage_to_dvc_yaml(current_dvc_yaml, name, .deps, .outs)

yaml::write_yaml(new_dvc_yaml, dvc_yaml_path)

new_stage_path <- here::here(glue::glue("stages/{name}.R"))
fs::file_copy(
fs::path_package("dvthis", "templates", "new_stage.R"),
new_stage_path
)

rstudioapi::navigateToFile(new_stage_path)
}
90 changes: 0 additions & 90 deletions R/stage_utilities.R

This file was deleted.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ n_of_dragons <- commandArgs(trailingOnly = TRUE)[1]
this_stage <- dvthis::stage_header("Choosing dragons")

dvthis::log_stage_step("Loading dragon data")
dragons_raw <- readr::read_csv(here::here("data/raw/dragons.csv"))
dragons_raw <- dvthis::read_raw_data("dragons.csv", readr::read_csv)

dvthis::log_stage_step("Loading clean kingdom data")
kingdoms <- dvthis::read_intermediate_result("kingdoms")
Expand Down
2 changes: 1 addition & 1 deletion inst/project-skeleton/stages/hello_world.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env Rscript
#

this_stage <- dvthis::stage_header("Hello and welcome to this DVC pipeline!")

# do some meaningful work
Expand Down
12 changes: 12 additions & 0 deletions inst/templates/new_stage.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env Rscript

this_stage <- dvthis::stage_header("Hello please edit me with a meaningful header!")


# add your work here


dvthis::log_stage_step("Saving intermediate results")
# save_intermediate_result()

dvthis::stage_footer()
17 changes: 17 additions & 0 deletions man/add_r_stage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 39bc83e

Please sign in to comment.