-
Notifications
You must be signed in to change notification settings - Fork 57
/
Copy pathstep-nest.R
80 lines (73 loc) · 2.15 KB
/
step-nest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#' Nest
#'
#' @description
#' This is a method for the tidyr [tidyr::nest()] generic. It is translated
#' using the non-nested variables in the `by` argument and `.SD` in the `j`
#' argument.
#'
#' @inheritParams tidyr::nest
#' @param ... <[`tidy-select`][tidyr::tidyr_tidy_select]> Columns to nest, specified
#' using name-variable pairs of the form `new_col = c(col1, col2, col3)`.
#' The right hand side can be any valid tidy select expression.
#' @param .key Not supported.
#' @param data A [lazy_dt()].
#' @examples
#' if (require("tidyr", quietly = TRUE)) {
#' dt <- lazy_dt(tibble(x = c(1, 2, 1), y = c("a", "a", "b")))
#' dt %>% nest(data = y)
#'
#' dt %>% dplyr::group_by(x) %>% nest()
#' }
# exported onLoad
nest.dtplyr_step <- function(.data, ..., .names_sep = NULL, .key = deprecated()) {
if (lifecycle::is_present(.key)) {
abort(c(
"`nest()` for lazy data.tables doesn't support the `.key` argument.",
i = "Use a name in the `...` argument instead."
))
}
cols <- eval_nest_dots(.data, ...)
cols <- lapply(cols, set_names)
if (!is.null(.names_sep)) {
cols <- imap(cols, strip_names, .names_sep)
}
if (length(cols) == 1 && is.null(.names_sep)) {
# use `.SD` as it is shorter and faster
nm <- names(cols)
j_exprs <- exprs(!!nm := .(.SD))
} else {
j_exprs <- imap(
cols,
function(x, name) {
x <- simplify_names(x)
expr(.(data.table(!!!syms(x))))
}
)
}
asis <- setdiff(.data$vars, unlist(cols))
out <- step_subset_j(
.data,
vars = c(asis, names(cols)),
j = expr(.(!!!j_exprs)),
groups = asis,
arrange = FALSE
)
groups <- intersect(out$vars, group_vars(.data))
group_by(out, !!!syms(groups))
}
eval_nest_dots <- function(.data, ...) {
if (missing(...)) {
groups <- group_vars(.data)
if (is_empty(groups)) {
warn(paste0(
"`...` must not be empty for ungrouped data frames.\n",
"Did you want `data = everything()`?"
))
}
nest_vars <- setdiff(.data$vars, groups)
list(data = nest_vars)
} else {
cols <- enquos(...)
lapply(cols, function(.x) names(tidyselect::eval_select(.x, .data)))
}
}