-
Notifications
You must be signed in to change notification settings - Fork 991
/
utils.R
214 lines (189 loc) · 8.56 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# all non-exported / unused internal (utility) functions
# R 3.5.0 made isTRUE longer but more efficient :
# `is.logical(x) && length(x)==1L && !is.na(x) && x`
# Before R 3.5.0, isTRUE was defined as simply:
# identical(TRUE,x)
# See PR#3421 for timings.
# It was changed in R so that isTRUE(c(a=TRUE)) returned TRUE: https://github.com/wch/r-source/commit/828997ac6ecfb73aaa0aae9d1d0584a4ffc50881#diff-b41e3f9f1d389bb6f7a842cd5a3308b8
if (base::getRversion() < "3.5.0") {
isTRUE = function(x) is.logical(x) && length(x)==1L && !is.na(x) && x # backport R's new implementation of isTRUE
isFALSE = function(x) is.logical(x) && length(x)==1L && !is.na(x) && !x # backport isFALSE that was added in R 3.5.0
}
isTRUEorNA = function(x) is.logical(x) && length(x)==1L && (is.na(x) || x)
isTRUEorFALSE = function(x) is.logical(x) && length(x)==1L && !is.na(x)
allNA = function(x) .Call(C_allNAR, x)
# helper for nan argument (e.g. nafill): TRUE -> treat NaN as NA
nan_is_na = function(x) {
if (length(x) != 1L) stopf("Argument 'nan' must be length 1")
if (identical(x, NA) || identical(x, NA_real_)) return(TRUE)
if (identical(x, NaN)) return(FALSE)
stopf("Argument 'nan' must be NA or NaN")
}
internal_error = function(...) {
e1 = gettext("Internal error in")
e2 = deparse(head(tail(sys.calls(), 2L), 1L)[[1L]][[1L]])
e3 = do.call(sprintf, list(...))
e4 = gettext("Please report to the data.table issues tracker.")
e = paste0(e1, ' ', e2, ': ', e3, '. ', e4)
stop(e, call. = FALSE, domain = NA)
}
check_duplicate_names = function(x, table_name=deparse(substitute(x))) {
if (!anyDuplicated(nm <- names(x))) return(invisible())
duplicate_names = unique(nm[duplicated(nm)])
stopf(ngettext(length(duplicate_names),
"%s has duplicated column name %s. Please remove or rename the duplicate and try again.",
"%s has duplicated column names %s. Please remove or rename the duplicates and try again."),
table_name, brackify(duplicate_names), domain=NA)
}
duplicated_values = function(x) {
# fast anyDuplicated for the typical/non-error case; second duplicated() pass for (usually) error case
if (!anyDuplicated(x)) return(vector(typeof(x)))
unique(x[duplicated(x)])
}
# TODO(R>=4.0.0): Remove this workaround. From R 4.0.0, rep_len() dispatches rep.Date(), which we need.
# Before that, rep_len() strips attributes --> breaks data.table()'s internal recycle() helper.
# This also impacts test 2 in S4.Rraw, because the error message differs for rep.int() vs. rep_len().
if (inherits(rep_len(Sys.Date(), 1L), "Date")) {
# NB: safe_rep_len=rep_len throws an R CMD check error because it _appears_ to the AST
# walker that we've used .Internal ourselves (which is not true, but codetools can't tell:
# safe_rep_len = rep_len; body(safe_rep_len)[[1]] # .Internal)
safe_rep_len = function(x, n) rep_len(x, n)
} else {
safe_rep_len = function(x, n) rep(x, length.out = n) # nolint: rep_len_linter.
}
# endsWith no longer used from #5097 so no need to backport; prevent usage to avoid dev delay until GLCI's R 3.1.0 test
endsWith = function(...) internal_error("use endsWithAny instead of base::endsWith")
startsWithAny = function(x,y) .Call(CstartsWithAny, x, y, TRUE)
endsWithAny = function(x,y) .Call(CstartsWithAny, x, y, FALSE)
# For fread.R #5097 we need if any of the prefixes match, which one, and can return early on the first match
# Hence short and simple ascii-only at C level
# which.first
which.first = function(x)
{
if (!is.logical(x)) {
stopf("x not boolean")
}
match(TRUE, x)
}
# which.last
which.last = function(x)
{
if (!is.logical(x)) {
stopf("x not boolean")
}
length(x) - match(TRUE, rev(x)) + 1L
}
require_bit64_if_needed = function(DT) {
# called in fread and print.data.table
if (!isNamespaceLoaded("bit64") && any(vapply_1b(DT, inherits, "integer64"))) {
# nocov start
# a test was attempted to cover the requireNamespace() by using unloadNamespace() first, but that fails when nanotime is loaded because nanotime also uses bit64
if (!requireNamespace("bit64",quietly=TRUE)) {
warningf("Some columns are type 'integer64' but package bit64 is not installed. Those columns will print as strange looking floating point data. There is no need to reload the data. Simply install.packages('bit64') to obtain the integer64 print method and print the data again.")
}
# nocov end
}
}
# vapply for return value character(1)
vapply_1c = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_character_, USE.NAMES = use.names)
}
# vapply for return value logical(1)
vapply_1b = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA, USE.NAMES = use.names)
}
# vapply for return value integer(1)
vapply_1i = function(x, fun, ..., use.names = TRUE) {
vapply(X = x, FUN = fun, ..., FUN.VALUE = NA_integer_, USE.NAMES = use.names)
}
class1 = function(x) class(x)[1L] # nolint: class1_linter.
classes1 = function(x, ..., use.names=FALSE) vapply_1c(x, class1, ..., use.names=use.names)
# base::xor(), but with scalar operators
XOR = function(x, y) (x || y) && !(x && y)
# not is.atomic because is.atomic(matrix) is true
cols_with_dims = function(x) vapply_1i(x, function(j) length(dim(j))) > 0L
more = function(f) system(paste("more",f)) # nocov (just a dev helper)
# helper used to auto-name columns in data.table(x,y) as c("x","y"), CJ(x,y) and similar
# naming of unnested matrices still handled by data.table()
name_dots = function(...) {
dot_sub = as.list(substitute(list(...)))[-1L]
vnames = names(dot_sub)
if (is.null(vnames)) {
vnames = character(length(dot_sub))
} else {
vnames[is.na(vnames)] = ""
}
notnamed = !nzchar(vnames)
if (any(notnamed)) {
syms = vapply_1b(dot_sub, is.symbol) # save the deparse() in most cases of plain symbol
for (i in which(notnamed)) {
tmp = if (syms[i]) as.character(dot_sub[[i]]) else deparse(dot_sub[[i]], nlines=1L)[1L]
if (tmp == make.names(tmp)) vnames[i]=tmp
}
}
list(vnames=vnames, .named=!notnamed)
}
# convert a vector like c(1, 4, 3, 2) into a string like [1, 4, 3, 2]
# (common aggregation method for error messages)
brackify = function(x, quote=FALSE) {
# arbitrary
CUTOFF = 10L
# keep one more than needed to trigger dots if needed
if (quote && is.character(x)) x = paste0("'",head(x,CUTOFF+1L),"'")
if (length(x) > CUTOFF) x = c(x[1:CUTOFF], '...')
sprintf('[%s]', toString(x))
}
# convenience for specifying columns in some cases, e.g. by= and key=
# caller should ensure length(x) == 1 & handle accordingly.
cols_from_csv = function(x) strsplit(x, ',', fixed=TRUE)[[1L]]
# patterns done via NSE in melt.data.table and .SDcols in `[.data.table`
# was called do_patterns() before PR#4731
eval_with_cols = function(orig_call, all_cols) {
parent = parent.frame(2L)
fun_uneval = orig_call[[1L]]
# take fun from either calling env (parent) or from data.table
fun = tryCatch({
maybe_fun = eval(fun_uneval, parent)
# parent env could have a non-function with this name, which we
# should ignore.
stopifnot(is.function(maybe_fun))
maybe_fun
}, error=function(e) {
eval(fun_uneval)#take function from data.table namespace.
})
if (!is.primitive(fun)) {
named_call = match.call(fun, orig_call)
if ("cols" %in% names(formals(fun)) && !"cols" %in% names(named_call)) {
named_call[["cols"]] = all_cols
}
named_call[[1L]] = fun
eval(named_call, parent)
}
}
# check UTC status
is_utc = function(tz) {
# via grep('UTC|GMT', OlsonNames(), value = TRUE); ordered by "prior" frequency
utc_tz = c("UTC", "GMT", "Etc/UTC", "Etc/GMT", "GMT-0", "GMT+0", "GMT0")
if (is.null(tz)) tz = Sys.timezone()
tz %chin% utc_tz
}
# very nice idea from Michael to avoid expression repetition (risk) in internal code, #4226
`%iscall%` = function(e, f) {
if (!is.call(e)) return(FALSE)
if (is.name(e1 <- e[[1L]])) return(e1 %chin% f)
if (e1 %iscall% c('::', ':::')) return(e1[[3L]] %chin% f)
paste(deparse(e1), collapse = " ") %chin% f # complicated cases e.g. a closure/builtin on LHS of call; note that format() is much (e.g. 40x) slower than deparse()
}
# nocov start #593 always return a data.table
edit.data.table = function(name, ...) {
setDT(NextMethod('edit', name))[]
}
# nocov end
rss = function() { #5515 #5517
# nocov start
cmd = paste0("ps -o rss --no-headers ", Sys.getpid()) # ps returns KB
ans = tryCatch(as.numeric(system(cmd, intern=TRUE)), warning=function(w) NA_real_, error=function(e) NA_real_)
if (length(ans)!=1L || !is.numeric(ans)) ans=NA_real_ # just in case
round(ans / 1024, 1L) # return MB
# nocov end
}