-
Notifications
You must be signed in to change notification settings - Fork 25
/
wordcount.R
50 lines (48 loc) · 1.53 KB
/
wordcount.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#' Count words, sentences, characters
#'
#' Count words, sentences, and characters in input texts. These functions use
#' the \code{stringi} package, so they handle the counting of Unicode strings
#' (e.g., characters with diacritical marks) in a way that makes sense to people
#' counting characters.
#'
#' @param x A character vector or a list of character vectors. If \code{x} is a
#' character vector, it can be of any length, and each element will be
#' counted separately. If \code{x} is a list of character vectors, each
#' element of the list should have a length of 1.
#' @return An integer vector containing the counted elements. If the input
#' vector or list has names, they will be preserved.
#' @rdname word-counting
#' @examples
#' count_words(mobydick)
#' count_sentences(mobydick)
#' count_characters(mobydick)
#' @export
count_words <- function(x) {
  # check_input() is defined elsewhere in the package; presumably it rejects
  # anything that is not a character vector or list of length-1 character
  # vectors -- TODO confirm against its definition.
  check_input(x)

  # One word count per element of the input.
  counts <- stringi::stri_count_words(x)

  # Carry the input's names over to the result, but only when it has any.
  input_names <- names(x)
  if (is.null(input_names)) {
    return(counts)
  }
  names(counts) <- input_names
  counts
}
#' @export
#' @rdname word-counting
count_characters <- function(x) {
  # check_input() is defined elsewhere in the package; presumably it validates
  # the type/shape of `x` -- TODO confirm against its definition.
  check_input(x)

  # Count text boundaries using a break iterator configured for characters,
  # giving one count per element of the input.
  character_breaks <- stringi::stri_opts_brkiter(type = "character")
  counts <- stringi::stri_count_boundaries(x, opts_brkiter = character_breaks)

  # Carry the input's names over to the result, but only when it has any.
  input_names <- names(x)
  if (is.null(input_names)) {
    return(counts)
  }
  names(counts) <- input_names
  counts
}
#' @export
#' @rdname word-counting
count_sentences <- function(x) {
  # check_input() is defined elsewhere in the package; presumably it validates
  # the type/shape of `x` -- TODO confirm against its definition.
  check_input(x)

  # Count text boundaries using a break iterator configured for sentences,
  # giving one count per element of the input.
  sentence_breaks <- stringi::stri_opts_brkiter(type = "sentence")
  counts <- stringi::stri_count_boundaries(x, opts_brkiter = sentence_breaks)

  # Carry the input's names over to the result, but only when it has any.
  input_names <- names(x)
  if (is.null(input_names)) {
    return(counts)
  }
  names(counts) <- input_names
  counts
}