-
Notifications
You must be signed in to change notification settings - Fork 0
/
menzerath.R
157 lines (147 loc) · 5.58 KB
/
menzerath.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#' @importFrom glue glue
#' @importFrom tibble is_tibble tibble
#' @importFrom stats lm predict nobs
#' @importFrom methods is
#' @importFrom generics fit
#' @export
generics::fit
#' Density function for Menzerath-Altmann law
#'
#' @param x single number or numeric vector
#' @param method one of MAL (default), simplified_1, simplified_2, Milicka_1, Milicka_2, Milicka_4, Milicka_8
#' @param parameters named vector for parameters a, b and c
#'
#' @return single number or numeric vector
#' @export
dmenzerath <- function(x, parameters, method = "MAL"){
switch(method,
"MAL" = parameters['a']*x^parameters['b']*exp(-parameters['c']*x),
"simplified_1" = parameters['a']*exp(-parameters['c']*x),
"simplified_2" = parameters['a']*x^parameters['b'],
"Milicka_1" = parameters['a']*x^(-parameters['b'])*exp(parameters['c']*x),
"Milicka_2" = parameters['a']*x^(-parameters['b']),
"Milicka_4" = parameters['a'] + parameters['b']/x,
"Milicka_8" = parameters['a'] + parameters['b']/x + parameters['c']*min(1, x-1)/x,
stop(paste("Unknown method:", method))
)
}
#' Return parameters of the Menzerath-Altmann law.
#'
#' The parameters are estimated after taking logs
#'
#' @param x An object with class lm
#'
#' @return named list with a, b and c
#' @export
get_parameters <- function(x){
if(!is(x,"lm")){
stop("Expecting an lm fit")
}
if(is(x, "MAL")){
return(list(a = exp(summary(x)$coefficients[1]),
b = summary(x)$coefficients[2],
c = -summary(x)$coefficients[3]))
}else if(is(x, "simplified_1")){
return(list(a = exp(summary(x)$coefficients[1]),
c = -summary(x)$coefficients[2]))
}else if(is(x, "simplified_2")){
return(list(a = exp(summary(x)$coefficients[1]),
b = summary(x)$coefficients[2]))
}else if(is(x, "Milicka_1")){
return(list(a = exp(summary(x)$coefficients[1]),
b = -summary(x)$coefficients[2],
c = summary(x)$coefficients[3]))
}else if(is(x, "Milicka_2")){
return(list(a = exp(summary(x)$coefficients[1]),
b = -summary(x)$coefficients[2]))
}else if(is(x, "Milicka_4")){
stop("unimplemented")
}else if(is(x, "Milicka_8")){
stop("unimplemented")
}else{
stop(paste("Unknown fitted method for an object of class menzerath: ", class(x)))
}
}
#' Fit a menzerath object
#'
#' @param object a menzerath type object
#'
#' @param method string Method to perform the fitting, could be one of MAL, simplified_1, simplified_2, Milicka_1, Milicka_2, Milicka_4 or Milicka_8
#' @param ... Other arguments passed to lm
#'
#' @export
fit.menzerath <- function(object, method="MAL",...){
result <- switch(method,
"MAL" = lm(log(object$y) ~ log(object$x) + object$x, as.data.frame(x=object$x, y = object$y, stringsAsFactors=FALSE), ...),
"simplified_1" = lm(log(object$y) ~ object$x, as.data.frame(x=object$x, y = object$y, stringsAsFactors=FALSE), ...),
"simplified_2" = lm(log(object$y) ~ log(object$x), as.data.frame(x=object$x, y = object$y, stringsAsFactors=FALSE), ...),
"Milicka_1" = lm(log(object$y) ~ log(object$x) + object$x, as.data.frame(x=object$x, y = object$y, stringsAsFactors=FALSE), ...),
"Milicka_2" = lm(log(object$y) ~ log(object$x), as.data.frame(x=object$x, y = object$y, stringsAsFactors=FALSE), ...),
"Milicka_4" = stop("unimplemented"),
"Milicka_8" = stop("unimplemented"),
stop(paste("Unknown method: ", method))
)
class(result) <- c(method, class(result))
result
}
#' @export
predict.menzerath <- function(object, method="MAL", ...){
predict(fit.menzerath(object, method), interval = "confidence", ...)
}
#' @export
print.menzerath <- function(x, ...){
glue::glue('Observations: {length(x$x)}\n',
'y: {paste(head(x$y), collapse=",")}...\n',
'x: {paste(head(x$x), collapse=",")}...')
}
#' @export
plot.menzerath <- function(x, fit = NULL, method="MAL", ...){
p <- (ggplot2::ggplot(data = x, ggplot2::aes(x=log(x), y=log(y))) +
ggplot2::geom_point(...))
if(is.null(fit)){
# no prediction plot raw data
p
}else if(isTRUE(fit)){
# fit and then plot
predict_fit <- predict.menzerath(x, method)
p + ggplot2::geom_ribbon(ggplot2::aes(ymin=predict_fit[,"lwr"], ymax=predict_fit[,"upr"]), alpha=0.1, fill="blue") +
ggplot2::geom_line(ggplot2::aes(y=predict_fit[,"fit"]))
p + ggplot2::geom_ribbon(ggplot2::aes(ymin=predict_fit[,"lwr"], ymax=predict_fit[,"upr"]), alpha=0.1, fill="blue") +
ggplot2::geom_line(ggplot2::aes(y=predict_fit[,"fit"]))
}else{
p
}
}
#' A class to describe and plot data following the Menzerath-Altman law
#'
#' To initialize the menzerath object we need a data.frame or a tibble with at
#' least two columns:
#' - Size of construct (L_n) measured in units of
#' its direct constituents
#' - Average size of constituents (L_{n-1}) measured in units of its direct
#' subconstituents
#'
#' @param tb data.frame or tibble, A table with the data
#' @param x String, The column name containing the construct size
#' @param y String, The column name containing the average constituent size
#'
#' @export
menzerath <- function(tb=tibble(), x = "x", y = "y"){
if(!tibble::is_tibble(tb)){
if(is.data.frame(tb)){
tb <- tibble::as_tibble(tb)
}else{
stop("Constructor expects a tibble")
}
}
m <- tb[c(x, y)]
names(m) <- c("x","y")
structure(m,
x_name = x,
y_name = y,
class = c("menzerath", class(m)))
}
#' @export
nobs.menzerath <- function(object, ...){
length(object$x)
}