
Get rid of TableRegressionModel by storing formula directly in model #339

Merged · 18 commits · Nov 20, 2022
Docs, etc.
nalimilan committed Jun 19, 2022
commit c244147bc8321979d358273ff8516e623d0fd3ee
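The user-visible effect of this PR can be sketched as follows. This is an illustrative session, not output copied from this page; it assumes GLM and DataFrames are installed, and that the stored formula is reachable via the `formula` accessor:

```julia
using GLM, DataFrames

df = DataFrame(X = [1, 2, 3], Y = [2, 4, 7])

# Before this PR: lm with a @formula returned a
# StatsModels.TableRegressionModel wrapping the LinearModel.
# After: lm returns the LinearModel itself, with the formula stored
# in the model, so no wrapper type is needed.
m = lm(@formula(Y ~ X), df)

coef(m)     # coefficient vector, unchanged by this PR
formula(m)  # the stored FormulaTerm, e.g. Y ~ 1 + X
```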
4 changes: 2 additions & 2 deletions docs/src/api.md
@@ -25,7 +25,7 @@ The most general approach to fitting a model is with the `fit` function, as in
julia> using Random

julia> fit(LinearModel, hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10))
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}:
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────
@@ -41,7 +41,7 @@ This model can also be fit as
julia> using Random

julia> lm(hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10))
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}:
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────
32 changes: 8 additions & 24 deletions docs/src/examples.md
@@ -20,9 +20,7 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7])
3 │ 3 7

julia> ols = lm(@formula(Y ~ X), data)
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

Y ~ 1 + X
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
─────────────────────────────────────────────────────────────────────────
@@ -56,9 +54,7 @@ julia> data = DataFrame(X=[1,2,2], Y=[1,0,1])
3 │ 2 1

julia> probit = glm(@formula(Y ~ X), data, Binomial(), ProbitLink())
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}}

Y ~ 1 + X
GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink}, GLM.DensePredChol{Float64, Cholesky{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────────────
@@ -97,9 +93,7 @@ julia> quine = dataset("MASS", "quine")
131 rows omitted

julia> nbrmodel = glm(@formula(Days ~ Eth+Sex+Age+Lrn), quine, NegativeBinomial(2.0), LogLink())
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}}

Days ~ 1 + Eth + Sex + Age + Lrn
GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink}, GLM.DensePredChol{Float64, Cholesky{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────────────────
@@ -115,9 +109,7 @@ Lrn: SL 0.296768 0.185934 1.60 0.1105 -0.0676559 0.661191
────────────────────────────────────────────────────────────────────────────

julia> nbrmodel = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink())
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}}

Days ~ 1 + Eth + Sex + Age + Lrn
GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink}, GLM.DensePredChol{Float64, Cholesky{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────────────────
@@ -164,9 +156,7 @@ julia> form = dataset("datasets", "Formaldehyde")
6 │ 0.9 0.782

julia> lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form)
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

OptDen ~ 1 + Carb
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
───────────────────────────────────────────────────────────────────────────
@@ -213,9 +203,7 @@ julia> LifeCycleSavings = dataset("datasets", "LifeCycleSavings")
35 rows omitted

julia> fm2 = fit(LinearModel, @formula(SR ~ Pop15 + Pop75 + DPI + DDPI), LifeCycleSavings)
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

SR ~ 1 + Pop15 + Pop75 + DPI + DDPI
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
─────────────────────────────────────────────────────────────────────────────────
@@ -321,9 +309,7 @@ julia> dobson = DataFrame(Counts = [18.,17,15,20,10,21,25,13,13],
9 │ 13.0 3 3

julia> gm1 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson())
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Poisson{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}}

Counts ~ 1 + Outcome + Treatment
GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Poisson{Float64}, LogLink}, GLM.DensePredChol{Float64, Cholesky{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────────────────
@@ -378,9 +364,7 @@ julia> round(optimal_bic.minimizer, digits = 5) # Optimal λ
0.40935

julia> glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(optimal_bic.minimizer)) # Best model
StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Normal{Float64}, PowerLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}}

Volume ~ 1 + Height + Girth
GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Normal{Float64}, PowerLink}, GLM.DensePredChol{Float64, Cholesky{Float64, Matrix{Float64}}}}:

Coefficients:
────────────────────────────────────────────────────────────────────────────
8 changes: 2 additions & 6 deletions docs/src/index.md
@@ -85,9 +85,7 @@ julia> data = DataFrame(y = rand(rng, 100), x = categorical(repeat([1, 2, 3, 4],


julia> lm(@formula(y ~ x), data)
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
───────────────────────────────────────────────────────────────────────────
@@ -108,9 +106,7 @@ julia> using StableRNGs
julia> data = DataFrame(y = rand(StableRNG(1), 100), x = repeat([1, 2, 3, 4], 25));

julia> lm(@formula(y ~ x), data, contrasts = Dict(:x => DummyCoding()))
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

y ~ 1 + x
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
───────────────────────────────────────────────────────────────────────────
13 changes: 5 additions & 8 deletions src/glmfit.jl
@@ -226,10 +226,10 @@ end

abstract type AbstractGLM <: LinPredModel end

mutable struct GeneralizedLinearModel{G<:GlmResp,L<:LinPred,F<:Union{FormulaTerm,Nothing}} <: AbstractGLM
mutable struct GeneralizedLinearModel{G<:GlmResp,L<:LinPred} <: AbstractGLM
rr::G
pp::L
f::F
f::Union{FormulaTerm,Nothing}
fit::Bool
end
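One plausible reading of the struct change above: the formula moves from a type parameter `F<:Union{FormulaTerm,Nothing}` to a concretely annotated field, so each `(G, L)` pair yields a single concrete model type instead of two, and Julia stores small `Union` fields efficiently. A minimal sketch of the pattern (simplified stand-ins, not the real GLM.jl definitions):

```julia
# Simplified sketch; real GLM.jl types have more fields and constraints.
abstract type SketchAbstractModel end
struct FormulaTermStub end  # stand-in for StatsModels.FormulaTerm

struct SketchModel{G,L} <: SketchAbstractModel
    rr::G
    pp::L
    # Small Union field: avoids a third type parameter, so methods are
    # compiled once per (G, L) whether or not a formula is present.
    f::Union{FormulaTermStub,Nothing}
end

hasformula(m::SketchModel) = m.f !== nothing
```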

@@ -504,7 +504,6 @@ function fit(::Type{M},
data,
d::UnivariateDistribution,
l::Link=canonicallink(d);
# TODO: support passing wts and offset as symbols
offset::Union{AbstractVector, Nothing} = nothing,
wts::Union{AbstractVector, Nothing} = nothing,
dofit::Bool = true,
Review comment (Member): if they're calling fit instead of a constructor, I kinda feel like we shouldn't add the complexity of dofit kwarg.

Reply (nalimilan, PR author): Yeah that doesn't make a lot of sense, but it's supported currently so I'd say we'd better keep supporting it. I've added deprecations so that we can remove them at some point.
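A common shape for such a keyword deprecation in Julia is `Base.depwarn`. The following is a sketch of the general pattern, not this PR's actual code; `fit_sketch` is a hypothetical name:

```julia
# Keep a deprecated keyword working while warning users away from it.
function fit_sketch(X, y; dofit::Union{Bool,Nothing}=nothing)
    if dofit !== nothing
        # Warn when the deprecated keyword is passed explicitly;
        # the keyword can be removed in a future breaking release.
        Base.depwarn("the `dofit` keyword is deprecated", :fit_sketch)
    end
    dofit = something(dofit, true)  # preserve the old default behavior
    # ... build the model here, calling fit! on it only when dofit is true
    return dofit
end
```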

@@ -517,11 +516,9 @@
throw(DimensionMismatch("number of rows in X and y must match"))
end

# TODO: allocate right type upfront
yf = float(y)
off = offset === nothing ? similar(yf, 0) : offset
wts = wts === nothing ? similar(yf, 0) : wts
rr = GlmResp(yf, d, l, off, wts)
off = offset === nothing ? similar(y, 0) : offset
wts = wts === nothing ? similar(y, 0) : wts
rr = GlmResp(y, d, l, off, wts)
res = M(rr, cholpred(X), f, false)
return dofit ? fit!(res; fitargs...) : res
end
9 changes: 6 additions & 3 deletions src/linpred.jl
@@ -237,7 +237,10 @@ end
function modelframe(f::FormulaTerm, data, contrasts::AbstractDict)
Tables.istable(data) ||
throw(ArgumentError("expected data in a Table, got $(typeof(data))"))
data, _ = StatsModels.missing_omit(Tables.columntable(data), f)
t = Tables.columntable(data)
msg = StatsModels.checknamesexist(f, t)
msg != "" && throw(ArgumentError(msg))
data, _ = StatsModels.missing_omit(t, f)
sch = schema(f, data, contrasts)
f = apply_schema(f, sch, LinPredModel)
f, modelcols(f, data)
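The practical effect of the `checknamesexist` call added above is earlier, clearer failure when a formula references a column absent from the table. A hypothetical session (the exact error text may differ):

```julia
using GLM, DataFrames

df = DataFrame(x = 1:3, y = [2.0, 4.0, 7.0])

# `z` is not a column of df: with this change the call throws an
# ArgumentError from the name check, instead of failing later and
# less clearly inside modelcols.
lm(@formula(y ~ z), df)  # throws ArgumentError
```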
@@ -299,8 +302,8 @@ function StatsBase.predict(mm::LinPredModel, data;
lower = Vector{Union{Float64, Missing}}(missing, nr)
upper = Vector{Union{Float64, Missing}}(missing, nr)
tup = (prediction=view(prediction, nonmissinginds),
lower=view(lower, nonmissinginds),
upper=view(upper, nonmissinginds))
lower=view(lower, nonmissinginds),
upper=view(upper, nonmissinginds))
predict!(tup, mm, new_x; kwargs...)
return (prediction=prediction, lower=lower, upper=upper)
end
8 changes: 5 additions & 3 deletions src/lm.jl
@@ -75,17 +75,19 @@ residuals(r::LmResp) = r.y - r.mu
"""
LinearModel

A combination of a [`LmResp`](@ref) and a [`LinPred`](@ref)
A combination of a [`LmResp`](@ref), a [`LinPred`](@ref),
and possibly a `FormulaTerm`

# Members

- `rr`: a `LmResp` object
- `pp`: a `LinPred` object
- `f`: either a `FormulaTerm` object or `nothing`
"""
struct LinearModel{L<:LmResp,T<:LinPred,F<:Union{FormulaTerm,Nothing}} <: LinPredModel
struct LinearModel{L<:LmResp,T<:LinPred} <: LinPredModel
rr::L
pp::T
f::F
f::Union{FormulaTerm,Nothing}
end

LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp)
8 changes: 4 additions & 4 deletions test/runtests.jl
@@ -730,8 +730,8 @@ end
gm11, newX, interval=:confidence, interval_method=:delta) ==
predict(gm11, newX, interval=:confidence, interval_method=:delta)
@test predict!((prediction=similar(Y, size(newX, 1)),
lower=similar(Y, size(newX, 1)),
upper=similar(Y, size(newX, 1))),
lower=similar(Y, size(newX, 1)),
upper=similar(Y, size(newX, 1))),
gm11, newX, interval=:confidence, interval_method=:transformation) ==
predict(gm11, newX, interval=:confidence, interval_method=:transformation)
@test_throws ArgumentError predict!((prediction=similar(Y, size(newX, 1)),
@@ -819,8 +819,8 @@ end
mm, newX, interval=:confidence) ==
predict(mm, newX, interval=:confidence)
@test predict!((prediction=similar(Y, size(newX, 1)),
lower=similar(Y, size(newX, 1)),
upper=similar(Y, size(newX, 1))),
lower=similar(Y, size(newX, 1)),
upper=similar(Y, size(newX, 1))),
mm, newX, interval=:prediction) ==
predict(mm, newX, interval=:prediction)
@test_throws ArgumentError predict!((prediction=similar(Y, size(newX, 1)),