Skip to content

Commit

Permalink
relax some checks on eltypes for contrasts and add tests (#242)
Browse files Browse the repository at this point in the history
* relax some checks on eltypes for contrasts and add tests

* actually use CSV

* remove test for different eltype (wrong on 1.0)

* compat bounds for CSV

* drop 1.0, use WeakRefStrings in tests

Co-authored-by: Phillip Alday <me@phillipalday.com>
Co-authored-by: Alex Arslan <ararslan@comcast.net>
Co-authored-by: Phillip Alday <palday@users.noreply.github.com>
  • Loading branch information
4 people authored Feb 28, 2022
1 parent 61de82a commit 4d1138b
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 9 deletions.
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ ShiftedArrays = "1"
StatsBase = "0.33.5"
StatsFuns = "0.9"
Tables = "0.2, 1"
WeakRefStrings = "1"
julia = "1.6"

[extras]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
WeakRefStrings = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"

[targets]
test = ["CategoricalArrays", "DataFrames", "Statistics", "Test"]
test = ["CategoricalArrays", "DataFrames", "Statistics", "Test", "WeakRefStrings"]
17 changes: 9 additions & 8 deletions src/contrasts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ end
# only check equality of matrix, termnames, and levels, and that the type is the
# same for the contrasts (values are irrelevant). This ensures that the two
# will behave identically in creating modelmatrix columns
Base.:(==)(a::ContrastsMatrix{C,T}, b::ContrastsMatrix{C,T}) where {C<:AbstractContrasts,T} =
Base.:(==)(a::ContrastsMatrix{C}, b::ContrastsMatrix{C}) where {C<:AbstractContrasts} =
a.matrix == b.matrix &&
a.termnames == b.termnames &&
a.levels == b.levels
Expand Down Expand Up @@ -166,18 +166,19 @@ function ContrastsMatrix(contrasts::C, levels::AbstractVector{T}) where {C<:Abst
# 3. contrast levels missing from data: would have empty columns, generate a
# rank-deficient model matrix.
c_levels = something(DataAPI.levels(contrasts), levels)
if eltype(c_levels) != eltype(levels)
throw(ArgumentError("mismatching levels types: got $(eltype(levels)), expected " *
"$(eltype(c_levels)) based on contrasts levels."))
end

mismatched_levels = symdiff(c_levels, levels)
if !isempty(mismatched_levels)
throw(ArgumentError("contrasts levels not found in data or vice-versa: " *
"$mismatched_levels." *
"\n Data levels: $levels." *
"\n Contrast levels: $c_levels"))
"\n Data levels ($(eltype(levels))): $levels." *
"\n Contrast levels ($(eltype(c_levels))): $c_levels"))
end

# do conversion AFTER checking for levels so users get a nice error message
# when they've made a mistake with the level types
c_levels = convert(Vector{T}, c_levels)

n = length(c_levels)
if n == 0
throw(ArgumentError("empty set of levels found (need at least two to compute " *
Expand All @@ -187,7 +188,7 @@ function ContrastsMatrix(contrasts::C, levels::AbstractVector{T}) where {C<:Abst
"compute contrasts)."))
end

# find index of base level. use contrasts.base, then default (1).
# find index of base level. use baselevel(contrasts), then default (1).
base_level = baselevel(contrasts)
baseind = base_level === nothing ?
1 :
Expand Down
20 changes: 20 additions & 0 deletions test/contrasts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -355,5 +355,25 @@
@testset "Non-unique levels" begin
@test_throws ArgumentError ContrastsMatrix(DummyCoding(), ["a", "a", "b"])
end

@testset "other string types" begin
using WeakRefStrings

using StatsModels: ContrastsMatrix
using DataAPI: levels

x = ["a", "b", "c", "a", "a", "b"]
x1 = WeakRefStrings.String1.(x)
x1_levs = levels(x1)

@test issetequal(x, x1)

c1 = ContrastsMatrix(DummyCoding(), x1_levs)
c = ContrastsMatrix(DummyCoding(levels=["a", "b", "c"]), x1_levs)
@test c == c1
@test eltype(c.levels) == eltype(c1.levels)

@test_throws ArgumentError ContrastsMatrix(DummyCoding(levels=[1, 2, 3]), x1_levs)
end

end

2 comments on commit 4d1138b

@kleinschmidt
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/55662

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.6.29 -m "<description of version>" 4d1138b6886e362a58c386759896bd8adcea33c9
git push origin v0.6.29

Please sign in to comment.