From 91d789cf0c747fb2d82ed46b213848adeb618a5e Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Tue, 13 Sep 2016 21:18:29 +0200 Subject: [PATCH] Use CategoricalArray instead of NominalArray New type merging NominalArray and OrdinalArray in 0.0.5. --- test/contrasts.jl | 2 +- test/formula.jl | 26 +++++++++++++------------- test/statsmodel.jl | 6 +++--- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/test/contrasts.jl b/test/contrasts.jl index 08c6b9ee..0a6b7667 100644 --- a/test/contrasts.jl +++ b/test/contrasts.jl @@ -4,7 +4,7 @@ using Base.Test using DataFrames -d = DataFrame(x = NominalVector([:a, :b, :c, :a, :a, :b])) +d = DataFrame(x = CategoricalVector([:a, :b, :c, :a, :a, :b])) mf = ModelFrame(Formula(nothing, :x), d) diff --git a/test/formula.jl b/test/formula.jl index 0dbc8aba..e0d74f03 100644 --- a/test/formula.jl +++ b/test/formula.jl @@ -136,7 +136,7 @@ module TestFormula #test_group("expanding a nominal array into a design matrix of indicators for each dummy variable") - d[:x1p] = NullableNominalArray(d[:x1]) + d[:x1p] = NullableCategoricalArray(d[:x1]) mf = ModelFrame(y ~ x1p, d) mm = ModelMatrix(mf) @@ -181,24 +181,24 @@ module TestFormula ## @test r[:,1] == DataVector(df["x1"]) ## @test r[:,2] == DataVector(df["x2"]) - ## df["x1"] = NominalArray(x1) + ## df["x1"] = CategoricalArray(x1) ## r = expand(:x1, df) ## @test isa(r, DataFrame) ## @test ncol(r) == 3 - ## @test r == expand(NominalArray(x1), "x1", DataFrame()) + ## @test r == expand(CategoricalArray(x1), "x1", DataFrame()) ## r = expand(:(x1 + x2), df) ## @test isa(r, DataFrame) ## @test ncol(r) == 4 - ## @test r[:,1:3] == expand(NominalArray(x1), "x1", DataFrame()) + ## @test r[:,1:3] == expand(CategoricalArray(x1), "x1", DataFrame()) ## @test r[:,4] == DataVector(df["x2"]) - ## df["x2"] = NominalArray(x2) + ## df["x2"] = CategoricalArray(x2) ## r = expand(:(x1 + x2), df) ## @test isa(r, DataFrame) ## @test ncol(r) == 6 - ## @test r[:,1:3] == expand(NominalArray(x1), "x1", DataFrame()) - ## @test r[:,4:6] == expand(NominalArray(x2), "x2", DataFrame()) + ## @test r[:,1:3] == expand(CategoricalArray(x1), "x1", DataFrame()) + ## @test r[:,4:6] == expand(CategoricalArray(x2), "x2", DataFrame()) #test_group("Creating a model matrix using full formulas: y ~ x1 + x2, etc") @@ -215,7 +215,7 @@ module TestFormula @test mm.m == [ones(4) x1 x2 x1.*x2] @test mm.m == ModelMatrix{sparsetype}(mf).m - df[:x1] = NominalArray(x1) + df[:x1] = CategoricalArray(x1) x1e = [[0, 1, 0, 0] [0, 0, 1, 0] [0, 0, 0, 1]] f = y ~ x1 * x2 mf = ModelFrame(f, df) @@ -234,7 +234,7 @@ module TestFormula ## @test mm.m == [ones(4) x1 log(x2)] ## df = deepcopy(d) - ## df["x1"] = NominalArray([5:8]) + ## df["x1"] = CategoricalArray([5:8]) ## f = Formula(:(y ~ x1 * (log(x2) + x3))) ## mf = ModelFrame(f, df) ## mm = ModelMatrix(mf) @@ -276,7 +276,7 @@ module TestFormula ## @test model_response(mf) == y'' # fails: Int64 vs. Float64 df = deepcopy(d) - df[:x1] = NullableNominalArray(df[:x1]) + df[:x1] = NullableCategoricalArray(df[:x1]) f = y ~ x2 + x3 + x3*x2 mm = ModelMatrix(ModelFrame(f, df)) @@ -333,9 +333,9 @@ module TestFormula ## FAILS: behavior is wrong when no lower-order terms (1+x1+x2+x1&x2...) ## ## df = DataFrame(y=1:27, - ## x1 = NominalArray(vec([x for x in 1:3, y in 4:6, z in 7:9])), - ## x2 = NominalArray(vec([y for x in 1:3, y in 4:6, z in 7:9])), - ## x3 = NominalArray(vec([z for x in 1:3, y in 4:6, z in 7:9]))) + ## x1 = CategoricalArray(vec([x for x in 1:3, y in 4:6, z in 7:9])), + ## x2 = CategoricalArray(vec([y for x in 1:3, y in 4:6, z in 7:9])), + ## x3 = CategoricalArray(vec([z for x in 1:3, y in 4:6, z in 7:9]))) ## f = y ~ x1 & x2 & x3 ## mf = ModelFrame(f, df) ## @test coefnames(mf)[2:end] == diff --git a/test/statsmodel.jl b/test/statsmodel.jl index 5bf042bc..0afcb19f 100644 --- a/test/statsmodel.jl +++ b/test/statsmodel.jl @@ -61,7 +61,7 @@ io = IOBuffer() show(io, m) ## with categorical variables -d[:x1p] = NullableNominalArray(d[:x1]) +d[:x1p] = NullableCategoricalArray(d[:x1]) f2 = y ~ x1p m2 = fit(DummyMod, f2, d) @@ -73,11 +73,11 @@ m2 = fit(DummyMod, f2, d) ## predict w/ new data with _extra_ levels (throws an error) d3 = deepcopy(d) d3[1, :x1] = 0 -d3[:x1p] = NullableNominalVector(d3[:x1]) +d3[:x1p] = NullableCategoricalVector(d3[:x1]) @test_throws ArgumentError predict(m2, d3) ## fit with contrasts specified -d[:x2p] = NullableNominalVector(d[:x2]) +d[:x2p] = NullableCategoricalVector(d[:x2]) f3 = y ~ x1p + x2p m3 = fit(DummyMod, f3, d) fit(DummyMod, f3, d, contrasts = Dict(:x1p => EffectsCoding()))