-
Notifications
You must be signed in to change notification settings - Fork 32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[WIP] Make apply_schema work within the arguments to a FunctionalTerm #117
Changes from all commits
ea7ba60
e99a244
093abba
274e220
45126c1
c289bdc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
abstract type PolyModel end | ||
|
||
"""
    protect(x)

Marker that is only meaningful inside a `@formula`. Calling it directly as an ordinary
function always raises an error.
"""
function protect(x)
    return error("protect should only be used within a @formula")
end
|
||
""" | ||
ProtectedCtx{OldCtx} | ||
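is a context type that is entered during the application of a schema to a | ||
protected term (the argument of a `protect(...)` call). Its type parameter holds the `OldCtx`, the context that was active before, so that `unprotect` can restore it. | ||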
""" | ||
struct ProtectedCtx{OldCtx}
end
# Applying a schema to `protect(term)`: the single wrapped term gets the schema applied
# with the context switched to `ProtectedCtx{Mod}`, so the original context `Mod`
# stays recoverable from the type parameter.
function StatsModels.apply_schema(t::CallTerm{typeof(protect)}, sch, Mod::Type)
    if length(t.args_parsed) != 1
        throw(ArgumentError("`protect` only applies to a single term."))
    end
    wrapped = t.args_parsed[1]
    return apply_schema(wrapped, sch, ProtectedCtx{Mod})
end
|
||
|
||
# When called directly (outside of a @formula), `unprotect` strips a `protect` wrapper
# and hands back the first parsed argument; any other value passes through untouched.
function unprotect(t::CallTerm{typeof(protect)})
    wrapped = t.args_parsed[1]
    return wrapped
end

function unprotect(t)
    return t
end
"""
    apply_schema(t::CallTerm{typeof(unprotect)}, sch, Mod::Type)

Reject an `unprotect(...)` call term whose context `Mod` is not a `ProtectedCtx`
(the more specific `ProtectedCtx` method handles the valid case).

# Throws
- `DomainError`: always; the offending term `t` is attached as the error's value.
"""
function StatsModels.apply_schema(t::CallTerm{typeof(unprotect)}, sch, Mod::Type)
    # `DomainError(val, msg)`: the first argument is the offending value, the second the
    # message. Passing only the message would make it print as the "value" of the error.
    throw(DomainError(t, "`unprotect` used outside a protected context."))
end
# Applying a schema to `unprotect(term)` while inside a `ProtectedCtx{OldCtx}`:
# the single wrapped term gets the schema applied in the restored context `OldCtx`.
function StatsModels.apply_schema(
    t::CallTerm{typeof(unprotect)}, sch, Mod::Type{<:ProtectedCtx{OldCtx}}
) where {OldCtx}
    if length(t.args_parsed) != 1
        throw(ArgumentError("`unprotect` only applies to a single term."))
    end
    wrapped = t.args_parsed[1]
    return apply_schema(wrapped, sch, OldCtx)
end
|
||
## Definition of how things act while protected: | ||
|
||
# TODO: Transform * into FunctionTerms | ||
# https://github.com/JuliaStats/StatsModels.jl/issues/119 | ||
|
||
# Inside a protected context a constant term is returned unchanged.
function apply_schema(t::ConstantTerm, schema, Mod::Type{<:ProtectedCtx})
    return t
end
|
||
"""
    direct_call(op, arg_terms::Tuple, schema, Mod)

Turn formula syntax back into an ordinary call of `op` on `arg_terms`: build a `CallTerm`
for `op(names...)`, where `names` are the variables reported by `termvars(arg_terms)`,
and apply `schema` to it in context `Mod` through `call_fallback_apply_schema`.

`schema` and `Mod` must be supplied by the caller; they are simply forwarded.
"""
function direct_call(op, arg_terms::Tuple, schema, Mod)
    names = Tuple(termvars(arg_terms))
    ex = Expr(:call, nameof(op), names...)
    # Use the operator and the argument terms that were actually passed in (previously
    # `+` was hard-coded and an undefined `t` was referenced).
    # NOTE(review): positional order (function, expression, parsed args) is kept from the
    # original constructor call — confirm against the `CallTerm` definition.
    ct = CallTerm{typeof(op), names}(op, ex, arg_terms)
    # NOTE(review): a reviewer asked why this does not just call `apply_schema` directly;
    # the author replied the indirection is probably not needed. Kept as written.
    return call_fallback_apply_schema(ct, schema, Mod)
end

function apply_schema(t::TupleTerm, schema, Mod::Type{<:ProtectedCtx})
    # TupleTerm is what is created by `x+y`, we need to turn that back into addition.
    return direct_call(+, t, schema, Mod)
end

function apply_schema(t::InteractionTerm, schema, Mod::Type{<:ProtectedCtx})
    # InteractionTerm is what is created by `x&y`, we need to turn that back into bitwise and.
    return direct_call(&, t.terms, schema, Mod)
end
|
||
|
||
# Let's not do the below by default. Instead, overloaded call terms should opt into the fallback while in a ProtectedCtx; | ||
# that way we avoid an ambiguity error. | ||
# apply_schema(ct::CallTerm, schema, Mod::Type{<:ProtectedCtx}) = call_fallback_apply_schema(ct, schema, Mod) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,11 +10,13 @@ | |
|
||
terms(t::FormulaTerm) = union(terms(t.lhs), terms(t.rhs)) | ||
terms(t::InteractionTerm) = terms(t.terms) | ||
terms(t::FunctionTerm{Fo,Fa,names}) where {Fo,Fa,names} = Term.(names) | ||
terms(t::CallTerm) = Term.(termnames(t)) # TODO: This is wrong because termnames is wrong | ||
terms(t::FunctionCallTerm) = t.terms | ||
terms(t::AbstractTerm) = [t] | ||
terms(t::MatrixTerm) = terms(t.terms) | ||
terms(t::TupleTerm) = mapreduce(terms, union, t) | ||
|
||
|
||
needs_schema(::AbstractTerm) = true | ||
needs_schema(::ConstantTerm) = false | ||
needs_schema(t) = false | ||
|
@@ -26,7 +28,7 @@ needs_schema(t) = false | |
Compute all the invariants necessary to fit a model with `terms`. A schema is a dict that | ||
maps `Term`s to their concrete instantiations (either `CategoricalTerm`s or | ||
`ContinuousTerm`s. "Hints" may optionally be supplied in the form of a `Dict` mapping term | ||
names (as `Symbol`s) to term or contrast types. If a hint is not provided for a variable, | ||
names (as `Symbol`s) to term or contrast types. If a hint is not provided for a variable, | ||
the appropriate term type will be guessed based on the data type from the data column: any | ||
numeric data is assumed to be continuous, and any non-numeric data is assumed to be | ||
categorical. | ||
|
@@ -56,7 +58,7 @@ Dict{Any,Any} with 1 entry: | |
y => y | ||
``` | ||
|
||
Note that concrete `ContinuousTerm` and `CategoricalTerm` and un-typed `Term`s print the | ||
Note that concrete `ContinuousTerm` and `CategoricalTerm` and un-typed `Term`s print the | ||
same in a container, but when printed alone are different: | ||
|
||
```jldoctest 1 | ||
|
@@ -159,9 +161,9 @@ end | |
Return a new term that is the result of applying `schema` to term `t` with | ||
destination model (type) `Mod`. If `Mod` is omitted, `Nothing` will be used. | ||
|
||
When `t` is a `ContinuousTerm` or `CategoricalTerm` already, the term will be returned | ||
unchanged _unless_ a matching term is found in the schema. This allows | ||
selective re-setting of a schema to change the contrast coding or levels of a | ||
When `t` is a `ContinuousTerm` or `CategoricalTerm` already, the term will be returned | ||
unchanged _unless_ a matching term is found in the schema. This allows | ||
selective re-setting of a schema to change the contrast coding or levels of a | ||
categorical term, or to change a continuous term to categorical or vice versa. | ||
""" | ||
apply_schema(t, schema) = apply_schema(t, schema, Nothing) | ||
|
@@ -180,6 +182,27 @@ apply_schema(t::Union{ContinuousTerm, CategoricalTerm}, schema, Mod::Type) = | |
get(schema, term(t.sym), t) | ||
apply_schema(t::MatrixTerm, sch, Mod::Type) = MatrixTerm(apply_schema.(t.terms, Ref(sch), Mod)) | ||
|
||
"""
    call_fallback_apply_schema(ct::CallTerm, schema, Mod)

Default way of applying `schema` to a `CallTerm`: apply it to every parsed argument,
wrap the results in a `FunctionCallTerm`, then apply `schema` to that wrapper as well.
"""
function call_fallback_apply_schema(ct::CallTerm{F, Names}, schema, Mod) where {F, Names}
    # Step 1: each argument of the call gets the schema applied on its own, so that
    # arguments can have overloaded `apply_schema` behaviour.
    applied_args = map(arg -> apply_schema(arg, schema, Mod), ct.args_parsed)
    call_term = FunctionCallTerm(ct.forig, Symbol[Names...], applied_args, ct.exorig)

    # Step 2: the FunctionCallTerm itself goes through `apply_schema`, so it can have
    # overloaded behaviour too. The fallback is to leave it as is, which results in a
    # FunctionCallTerm in the final formula, so the function is called in `modelcols`.
    return apply_schema(call_term, schema, Mod)
end

# Any `CallTerm` without a more specific method takes the fallback path.
function apply_schema(ct::CallTerm, schema, Mod::Type)
    return call_fallback_apply_schema(ct, schema, Mod)
end
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this indirection necessary? why not just put the body of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For reasons of making it easy to write things that avoid ambiguities, Though I am not sure it is used in any code I have committed, |
||
|
||
# To get back (approximately) the old behaviour of FunctionTerm, do: | ||
# apply_schema(ct::CallTerm, schema, Mod::Type) = call_fallback_apply_schema(ct, schema, ProtectedCtx{Mod}) | ||
|
||
|
||
|
||
# TODO: special case this for <:RegressionModel ? | ||
function apply_schema(t::ConstantTerm, schema, Mod::Type) | ||
|
@@ -234,7 +257,7 @@ function apply_schema(t::FormulaTerm, schema, Mod::Type{<:StatisticalModel}) | |
end | ||
|
||
# strategy is: apply schema, then "repair" if necessary (promote to full rank | ||
# contrasts). | ||
# contrasts). | ||
# | ||
# to know whether to repair, need to know context a term appears in. main | ||
# effects occur in "own" context. | ||
|
@@ -309,7 +332,7 @@ termsyms(t::InterceptTerm{true}) = Set(1) | |
termsyms(t::ConstantTerm) = Set((t.n,)) | ||
termsyms(t::Union{Term, CategoricalTerm, ContinuousTerm}) = Set([t.sym]) | ||
termsyms(t::InteractionTerm) = mapreduce(termsyms, union, t.terms) | ||
termsyms(t::FunctionTerm) = Set([t.exorig]) | ||
termsyms(t::Union{CallTerm,FunctionCallTerm}) = Set(termnames(t)) | ||
|
||
symequal(t1::AbstractTerm, t2::AbstractTerm) = issetequal(termsyms(t1), termsyms(t2)) | ||
|
||
|
@@ -325,4 +348,8 @@ termvars(t::InteractionTerm) = mapreduce(termvars, union, t.terms) | |
termvars(t::TupleTerm) = mapreduce(termvars, union, t, init=Symbol[]) | ||
termvars(t::MatrixTerm) = termvars(t.terms) | ||
termvars(t::FormulaTerm) = union(termvars(t.lhs), termvars(t.rhs)) | ||
termvars(t::FunctionTerm{Fo,Fa,names}) where {Fo,Fa,names} = collect(names) | ||
termvars(t::Union{CallTerm,FunctionCallTerm}) = collect(termnames(t)) | ||
|
||
|
||
# For a `CallTerm` the names are read off the second type parameter.
function termnames(::CallTerm{<:Any, Names}) where {Names}
    return Names
end

# For a `FunctionCallTerm` the names are stored in the `names` field.
function termnames(ft::FunctionCallTerm)
    return ft.names
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
using a DomainError seems a little punny.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, but for once `ArgumentError` seems wrong. Could be a `FormulaSyntaxError`, maybe?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or even `ParseError` or `LoadError` or whatever the error is that's thrown for invalid syntax? (but that's maybe too punny as well)