generalized_linear_model.jl
using XGBoost
##
# this script demonstrates how to fit a generalized linear model in XGBoost:
# we use a linear model instead of trees as the booster
##
const DATAPATH = joinpath(@__DIR__, "../data")
dtrain = DMatrix(joinpath(DATAPATH, "agaricus.txt.train"))
dtest = DMatrix(joinpath(DATAPATH, "agaricus.txt.test"))
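# a DMatrix can also be built from in-memory data instead of a file; a hedged
# sketch, assuming this version of XGBoost.jl accepts a label keyword:
# dtrain_mem = DMatrix(X, label = y)  # X: feature matrix, y: 0/1 label vector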
# change the booster to gblinear so that we fit a linear model
# alpha is the L1 regularizer
# lambda is the L2 regularizer
# you can also set lambda_bias, the L2 regularizer on the bias term
# (a variant using it is sketched after the parameter list below)
param = ["booster"=>"gblinear", "eta"=>1, "silent"=>0,
"objective"=>"binary:logistic", "alpha"=>0.0001, "lambda"=>1]
# normally, you do not need to set eta (the step size)
# XGBoost uses a parallel coordinate descent algorithm (shotgun);
# parallelization can affect convergence in certain cases
# setting eta to a smaller value, e.g. 0.5, can make the optimization more
# stable, as sketched below
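# for example, a more conservative configuration would only change eta:
# param = ["booster"=>"gblinear", "eta"=>0.5, "silent"=>0,
#          "objective"=>"binary:logistic", "alpha"=>0.0001, "lambda"=>1]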
##
# the rest of the settings are the same
#
watchlist = [(dtest,"eval"), (dtrain,"train")]
num_round = 4
bst = xgboost(dtrain, num_round, param=param, watchlist=watchlist)
preds = predict(bst, dtest)
labels = get_info(dtest, "label")
println("test-error=", sum((preds .> 0.5) .!= labels) / length(preds))
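# optionally persist the fitted booster for later scoring; a hedged sketch,
# assuming the `save` helper from this XGBoost.jl version is available
# save(bst, "glm.model")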