Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Categorical feature support #108

Merged
merged 26 commits into from
Dec 5, 2016
Merged
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
261145a
The logic for tree prediction
guolinke Dec 1, 2016
a707174
fix error for uint8_t to string
guolinke Dec 1, 2016
762c2aa
some bug fix
guolinke Dec 1, 2016
40ee200
update logic for constructing bin mapper
guolinke Dec 1, 2016
90a1fe2
add main logic for find best threshold
guolinke Dec 1, 2016
65b7739
fix bugs
guolinke Dec 1, 2016
7de3551
save/load feature_names to model file. expose c_api of set feature_names
guolinke Dec 2, 2016
790b53b
merge from master
guolinke Dec 2, 2016
0f5e9dc
use function pointer to avoid if..else
guolinke Dec 2, 2016
d997e54
update dataset_loader to support specific categorical feature
guolinke Dec 2, 2016
ce58581
some warnings fixed
guolinke Dec 3, 2016
87e3425
update format in dump_model
guolinke Dec 3, 2016
8ffa9f6
use std::function to avoid branching
guolinke Dec 3, 2016
f5795dd
fix json format
guolinke Dec 3, 2016
2e2a4b2
reduce memory cost for feature histogram
guolinke Dec 3, 2016
6e03b7d
fix json format
guolinke Dec 3, 2016
4b8b964
update join function
guolinke Dec 3, 2016
1bdfe24
support set categorical feature in python package
guolinke Dec 3, 2016
ae672b8
fix bug caused by using std::numeric_limits<int>::infinity().
guolinke Dec 3, 2016
a6acade
update column parse logic
guolinke Dec 3, 2016
75bd396
some naming fix
guolinke Dec 3, 2016
1510033
fix one bug in continue train
guolinke Dec 4, 2016
6f044ac
bug fix in str2tree.
guolinke Dec 4, 2016
158ddd5
refine some template functions
guolinke Dec 4, 2016
0a7712b
comment refine
guolinke Dec 5, 2016
80c1fa8
move 'other_categorical' to last bin
guolinke Dec 5, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
comment refine
  • Loading branch information
guolinke committed Dec 5, 2016
commit 0a7712ba0e9a0e3e6f911a4332953119f36112c9
5 changes: 1 addition & 4 deletions include/LightGBM/dataset.h
Original file line number Diff line number Diff line change
@@ -385,10 +385,7 @@ class Dataset {
Log::Warning("size of feature_names error, should equal with total number of features");
return;
}
feature_names_ = std::vector<std::string>(num_total_features_);
for (int i = 0; i < num_total_features_; ++i) {
feature_names_[i] = feature_names[i];
}
feature_names_ = std::vector<std::string>(feature_names);
}

/*! \brief Get Number of data */
14 changes: 7 additions & 7 deletions include/LightGBM/tree.h
Original file line number Diff line number Diff line change
@@ -173,7 +173,7 @@ class Tree {
std::vector<unsigned int> threshold_in_bin_;
/*! \brief A non-leaf node's split threshold in feature value */
std::vector<double> threshold_;
/*! \brief Decision type, 0 for '<=' 1(numerical feature) for 'is'(categorical feature) */
/*! \brief Decision type, 0 for '<='(numerical feature), 1 for 'is'(categorical feature) */
std::vector<int8_t> decision_type_;
/*! \brief A non-leaf node's split gain */
std::vector<double> split_gain_;
@@ -184,9 +184,9 @@ class Tree {
std::vector<double> leaf_value_;
/*! \brief DataCount of leaves */
std::vector<data_size_t> leaf_count_;
/*! \brief Output of internal nodes(save internal output for per inference feature importance calc) */
/*! \brief Output of non-leaf nodes */
std::vector<double> internal_value_;
/*! \brief DataCount of internal nodes */
/*! \brief DataCount of non-leaf nodes */
std::vector<data_size_t> internal_count_;
/*! \brief Depth for leaves */
std::vector<int> leaf_depth_;
@@ -208,8 +208,8 @@ inline int Tree::GetLeaf(const std::vector<std::unique_ptr<BinIterator>>& iterat
int node = 0;
while (node >= 0) {
if (inner_decision_funs[decision_type_[node]](
iterators[split_feature_[node]]->Get(data_idx),
threshold_in_bin_[node])) {
iterators[split_feature_[node]]->Get(data_idx),
threshold_in_bin_[node])) {
node = left_child_[node];
} else {
node = right_child_[node];
@@ -222,8 +222,8 @@ inline int Tree::GetLeaf(const double* feature_values) const {
int node = 0;
while (node >= 0) {
if (decision_funs[decision_type_[node]](
feature_values[split_feature_real_[node]],
threshold_[node])) {
feature_values[split_feature_real_[node]],
threshold_[node])) {
node = left_child_[node];
} else {
node = right_child_[node];