Skip to content

Commit

Permalink
I improved the way calls to the separation oracle get dispatched to the
thread_pool.  Previously, each separation oracle call was dispatched
to a thread individually.  This is inefficient when there are a whole
lot of samples (and thus separation oracle calls which need to be made).
So now entire batches of separation oracle calls are dispatched to
each thread.  This minimizes the thread switching and synchronization
overhead.
Browse files Browse the repository at this point in the history
  • Loading branch information
davisking committed Jun 11, 2011
1 parent 1b05c23 commit 84daff6
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 42 deletions.
62 changes: 48 additions & 14 deletions dlib/svm/structural_svm_distributed.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,14 @@ namespace dlib
data.loss = 0;

data.num = problem.get_num_samples();

// how many samples to process in a single task (aim for 100 jobs per thread)
const long block_size = std::max<long>(1, data.num / (1+tp.num_threads_in_pool()*100));

binder b(*this, req, data);
for (long i = 0; i < data.num; ++i)
for (long i = 0; i < data.num; i+=block_size)
{
tp.add_task(b, &binder::call_oracle, i);
tp.add_task(b, &binder::call_oracle, i, std::min(i + block_size, data.num));
}
tp.wait_for_all_tasks();

Expand All @@ -227,20 +231,50 @@ namespace dlib
) : self(self_), req(req_), data(data_) {}

/*!
    Runs the separation oracle on the half-open sample range [begin, end)
    and accumulates the results into the shared data object.

    The span is processed in one task so that the accumulation mutex is
    taken only once per batch rather than once per sample, which keeps
    thread switching and synchronization overhead low when there are many
    samples.
!*/
void call_oracle (
    long begin,
    long end
)
{
    // If we are only going to call the separation oracle once then
    // don't run the slightly more complex for loop version of this code.
    if (end-begin <= 1)
    {
        scalar_type loss;
        feature_vector_type ftemp;
        self.cache[begin].separation_oracle_cached(req.skip_cache,
                                                   req.cur_risk_lower_bound,
                                                   req.current_solution,
                                                   loss,
                                                   ftemp);

        // Fold this sample's contribution into the shared accumulators
        // under the mutex.
        auto_mutex lock(self.accum_mutex);
        data.loss += loss;
        sparse_vector::add_to(data.subgradient, ftemp);
    }
    else
    {
        // Accumulate the whole batch into thread-local temporaries first
        // so the shared mutex is acquired only once at the end.
        scalar_type loss = 0;
        matrix_type faccum(data.subgradient.size(),1);
        faccum = 0;

        feature_vector_type ftemp;

        for (long i = begin; i < end; ++i)
        {
            scalar_type loss_temp;
            self.cache[i].separation_oracle_cached(req.skip_cache,
                                                   req.cur_risk_lower_bound,
                                                   req.current_solution,
                                                   loss_temp,
                                                   ftemp);
            loss += loss_temp;
            sparse_vector::add_to(faccum, ftemp);
        }

        // Single locked update for the entire batch.
        auto_mutex lock(self.accum_mutex);
        data.loss += loss;
        sparse_vector::add_to(data.subgradient, faccum);
    }
}

const node_type& self;
Expand Down
50 changes: 40 additions & 10 deletions dlib/svm/structural_svm_problem_threaded.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,42 @@ namespace dlib
) : self(self_), w(w_), subgradient(subgradient_), total_loss(total_loss_) {}

/*!
    Runs the separation oracle on the half-open sample range [begin, end)
    and accumulates the total loss and subgradient into the shared outputs.

    Processing a batch per task means the accumulation mutex is acquired
    once per batch rather than once per sample, minimizing synchronization
    overhead when the number of samples is large.
!*/
void call_oracle (
    long begin,
    long end
)
{
    // If we are only going to call the separation oracle once then
    // don't run the slightly more complex for loop version of this code.
    if (end-begin <= 1)
    {
        scalar_type loss;
        feature_vector_type ftemp;
        self.separation_oracle_cached(begin, w, loss, ftemp);

        // Fold this sample's contribution into the shared accumulators
        // under the mutex.
        auto_mutex lock(self.accum_mutex);
        total_loss += loss;
        sparse_vector::add_to(subgradient, ftemp);
    }
    else
    {
        // Accumulate the whole batch into thread-local temporaries first
        // so the shared mutex is acquired only once at the end.
        scalar_type loss = 0;
        matrix_type faccum(subgradient.size(),1);
        faccum = 0;

        feature_vector_type ftemp;

        for (long i = begin; i < end; ++i)
        {
            scalar_type loss_temp;
            self.separation_oracle_cached(i, w, loss_temp, ftemp);
            loss += loss_temp;
            sparse_vector::add_to(faccum, ftemp);
        }

        // Single locked update for the entire batch.
        auto_mutex lock(self.accum_mutex);
        total_loss += loss;
        sparse_vector::add_to(subgradient, faccum);
    }
}

const structural_svm_problem_threaded& self;
Expand All @@ -76,10 +102,14 @@ namespace dlib
) const
{
const long num = this->get_num_samples();

// how many samples to process in a single task (aim for 100 jobs per thread)
const long block_size = std::max<long>(1, num / (1+tp.num_threads_in_pool()*100));

binder b(*this, w, subgradient, total_loss);
for (long i = 0; i < num; ++i)
for (long i = 0; i < num; i+=block_size)
{
tp.add_task(b, &binder::call_oracle, i);
tp.add_task(b, &binder::call_oracle, i, std::min(i+block_size, num));
}
tp.wait_for_all_tasks();
}
Expand Down
55 changes: 37 additions & 18 deletions dlib/test/svm_struct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,15 +486,16 @@ namespace

void make_dataset (
std::vector<sample_type>& samples,
std::vector<scalar_type>& labels
std::vector<scalar_type>& labels,
int num
)
{
samples.clear();
labels.clear();
dlib::rand rnd;
static dlib::rand rnd;
for (int i = 0; i < 10; ++i)
{
for (int j = 0; j < 100; ++j)
for (int j = 0; j < num; ++j)
{
sample_type samp;
samp = 0;
Expand All @@ -517,7 +518,10 @@ namespace
"Runs tests on the structural svm components.")
{}

void perform_test (
void run_test (
const std::vector<sample_type>& samples,
const std::vector<scalar_type>& labels,
const double true_obj
)
{
typedef linear_kernel<sample_type> kernel_type;
Expand All @@ -530,10 +534,6 @@ namespace
trainer1.set_epsilon(1e-4);
trainer1.set_c(10);

std::vector<sample_type> samples;
std::vector<scalar_type> labels;
make_dataset(samples, labels);


multiclass_linear_decision_function<kernel_type,double> df1, df2, df3, df4, df5;
double obj1, obj2, obj3, obj4, obj5;
Expand Down Expand Up @@ -561,11 +561,11 @@ namespace
DLIB_TEST(std::abs(obj1 - obj3) < 1e-2);
DLIB_TEST(std::abs(obj1 - obj4) < 1e-2);
DLIB_TEST(std::abs(obj1 - obj5) < 1e-2);
DLIB_TEST(std::abs(obj1 - 1.155) < 1e-2);
DLIB_TEST(std::abs(obj2 - 1.155) < 1e-2);
DLIB_TEST(std::abs(obj3 - 1.155) < 1e-2);
DLIB_TEST(std::abs(obj4 - 1.155) < 1e-2);
DLIB_TEST(std::abs(obj5 - 1.155) < 1e-2);
DLIB_TEST(std::abs(obj1 - true_obj) < 1e-2);
DLIB_TEST(std::abs(obj2 - true_obj) < 1e-2);
DLIB_TEST(std::abs(obj3 - true_obj) < 1e-2);
DLIB_TEST(std::abs(obj4 - true_obj) < 1e-2);
DLIB_TEST(std::abs(obj5 - true_obj) < 1e-2);

dlog << LINFO << "weight error: "<< max(abs(df1.weights - df2.weights));
dlog << LINFO << "weight error: "<< max(abs(df1.weights - df3.weights));
Expand All @@ -589,27 +589,46 @@ namespace
matrix<double> res = test_multiclass_decision_function(df1, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
DLIB_TEST(sum(diag(res)) == samples.size());

res = test_multiclass_decision_function(df2, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
DLIB_TEST(sum(diag(res)) == samples.size());

res = test_multiclass_decision_function(df3, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
DLIB_TEST(sum(diag(res)) == samples.size());

res = test_multiclass_decision_function(df4, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
DLIB_TEST(sum(diag(res)) == samples.size());

res = test_multiclass_decision_function(df5, samples, labels);
dlog << LINFO << res;
dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res);
DLIB_TEST(sum(diag(res)) == 1000);
DLIB_TEST(sum(diag(res)) == samples.size());
}

/*!
    Entry point for the test suite.  Builds datasets of several sizes and
    runs the full battery of structural SVM checks on each, comparing the
    achieved objective value against a known-good value for that dataset.
    The tiny datasets (1 and 2 samples per class) exercise the batched
    oracle-dispatch code path where a batch degenerates to a single call.
!*/
void perform_test (
)
{
    std::vector<sample_type> samples;
    std::vector<scalar_type> labels;

    dlog << LINFO << "test with 100 samples per class";
    make_dataset(samples, labels, 100);
    run_test(samples, labels, 1.155);

    dlog << LINFO << "test with 1 sample per class";
    make_dataset(samples, labels, 1);
    run_test(samples, labels, 0.251);

    // Fixed grammar in the log message below ("2 sample" -> "2 samples").
    dlog << LINFO << "test with 2 samples per class";
    make_dataset(samples, labels, 2);
    run_test(samples, labels, 0.444);
}
} a;

Expand Down

0 comments on commit 84daff6

Please sign in to comment.