Skip to content

Commit

Permalink
Add the ability to pass the return value to the collision detection functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
bluescarni committed Jul 16, 2019
1 parent 34afadb commit 0c075c6
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 20 deletions.
23 changes: 17 additions & 6 deletions include/rakau/detail/tree_coll.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ inline auto tree<NDim, F, UInt, MAC>::coll_leaves_permutation() const

template <std::size_t NDim, typename F, typename UInt, mac MAC>
template <bool Ordered, typename It>
inline auto tree<NDim, F, UInt, MAC>::compute_cgraph_impl(It it) const
inline void tree<NDim, F, UInt, MAC>::compute_cgraph_impl(std::vector<tbb::concurrent_vector<size_type>> &cgraph,
It it) const
{
simple_timer st("overall cgraph computation");

Expand All @@ -225,16 +226,28 @@ inline auto tree<NDim, F, UInt, MAC>::compute_cgraph_impl(It it) const
// The vector of additional particles for each leaf node.
std::vector<tbb::concurrent_vector<size_type>> v_add;

// The return value.
std::vector<tbb::concurrent_vector<size_type>> cgraph;

// Prepare storage for cgraph in parallel
// with the v_add computation.
tbb::task_group tg;

tg.run([this, &cgraph]() {
simple_timer st("cgraph prepare");

// Check if the return value is empty.
const auto empty = cgraph.empty();

cgraph.resize(boost::numeric_cast<decltype(cgraph.size())>(m_parts[0].size()));

if (!empty) {
// If the return value was not originally empty,
// we must make sure that all its vectors are cleared
// up before we write into them.
tbb::parallel_for(tbb::blocked_range(cgraph.begin(), cgraph.end()), [](const auto &r) {
for (auto &v : r) {
v.clear();
}
});
}
});

tg.run([this, &clp, &v_add, it, &c_begin, &c_end]() {
Expand Down Expand Up @@ -586,8 +599,6 @@ inline auto tree<NDim, F, UInt, MAC>::compute_cgraph_impl(It it) const
}
});
#endif

return cgraph;
}

} // namespace rakau
Expand Down
26 changes: 21 additions & 5 deletions include/rakau/tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,8 @@ struct morton_decoder<2, std::uint32_t> {
template <std::size_t NDim, typename UInt, bool Clamp = false, typename F>
inline UInt disc_single_coord(const F &x, const F &inv_box_size)
{
// NOTE: this factor is the total number of available discretised
// positions across a single dimension.
constexpr UInt factor = UInt(1) << cbits_v<UInt, NDim>;

// Translate and rescale the coordinate so that -box_size/2 becomes zero
Expand Down Expand Up @@ -3490,18 +3492,32 @@ class tree
private:
auto coll_leaves_permutation() const;
template <bool Ordered, typename It>
auto compute_cgraph_impl(It) const;
void compute_cgraph_impl(std::vector<tbb::concurrent_vector<size_type>> &out, It) const;

public:
template <typename It>
auto compute_cgraph_u(It it) const
std::vector<tbb::concurrent_vector<size_type>> compute_cgraph_u(It it) const
{
return compute_cgraph_impl<false>(it);
std::vector<tbb::concurrent_vector<size_type>> retval;
compute_cgraph_impl<false>(retval, it);
return retval;
}
// Collision graph computation, "ordered" flavour: forwards to
// compute_cgraph_impl() with the Ordered template flag set to true.
// This overload builds the graph in a fresh local vector and returns
// it by value.
// NOTE(review): 'it' is presumably an iterator to the per-particle AABB
// sizes (the tests pass aabb_sizes.data()) - confirm against the impl.
template <typename It>
std::vector<tbb::concurrent_vector<size_type>> compute_cgraph_o(It it) const
{
// The return value, handed to the implementation as an empty vector.
std::vector<tbb::concurrent_vector<size_type>> retval;
compute_cgraph_impl<true>(retval, it);
return retval;
}
// Overload of compute_cgraph_u() that writes the collision graph into the
// caller-supplied vector 'out' instead of returning a new one. Forwards to
// compute_cgraph_impl() with the Ordered template flag set to false.
// NOTE(review): the implementation appears to resize 'out' and to clear any
// pre-existing per-particle vectors before writing, so a vector filled by a
// previous call can be passed in again - confirm against the impl.
template <typename It>
void compute_cgraph_u(std::vector<tbb::concurrent_vector<size_type>> &out, It it) const
{
compute_cgraph_impl<false>(out, it);
}
template <typename It>
auto compute_cgraph_o(It it) const
void compute_cgraph_o(std::vector<tbb::concurrent_vector<size_type>> &out, It it) const
{
return compute_cgraph_impl<true>(it);
compute_cgraph_impl<true>(out, it);
}

private:
Expand Down
18 changes: 9 additions & 9 deletions test/coll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ TEST_CASE("compute_cgraph_2d")
aabb_sizes_u[i] = aabb_sizes[t.perm()[i]];
}

// Redo the testing.
cgraph_u = t.compute_cgraph_u(aabb_sizes_u.data());
// Redo the testing, this time with retvals passed in.
t.compute_cgraph_u(cgraph_u, aabb_sizes_u.data());
for (auto i = 0u; i < s; ++i) {
for (auto j = i + 1u; j < s; ++j) {
if (aabb_overlap(xc_u[i], yc_u[i], aabb_sizes_u[i], xc_u[j], yc_u[j], aabb_sizes_u[j])) {
Expand All @@ -180,7 +180,7 @@ TEST_CASE("compute_cgraph_2d")
v.clear();
}

cgraph_o = t.compute_cgraph_o(aabb_sizes.data());
t.compute_cgraph_o(cgraph_o, aabb_sizes.data());
for (auto i = 0u; i < s; ++i) {
for (auto j = i + 1u; j < s; ++j) {
if (aabb_overlap(xc_o[i], yc_o[i], aabb_sizes[i], xc_o[j], yc_o[j], aabb_sizes[j])) {
Expand Down Expand Up @@ -237,12 +237,12 @@ TEST_CASE("compute_cgraph_2d")
// All zero aabb sizes.
std::fill(aabb_sizes.begin(), aabb_sizes.end(), 0.);

cgraph_u = t.compute_cgraph_u(aabb_sizes.data());
t.compute_cgraph_u(cgraph_u, aabb_sizes.data());
for (const auto &c : cgraph_u) {
REQUIRE(c.empty());
}

cgraph_o = t.compute_cgraph_o(aabb_sizes.data());
t.compute_cgraph_o(cgraph_o, aabb_sizes.data());
for (const auto &c : cgraph_o) {
REQUIRE(c.empty());
}
Expand Down Expand Up @@ -356,7 +356,7 @@ TEST_CASE("compute_cgraph_3d")
}

// Redo the testing.
cgraph_u = t.compute_cgraph_u(aabb_sizes_u.data());
t.compute_cgraph_u(cgraph_u, aabb_sizes_u.data());
for (auto i = 0u; i < s; ++i) {
for (auto j = i + 1u; j < s; ++j) {
if (aabb_overlap(xc_u[i], yc_u[i], zc_u[i], aabb_sizes_u[i], xc_u[j], yc_u[j], zc_u[j],
Expand All @@ -372,7 +372,7 @@ TEST_CASE("compute_cgraph_3d")
v.clear();
}

cgraph_o = t.compute_cgraph_o(aabb_sizes.data());
t.compute_cgraph_o(cgraph_o, aabb_sizes.data());
for (auto i = 0u; i < s; ++i) {
for (auto j = i + 1u; j < s; ++j) {
if (aabb_overlap(xc_o[i], yc_o[i], zc_o[i], aabb_sizes[i], xc_o[j], yc_o[j], zc_o[j],
Expand Down Expand Up @@ -432,12 +432,12 @@ TEST_CASE("compute_cgraph_3d")
// All zero aabb sizes.
std::fill(aabb_sizes.begin(), aabb_sizes.end(), 0.);

cgraph_u = t.compute_cgraph_u(aabb_sizes.data());
t.compute_cgraph_u(cgraph_u, aabb_sizes.data());
for (const auto &c : cgraph_u) {
REQUIRE(c.empty());
}

cgraph_o = t.compute_cgraph_o(aabb_sizes.data());
t.compute_cgraph_o(cgraph_o, aabb_sizes.data());
for (const auto &c : cgraph_o) {
REQUIRE(c.empty());
}
Expand Down

0 comments on commit 0c075c6

Please sign in to comment.