Skip to content

Commit

Permalink
[Core] Make parallel algorithms match C++ Parallelism TS.
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D33016

llvm-svn: 302613
  • Loading branch information
Zachary Turner committed May 10, 2017
1 parent a09ff59 commit 092c767
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 76 deletions.
2 changes: 1 addition & 1 deletion lld/COFF/ICF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) {
// Split sections into 256 shards and call Fn in parallel.
size_t NumShards = 256;
size_t Step = Chunks.size() / NumShards;
parallel_for(size_t(0), NumShards, [&](size_t I) {
for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) {
forEachClassRange(I * Step, (I + 1) * Step, Fn);
});
forEachClassRange(Step * NumShards, Chunks.size(), Fn);
Expand Down
2 changes: 1 addition & 1 deletion lld/COFF/MapFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ static SymbolMapTy getSectionSyms(ArrayRef<DefinedRegular *> Syms) {
static DenseMap<DefinedRegular *, std::string>
getSymbolStrings(ArrayRef<DefinedRegular *> Syms) {
std::vector<std::string> Str(Syms.size());
parallel_for((size_t)0, Syms.size(), [&](size_t I) {
for_each_n(parallel::par, (size_t)0, Syms.size(), [&](size_t I) {
raw_string_ostream OS(Str[I]);
writeHeader(OS, Syms[I]->getRVA(), 0, 0);
OS << indent(2) << toString(*Syms[I]);
Expand Down
14 changes: 6 additions & 8 deletions lld/COFF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -745,8 +745,8 @@ void Writer::writeSections() {
// ADD instructions).
if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE)
memset(SecBuf, 0xCC, Sec->getRawSize());
parallel_for_each(Sec->getChunks().begin(), Sec->getChunks().end(),
[&](Chunk *C) { C->writeTo(SecBuf); });
for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(),
[&](Chunk *C) { C->writeTo(SecBuf); });
}
}

Expand All @@ -760,16 +760,14 @@ void Writer::sortExceptionTable() {
uint8_t *End = Begin + Sec->getVirtualSize();
if (Config->Machine == AMD64) {
struct Entry { ulittle32_t Begin, End, Unwind; };
parallel_sort(
(Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
sort(parallel::par, (Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
return;
}
if (Config->Machine == ARMNT) {
struct Entry { ulittle32_t Begin, Unwind; };
parallel_sort(
(Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
sort(parallel::par, (Entry *)Begin, (Entry *)End,
[](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
return;
}
errs() << "warning: don't know how to handle .pdata.\n";
Expand Down
14 changes: 6 additions & 8 deletions lld/ELF/Threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,17 @@ namespace elf {
template <class IterTy, class FuncTy>
void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) {
if (Config->Threads)
parallel_for_each(Begin, End, Fn);
for_each(parallel::par, Begin, End, Fn);
else
std::for_each(Begin, End, Fn);
for_each(parallel::seq, Begin, End, Fn);
}

inline void parallelFor(size_t Begin, size_t End,
std::function<void(size_t)> Fn) {
if (Config->Threads) {
parallel_for(Begin, End, Fn);
} else {
for (size_t I = Begin; I < End; ++I)
Fn(I);
}
if (Config->Threads)
for_each_n(parallel::par, Begin, End, Fn);
else
for_each_n(parallel::seq, Begin, End, Fn);
}
}
}
Expand Down
147 changes: 95 additions & 52 deletions lld/include/lld/Core/Parallel.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@

#include "lld/Core/LLVM.h"
#include "lld/Core/TaskGroup.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/MathExtras.h"

#include <algorithm>

Expand All @@ -24,25 +25,40 @@

namespace lld {

#if !LLVM_ENABLE_THREADS
template <class RandomAccessIterator, class Comparator>
void parallel_sort(
RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp = std::less<
typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
std::sort(Start, End, Comp);
}
#elif defined(_MSC_VER)
// Use ppl parallel_sort on Windows.
namespace parallel {
struct sequential_execution_policy {};
struct parallel_execution_policy {};

template <typename T>
struct is_execution_policy
: public std::integral_constant<
bool, llvm::is_one_of<T, sequential_execution_policy,
parallel_execution_policy>::value> {};

constexpr sequential_execution_policy seq{};
constexpr parallel_execution_policy par{};

#if LLVM_ENABLE_THREADS

namespace detail {

#if defined(_MSC_VER)
template <class RandomAccessIterator, class Comparator>
void parallel_sort(
RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp = std::less<
typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp) {
concurrency::parallel_sort(Start, End, Comp);
}
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
concurrency::parallel_for_each(Begin, End, Fn);
}

template <class IndexTy, class FuncTy>
void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
concurrency::parallel_for(Begin, End, Fn);
}

#else
namespace detail {
const ptrdiff_t MinParallelSize = 1024;

/// \brief Inclusive median.
Expand Down Expand Up @@ -83,46 +99,15 @@ void parallel_quick_sort(RandomAccessIterator Start, RandomAccessIterator End,
});
parallel_quick_sort(Pivot + 1, End, Comp, TG, Depth - 1);
}
}

template <class RandomAccessIterator, class Comparator>
void parallel_sort(
RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp = std::less<
typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp) {
TaskGroup TG;
detail::parallel_quick_sort(Start, End, Comp, TG,
llvm::Log2_64(std::distance(Start, End)) + 1);
}
#endif

template <class T> void parallel_sort(T *Start, T *End) {
parallel_sort(Start, End, std::less<T>());
parallel_quick_sort(Start, End, Comp, TG,
llvm::Log2_64(std::distance(Start, End)) + 1);
}

#if !LLVM_ENABLE_THREADS
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
std::for_each(Begin, End, Fn);
}

template <class IndexTy, class FuncTy>
void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
for (IndexTy I = Begin; I != End; ++I)
Fn(I);
}
#elif defined(_MSC_VER)
// Use ppl parallel_for_each on Windows.
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
concurrency::parallel_for_each(Begin, End, Fn);
}

template <class IndexTy, class FuncTy>
void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
concurrency::parallel_for(Begin, End, Fn);
}
#else
template <class IterTy, class FuncTy>
void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
// TaskGroup has a relatively high overhead, so we want to reduce
Expand All @@ -142,7 +127,7 @@ void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
}

template <class IndexTy, class FuncTy>
void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
ptrdiff_t TaskSize = (End - Begin) / 1024;
if (TaskSize == 0)
TaskSize = 1;
Expand All @@ -160,7 +145,65 @@ void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
Fn(J);
});
}

#endif

template <typename Iter>
using DefComparator =
std::less<typename std::iterator_traits<Iter>::value_type>;

} // namespace detail
#endif

// sequential algorithm implementations.
template <class Policy, class RandomAccessIterator,
class Comparator = detail::DefComparator<RandomAccessIterator>>
void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End,
const Comparator &Comp = Comparator()) {
static_assert(is_execution_policy<Policy>::value,
"Invalid execution policy!");
std::sort(Start, End, Comp);
}

template <class Policy, class IterTy, class FuncTy>
void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) {
static_assert(is_execution_policy<Policy>::value,
"Invalid execution policy!");
std::for_each(Begin, End, Fn);
}

template <class Policy, class IndexTy, class FuncTy>
void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) {
static_assert(is_execution_policy<Policy>::value,
"Invalid execution policy!");
for (IndexTy I = Begin; I != End; ++I)
Fn(I);
}

// Parallel algorithm implementations, only available when LLVM_ENABLE_THREADS
// is true.
#if defined(LLVM_ENABLE_THREADS)
template <class RandomAccessIterator,
class Comparator = detail::DefComparator<RandomAccessIterator>>
void sort(parallel_execution_policy policy, RandomAccessIterator Start,
RandomAccessIterator End, const Comparator &Comp = Comparator()) {
detail::parallel_sort(Start, End, Comp);
}

template <class IterTy, class FuncTy>
void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End,
FuncTy Fn) {
detail::parallel_for_each(Begin, End, Fn);
}

template <class IndexTy, class FuncTy>
void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End,
FuncTy Fn) {
detail::parallel_for_each_n(Begin, End, Fn);
}
#endif

} // namespace parallel
} // End namespace lld

#endif // LLD_CORE_PARALLEL_H
8 changes: 4 additions & 4 deletions lld/lib/ReaderWriter/MachO/LayoutPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,10 +461,10 @@ llvm::Error LayoutPass::perform(SimpleFile &mergedFile) {
});

std::vector<LayoutPass::SortKey> vec = decorate(atomRange);
parallel_sort(vec.begin(), vec.end(),
[&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
return compareAtoms(l, r, _customSorter);
});
sort(parallel::par, vec.begin(), vec.end(),
[&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
return compareAtoms(l, r, _customSorter);
});
DEBUG(checkTransitivity(vec, _customSorter));
undecorate(atomRange, vec);

Expand Down
4 changes: 2 additions & 2 deletions lld/unittests/CoreTests/ParallelTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ TEST(Parallel, sort) {
for (auto &i : array)
i = dist(randEngine);

lld::parallel_sort(std::begin(array), std::end(array));
sort(lld::parallel::par, std::begin(array), std::end(array));
ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array)));
}

Expand All @@ -36,7 +36,7 @@ TEST(Parallel, parallel_for) {
// writing.
uint32_t range[2050];
std::fill(range, range + 2050, 1);
lld::parallel_for(0, 2049, [&range](size_t I) { ++range[I]; });
for_each_n(lld::parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; });

uint32_t expected[2049];
std::fill(expected, expected + 2049, 2);
Expand Down

0 comments on commit 092c767

Please sign in to comment.