Skip to content
This repository has been archived by the owner on Jan 26, 2024. It is now read-only.

Commit

Permalink
Add CLI benchmark device selector (#5)
Browse files Browse the repository at this point in the history
* add command line device selection to benchmarks

* clean up device type selection, and run clang-format over all benchmark files

* removed unnecessary debugging print statements from cli device selector

* fixed failure to select device if no command line argument given

* apply fixes based on feedback from PR #5
  • Loading branch information
Adam Harries authored and Ruyk committed Oct 25, 2016
1 parent ec4c658 commit 04733fb
Show file tree
Hide file tree
Showing 15 changed files with 202 additions and 55 deletions.
5 changes: 3 additions & 2 deletions benchmarks/basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,16 @@
using namespace sycl::helpers;

benchmark<>::time_units_t benchmark_sort(const unsigned numReps,
const unsigned num_elems) {
const unsigned num_elems,
const cli_device_selector cds) {
std::vector<int> v1;

for (int i = num_elems; i > 0; i--) {
v1.push_back(i);
}

auto mysort = [&]() {
cl::sycl::queue q;
cl::sycl::queue q(cds);
sycl::sycl_execution_policy<class SortAlgorithm1> snp(q);
std::experimental::parallel::sort(snp, begin(v1), end(v1));
};
Expand Down
43 changes: 41 additions & 2 deletions benchmarks/benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <string>
#include <iostream>
#include <regex>
#include "cli_device_selector.hpp"

/**
* output_type
Expand All @@ -43,6 +44,8 @@ enum class output_type {
struct benchmark_arguments {
std::string program_name;
output_type requestedOutput;
std::string device_vendor;
std::string device_type;
bool validProgramOptions;

void usage() {
Expand All @@ -53,6 +56,11 @@ struct benchmark_arguments {
<< std::endl;
std::cout << " - CSV : Output to a CSV file " << std::endl;
std::cout << " - STDOUT: Output to stdout (default) " << std::endl;
std::cout << " --device DEVICE" << std::endl;
std::cout
<< " Select a device (best effort) for running the benchmark."
<< std::endl;
std::cout << " e.g. intel:cpu, amd:gpu etc" << std::endl;
}

benchmark_arguments(int argc, char** argv)
Expand All @@ -61,6 +69,7 @@ struct benchmark_arguments {
validProgramOptions(true) {
/* Match parameters */
std::regex output_regex("--output");
std::regex device_regex("--device");
/* Check if user has specified any options */
bool match = true;
for (int i = 1; i < argc; i++) {
Expand All @@ -73,7 +82,7 @@ struct benchmark_arguments {
}
// Check for the --output parameter
if (std::regex_match(option, output_regex)) {
if (i + 1 >= argc) {
if ((i + 1) >= argc) {
std::cerr << " Incorrect parameter " << std::endl;
match = false;
break;
Expand All @@ -95,6 +104,35 @@ struct benchmark_arguments {
i++;
}

// Check for the --device parameter
if (std::regex_match(option, device_regex)) {
if ((i + 1) >= argc) {
std::cerr << " Incorrect parameter " << std::endl;
match = false;
break;
}
std::string outputOption = argv[i + 1];
std::transform(outputOption.begin(), outputOption.end(),
outputOption.begin(), ::tolower);
// split the string into tokens on ':'
std::stringstream ss(outputOption);
std::string item;
std::vector<std::string> tokens;
while (std::getline(ss, item, ':')) {
tokens.push_back(item);
}
if (tokens.size() != 2) {
std::cerr << " Incorrect number of arguments to device selector "
<< std::endl;
} else {
device_vendor = tokens[0];
device_type = tokens[1];
matchedAnything = true;
}
// Skip next parameter, since it was the device
i++;
}

// This option is not valid
if (!matchedAnything) {
match = false;
Expand Down Expand Up @@ -176,12 +214,13 @@ struct benchmark {
if (!ba.validProgramOptions) { \
return 1; \
} \
cli_device_selector cds(ba.device_vendor, ba.device_type); \
const unsigned NUM_REPS = REPS; \
const unsigned STEP_SIZE = STEP_SIZE_PARAM; \
const unsigned MAX_ELEMS = STEP_SIZE * (NUM_STEPS); \
for (int nelems = STEP_SIZE; nelems < MAX_ELEMS; nelems *= STEP_SIZE) { \
const std::string short_name = NAME; \
auto time = FUNCTION(NUM_REPS, nelems); \
auto time = FUNCTION(NUM_REPS, nelems, cds); \
benchmark<>::output_data(short_name, nelems, time, ba.requestedOutput); \
} \
}
Expand Down
94 changes: 94 additions & 0 deletions benchmarks/cli_device_selector.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/* Copyright (c) 2015 The Khronos Group Inc.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and/or associated documentation files (the
"Materials"), to deal in the Materials without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Materials, and to
permit persons to whom the Materials are furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Materials.
MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
https://www.khronos.org/registry/
THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/

#ifndef __INTEL_CPU_SELECTOR__
#define __INTEL_CPU_SELECTOR__

#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <utility>

#include <SYCL/sycl.hpp>

/** class cli_device_selector.
 * @brief Device selector driven by a "vendor:type" request, typically taken
 * from the benchmark command line (e.g. "intel:cpu", "amd:gpu").
 *
 * Each candidate device is scored on two independent criteria and the two
 * partial scores are added together:
 *  - device type: +2 if it equals the requested type (or any type was
 *    requested with "*" / "any"), +1 if no recognisable type was requested,
 *    -2 otherwise;
 *  - vendor: +2 if the lower-cased platform name contains the requested
 *    vendor string, +1 if no vendor (or "*") was requested, -2 otherwise.
 *
 * The result therefore lies in [-4, 4]. How the runtime uses the score
 * (highest wins, negative scores rejected) is defined by the SYCL
 * device_selector contract, not by this class.
 */
class cli_device_selector : public cl::sycl::device_selector {
  /// Requested vendor substring, stored lower-cased.
  std::string m_vendor_name;
  /// Requested device type, parsed once at construction.
  cl::sycl::info::device_type m_requested_type;

  /** to_lower
   * @brief Returns a lower-cased copy of @p text.
   * @param text : The string to convert
   */
  static std::string to_lower(std::string text) {
    // std::tolower has undefined behaviour for negative char values, so the
    // character is routed through unsigned char first.
    std::transform(text.begin(), text.end(), text.begin(),
                   [](unsigned char c) {
                     return static_cast<char>(std::tolower(c));
                   });
    return text;
  }

  /** match_device_type
   * @brief Translates a textual device type into a SYCL device_type.
   * @param requested : "gpu", "cpu", "accel", "*" or "any" (case-insensitive)
   * @return The matching device_type; device_type::defaults when the string
   *         is empty or not recognised.
   */
  static cl::sycl::info::device_type match_device_type(std::string requested) {
    if (requested.empty()) return cl::sycl::info::device_type::defaults;
    requested = to_lower(std::move(requested));
    if (requested == "gpu") return cl::sycl::info::device_type::gpu;
    if (requested == "cpu") return cl::sycl::info::device_type::cpu;
    if (requested == "accel") return cl::sycl::info::device_type::accelerator;
    if (requested == "*" || requested == "any")
      return cl::sycl::info::device_type::all;

    return cl::sycl::info::device_type::defaults;
  }

 public:
  /** cli_device_selector
   * @brief Builds a selector from the two halves of a "vendor:type" request.
   * @param vendor_name : Substring to look for in the platform name; empty
   *                      or "*" means "no preference". Case-insensitive.
   * @param device_type : Textual device type, see match_device_type.
   */
  cli_device_selector(std::string vendor_name, std::string device_type)
      : cl::sycl::device_selector(),
        m_vendor_name(to_lower(std::move(vendor_name))),
        m_requested_type(match_device_type(std::move(device_type))) {}

  /** operator()
   * @brief Scores @p device against the requested vendor and device type.
   * @param device : The candidate device
   * @return Sum of the device-type score and the vendor score, in [-4, 4].
   */
  int operator()(const cl::sycl::device &device) const override {
    int score = 0;

    // Score the device type...
    const cl::sycl::info::device_type dtype =
        device.get_info<cl::sycl::info::device::device_type>();
    if (m_requested_type == dtype ||
        m_requested_type == cl::sycl::info::device_type::all) {
      score += 2;
    } else if (m_requested_type == cl::sycl::info::device_type::defaults) {
      score += 1;
    } else {
      score -= 2;
    }

    // ...then the vendor. The match is done against the platform *name*,
    // lower-cased so that it compares with the lower-cased request.
    cl::sycl::platform plat = device.get_platform();
    const std::string name =
        to_lower(plat.get_info<cl::sycl::info::platform::name>());
    if (!m_vendor_name.empty() &&
        name.find(m_vendor_name) != std::string::npos) {
      score += 2;
    } else if (m_vendor_name == "*" || m_vendor_name.empty()) {
      score += 1;
    } else {
      score -= 2;
    }
    return score;
  }
};

#endif // __INTEL_CPU_SELECTOR__
5 changes: 3 additions & 2 deletions benchmarks/montecarloPI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ int isInsideCircleFunctor(cl::sycl::float2 p) {
}

benchmark<>::time_units_t benchmark_montecarlo(const unsigned numReps,
const unsigned num_elems) {
const unsigned num_elems,
const cli_device_selector cds) {
// Container for the random points
std::vector<cl::sycl::float2> pointset;
std::srand((unsigned int)std::time(0));
Expand All @@ -59,7 +60,7 @@ benchmark<>::time_units_t benchmark_montecarlo(const unsigned numReps,
}

auto myMontecarlo = [&]() {
cl::sycl::queue q;
cl::sycl::queue q(cds);
sycl::sycl_execution_policy<class MontecarloAlgorithm1> snp(q);
count = std::experimental::parallel::transform_reduce(
snp, pointset.begin(), pointset.end(),
Expand Down
15 changes: 8 additions & 7 deletions benchmarks/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Body {
cl::sycl::cl_float3 pos; // position components
cl::sycl::cl_float3 vel; // velocity components
cl::sycl::cl_float3 acc; // force components
float mass; // mass of the particle
float mass; // mass of the particle

public:
/** generateBody
Expand Down Expand Up @@ -118,7 +118,7 @@ class Body {
* @brief Function that prints the Body attributes to an ostream
* @param os : The output stream
*/
void printToFile(std::ostream& os){
void printToFile(std::ostream& os) {
os << pos.x() << " " << pos.y() << " " << pos.z() << " ";
os << vel.x() << " " << vel.y() << " " << vel.z() << " ";
os << acc.x() << " " << acc.y() << " " << acc.z() << " ";
Expand All @@ -130,7 +130,8 @@ class Body {
* @brief Body Function that executes the SYCL CG of NBODY
*/
benchmark<>::time_units_t benchmark_nbody(const unsigned numReps,
const unsigned N) {
const unsigned N,
const cli_device_selector cds) {
srand(time(NULL));
std::vector<Body> bodies(N);

Expand All @@ -141,7 +142,7 @@ benchmark<>::time_units_t benchmark_nbody(const unsigned numReps,
}
auto mainLoop = [&]() {
auto d_bodies = sycl::helpers::make_buffer(begin(bodies), end(bodies));
cl::sycl::queue q;
cl::sycl::queue q(cds);
// Main loop
auto vectorSize = d_bodies.get_count();
auto f = [vectorSize, &d_bodies](cl::sycl::handler& h) mutable {
Expand All @@ -166,9 +167,9 @@ benchmark<>::time_units_t benchmark_nbody(const unsigned numReps,
sycl::sycl_execution_policy<class UpdateAlgorithm> snp2(q);
std::experimental::parallel::for_each(snp2, begin(d_bodies), end(d_bodies),
[=](Body& body) {
body.update();
return body;
}); // main loop
body.update();
return body;
}); // main loop

q.wait_and_throw();
};
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/std_foreach_saxpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
#include "benchmark.h"

benchmark<>::time_units_t benchmark_foreach(const unsigned numReps,
const unsigned num_elems) {
const unsigned num_elems,
const cli_device_selector cds) {
std::vector<float> v1;

for (int i = num_elems; i > 0; i--) {
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/std_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
#include "benchmark.h"

benchmark<>::time_units_t benchmark_sort(const unsigned numReps,
const unsigned num_elems) {
const unsigned num_elems,
const cli_device_selector cds) {
std::vector<int> v1;

for (int i = num_elems; i > 0; i--) {
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/std_transform_saxpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
#include "benchmark.h"

benchmark<>::time_units_t benchmark_transform(const unsigned numReps,
const unsigned num_elems) {
const unsigned num_elems,
const cli_device_selector cds) {
std::vector<float> v1;
std::vector<float> v2;
std::vector<float> res;
Expand Down
18 changes: 10 additions & 8 deletions benchmarks/sycl_exclusive_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,27 @@ using namespace sycl::helpers;
/** benchmark_exclusive_scan
* @brief Body Function that executes the SYCL CG of exclusive_scan
*/
benchmark<>::time_units_t benchmark_exclusive_scan(const unsigned numReps,
const unsigned num_elems) {
benchmark<>::time_units_t benchmark_exclusive_scan(
const unsigned numReps, const unsigned num_elems,
const cli_device_selector cds) {
std::vector<int> v1;

for (int i = num_elems; i > 0; i--) {
v1.push_back(i);
}

auto exclusive_scan = [&]() {
cl::sycl::queue q;
cl::sycl::queue q(cds);
sycl::sycl_execution_policy<class ExclusiveScanAlgorithm1> snp(q);
std::experimental::parallel::exclusive_scan(snp, begin(v1), end(v1),
begin(v1), 0, [=](int x, int y){ return x + y; });
std::experimental::parallel::exclusive_scan(
snp, begin(v1), end(v1), begin(v1), 0,
[=](int x, int y) { return x + y; });
};

auto time = benchmark<>::duration(
numReps, exclusive_scan);
auto time = benchmark<>::duration(numReps, exclusive_scan);

return time;
}

BENCHMARK_MAIN("BENCH_SYCL_EXCLUSIVE_SCAN", benchmark_exclusive_scan, 2u, 33554432u, 1);
BENCHMARK_MAIN("BENCH_SYCL_EXCLUSIVE_SCAN", benchmark_exclusive_scan, 2u,
33554432u, 1);
7 changes: 4 additions & 3 deletions benchmarks/sycl_foreach_saxpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,16 @@
using namespace sycl::helpers;

benchmark<>::time_units_t benchmark_foreach(const unsigned numReps,
const unsigned num_elems) {
const unsigned num_elems,
const cli_device_selector cds) {
std::vector<int> v1;

for (int i = num_elems; i > 0; i--) {
v1.push_back(i);
}

cl::sycl::queue q;
auto myforeach = [&]() {
cl::sycl::queue q(cds);
auto myforeach = [&]() {
sycl::sycl_execution_policy<class ForEachAlgorithm1> snp(q);
std::experimental::parallel::for_each(
snp, begin(v1), end(v1), [=](float val) { return val + val * val; });
Expand Down
Loading

0 comments on commit 04733fb

Please sign in to comment.