forked from BVLC/caffe
-
Notifications
You must be signed in to change notification settings - Fork 22
/
parallel.hpp
121 lines (98 loc) · 2.86 KB
/
parallel.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#ifndef CAFFE_PARALLEL_HPP_
#define CAFFE_PARALLEL_HPP_
#include <boost/date_time/posix_time/posix_time.hpp>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/solver.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/util/blocking_queue.hpp"
namespace caffe {
// Represents a net parameters. Once a net is created, its parameter buffers can
// be replaced by ones from Params, to allow parallelization. Params ensures
// parameters are allocated in one consecutive array.
template<typename Dtype>
class Params {
 public:
  // Presumably sizes size_ from root_solver's net parameters; defined in
  // parallel.cpp — TODO confirm. data_/diff_ are raw pointers whose
  // allocation is left to subclasses (see GPUParams).
  explicit Params(shared_ptr<Solver<Dtype> > root_solver);
  virtual ~Params() {
  }

  // Number of elements in the consecutive parameter/gradient arrays.
  inline size_t size() const {
    return size_;
  }
  // Start of the consecutive network-parameter (weight) buffer.
  // NOTE: returns a mutable pointer from a const method — the buffer
  // contents are shared and writable by design.
  inline Dtype* data() const {
    return data_;
  }
  // Start of the consecutive gradient buffer.
  inline Dtype* diff() const {
    return diff_;
  }

 protected:
  const size_t size_;  // Size of buffers
  Dtype* data_;        // Network parameters
  Dtype* diff_;        // Gradient

  // Copying would alias the raw buffers; forbidden (macro from common.hpp).
  DISABLE_COPY_AND_ASSIGN(Params);
};
// Params stored in GPU memory.
template<typename Dtype>
class GPUParams : public Params<Dtype> {
 public:
  // device is presumably a CUDA device ordinal on which the buffers live;
  // allocation happens in parallel.cpp — TODO confirm.
  GPUParams(shared_ptr<Solver<Dtype> > root_solver, int device);
  virtual ~GPUParams();

  // Points the given solver's net at this instance's shared parameter
  // buffers (const: does not modify this object's state).
  void configure(Solver<Dtype>* solver) const;

 protected:
  // Re-export the base's buffer members for use by this class hierarchy.
  using Params<Dtype>::size_;
  using Params<Dtype>::data_;
  using Params<Dtype>::diff_;
};
// A (parent, device) pair of GPU ids forming one edge of the gradient
// synchronization tree: `device` exchanges data with `parent`.
class DevicePair {
 public:
  // parent: GPU id of the parent node; device: GPU id of this node.
  DevicePair(int parent, int device)
      : parent_(parent),
        device_(device) {
  }
  // Accessors are const: a pair is immutable after construction.
  // (Fix: the originals were non-const, so a const DevicePair was unusable.)
  inline int parent() const {
    return parent_;
  }
  inline int device() const {
    return device_;
  }

  // Group GPUs in pairs, by proximity depending on machine's topology.
  // Declared here, defined in parallel.cpp. std::vector is spelled out so
  // the header does not depend on a using-declaration from another header.
  // NOTE(review): `devices` is passed by const value; a const reference
  // would avoid a copy but would change the out-of-line definition too.
  static void compute(const std::vector<int> devices,
                      std::vector<DevicePair>* pairs);

 protected:
  int parent_;  // GPU id of the parent in the sync tree
  int device_;  // GPU id of this node
};
// Synchronous data parallelism using map-reduce between local GPUs.
//
// Each instance wraps one solver and forms a node of a tree rooted at the
// root solver: it is a GPUParams (owns the consecutive GPU buffers), a
// Solver callback, and an InternalThread (worker nodes run their solver on
// a background thread).
template<typename Dtype>
class P2PSync : public GPUParams<Dtype>, public Solver<Dtype>::Callback,
    public InternalThread {
 public:
  // parent is the parent node in the sync tree (NULL for the root);
  // param configures this node's solver.
  explicit P2PSync(shared_ptr<Solver<Dtype> > root_solver,
                   P2PSync<Dtype>* parent, const SolverParameter& param);
  virtual ~P2PSync();

  // Solver owned by this node.
  inline const shared_ptr<Solver<Dtype> >& solver() const {
    return solver_;
  }

  void Run(const vector<int>& gpus);
  // Builds the syncs for the given GPUs into *syncs — presumably one per
  // device, wired into the tree computed by DevicePair::compute (defined in
  // parallel.cpp — TODO confirm).
  void Prepare(const vector<int>& gpus,
               vector<shared_ptr<P2PSync<Dtype> > >* syncs);
  // Iteration count captured at construction. (Fix: dropped the meaningless
  // top-level const on the by-value return, which compilers flag with
  // -Wignored-qualifiers; callers are unaffected.)
  inline int initial_iter() const { return initial_iter_; }

 protected:
  // Solver<Dtype>::Callback hooks; names indicate they run at step start
  // and once gradients are ready — exact timing is defined by Solver.
  void on_start();
  void on_gradients_ready();

  // InternalThread entry point.
  void InternalThreadEntry();

  P2PSync<Dtype>* parent_;             // NULL for the root node
  vector<P2PSync<Dtype>*> children_;
  BlockingQueue<P2PSync<Dtype>*> queue_;
  const int initial_iter_;
  Dtype* parent_grads_;                // raw gradient buffer shared with parent
  shared_ptr<Solver<Dtype> > solver_;

  // Re-export the inherited consecutive buffers.
  using Params<Dtype>::size_;
  using Params<Dtype>::data_;
  using Params<Dtype>::diff_;
};
} // namespace caffe
#endif