-
Notifications
You must be signed in to change notification settings - Fork 43
/
Copy pathroundwiselog.hpp
77 lines (71 loc) · 2.21 KB
/
roundwiselog.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#pragma once
#include <algorithm>
#include <cstddef>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include "util.hpp"
#include "../arm/arm.hpp"
#include "../policy/policy.hpp"
namespace bandit{
//Per-round accumulator of rewards and regrets.
//Both tables are indexed as [policy][round] and hold sums over every
//record() call made so far; `run` counts the calls to startRun().
class RoundwiseLog{
public:
  uint K, P, T, run;
  std::vector<std::vector<double> > roundwiseRewards;
  std::vector<std::vector<double> > roundwiseRegrets;

  //builds two zero-filled tables of P rows with T entries each; run starts at 0
  RoundwiseLog(uint K, uint P, uint T)
    : K(K), P(P), T(T), run(0),
      roundwiseRewards(P, std::vector<double>(T, 0.0)),
      roundwiseRegrets(P, std::vector<double>(T, 0.0)) {
  }

  //begin a new run: bumps the run counter by one
  void startRun(){
    ++run;
  }

  //policy p at round t chose arm k and received reward r;
  //r and regretDelta are added to the sums at [p][t].
  //k is accepted but not stored, and p / t are not range-checked.
  void record(uint p, uint t, uint k, double r, double regretDelta){
    (void)k;
    std::vector<double> &rewardRow = roundwiseRewards[p];
    std::vector<double> &regretRow = roundwiseRegrets[p];
    rewardRow[t] = rewardRow[t] + r;
    regretRow[t] = regretRow[t] + regretDelta;
  }
};
//Writes the contents of a RoundwiseLog to a plain-text file.
class RoundwiseLogWriter{
public:
  //Dumps `log` into `filename` as whitespace-separated text.
  //
  //File layout:
  //  - '#'-prefixed header: number of runs, one line per arm name,
  //    one line per policy name, then a column legend;
  //  - one line per round, starting with the 1-based round number and
  //    followed, for each policy, by the cumulative reward and/or the
  //    cumulative regret up to that round divided by log.run, printed in
  //    fixed notation with two decimals.
  //
  //log         : source of the per-round sums and of the run count
  //armNames    : names written to the header (one "#arm" line each)
  //policyNames : names written to the header; also decides how many
  //              policies get columns
  //T           : number of rounds to write
  //filename    : path of the file to create / truncate
  //rewardPlot  : emit the cumulative reward column of each policy
  //regretPlot  : emit the cumulative regret column of each policy
  //
  //If the file cannot be opened a message is printed to std::cerr and
  //nothing is written. The policy count and T are clamped to what `log`
  //actually stores, so oversized arguments shorten the output instead of
  //reading past the end of the tables.
  static void logWrite(RoundwiseLog &log,
      std::vector<std::string> armNames, std::vector<std::string> policyNames,
      uint T, std::string filename, bool rewardPlot=true, bool regretPlot=true){
    const uint K = armNames.size();

    //never index more policies than both tables hold
    const std::size_t storedPolicies =
        std::min(log.roundwiseRewards.size(), log.roundwiseRegrets.size());
    const uint P = static_cast<uint>(std::min(policyNames.size(), storedPolicies));

    //never index more rounds than the shortest row of any printed policy
    uint rounds = T;
    for(uint p=0;p<P;++p){
      const std::size_t storedRounds =
          std::min(log.roundwiseRewards[p].size(), log.roundwiseRegrets[p].size());
      if(storedRounds < rounds){
        rounds = static_cast<uint>(storedRounds);
      }
    }

    std::ofstream ofs( filename );
    if(!ofs){
      std::cerr << "RoundwiseLogWriter: cannot open '" << filename
                << "' for writing" << std::endl;
      return;
    }

    //'\n' instead of std::endl: same bytes, but no flush per line;
    //the stream is flushed when ofs goes out of scope
    ofs << "#averaged result over " << log.run << " trials" << '\n';
    for(uint i=0;i<K;++i){
      ofs << "#arm" << i << " " << armNames[i] << '\n';
    }
    for(uint p=0;p<P;++p){
      ofs << "#policy " << p << " " << policyNames[p] << '\n';
    }
    ofs << "#results:" << '\n';
    ofs << "#T" ;
    for(uint p=0;p<P;++p){
      if(rewardPlot){
        ofs << " reward" << p;
      }
      if(regretPlot){
        ofs << " regret" << p ;
      }
    }
    ofs << '\n';

    //number formatting is loop-invariant, so set it once
    ofs.setf(std::ios::fixed, std::ios::floatfield);
    ofs << std::setprecision(2);

    //with zero runs the average is undefined; fall back to the raw sums
    //rather than printing nan/inf
    const double trials = (log.run > 0) ? static_cast<double>(log.run) : 1.0;

    std::vector<double> cumulatedRewards(P, 0.0);
    std::vector<double> cumulatedRegrets(P, 0.0);
    for(uint t=0;t<rounds;++t){
      ofs << (t+1);
      for(uint p=0;p<P;++p){
        cumulatedRewards[p]+=log.roundwiseRewards[p][t];
        cumulatedRegrets[p]+=log.roundwiseRegrets[p][t];
        if(rewardPlot){
          ofs << " " << cumulatedRewards[p]/trials;
        }
        if(regretPlot){
          ofs << " " << cumulatedRegrets[p]/trials;
        }
      }
      ofs << '\n';
    }
  }
};
} //namespace