Skip to content

Commit

Permalink
examples in base and datadriven fixed; BatchLearner fixed; new Learne…
Browse files Browse the repository at this point in the history
…rSGDE added -> does sparse grid density estimation

git-svn-id: https://ipvs.informatik.uni-stuttgart.de/SGpp/repos/trunk@4679 4eea3252-f0fb-4393-894d-40516dce545b
  • Loading branch information
Fabian Franzelin committed Jul 31, 2015
1 parent eedfe93 commit 2239567
Show file tree
Hide file tree
Showing 17 changed files with 774 additions and 82 deletions.
4 changes: 2 additions & 2 deletions base/src/sgpp/base/operation/hash/OperationSecondMoment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace SGPP {
namespace base {

/**
* This class provides the first moment of a sparse grid function
* This class provides the second moment of a sparse grid function
*/
class OperationSecondMoment {
public:
Expand All @@ -41,4 +41,4 @@ namespace SGPP {
}
}

#endif /* OPERATIONSECONDMOMENT_HPP */
#endif /* OPERATIONSECONDMOMENT_HPP */
30 changes: 11 additions & 19 deletions datadriven/examples/batchLearnerExample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,18 @@
#include <sgpp/datadriven/application/BatchConfiguration.hpp>
#include <sgpp/datadriven/tools/Dataset.hpp>
#include <sgpp/datadriven/tools/ARFFTools.hpp>
#include <sgpp/globaldef.hpp>

/**
* This program demonstrates the usage of the BatchLearner class. After the parameters are set, the method trainBatch() is called until the end of the file has been reached.
*/

using namespace sg::base;
using namespace sg::datadriven;
using namespace SGPP::base;
using namespace SGPP::datadriven;
using namespace std;


int main (int argc, char** args) {
cout << "parameters: bs(batch size), ts (test size), input, mode(weighting), arg(for weighting), level, pts(to refine every refinement), ref(refine every xth batch, 0=never)" << endl;

std::map<string, string> argsMap;

for (int i = 1; i < argc; i += 2) {
argsMap[args[i]] = args[i + 1];
}


//set variables
sg::base::BatchConfiguration batchConfig;
sg::solver::SLESolverConfiguration solverConfig;
Expand All @@ -41,7 +33,7 @@ int main (int argc, char** args) {

// Set Adaptivity
adaptConfig.maxLevelType_ = false;//not used by BatchLearner
adaptConfig.noPoints_ = std::stoi(argsMap["pts"]);
adaptConfig.noPoints_ = 2;
adaptConfig.numRefinements_ = 1;//not used by BatchLearner
adaptConfig.percent_ = 10.0;//not used by BatchLearner
adaptConfig.threshold_ = 0.001;
Expand All @@ -53,20 +45,20 @@ int main (int argc, char** args) {
solverConfig.type_ = sg::solver::CG;

// Set parameters for the batchLearner
batchConfig.filename = argsMap["input"].c_str();
batchConfig.batchsize = std::stoi(argsMap["bs"]);
batchConfig.filename = "../tests/data/friedman_4d_2000.arff";
batchConfig.batchsize = 500;
batchConfig.samples = 500;
batchConfig.seed = 42;
batchConfig.wMode = std::stoi(argsMap["mode"]);;
batchConfig.wArgument = std::stof(argsMap["arg"]);
batchConfig.refineEvery = std::stoi(argsMap["ref"]);
batchConfig.wMode = 5;
batchConfig.wArgument = 1.0;
batchConfig.refineEvery = 0;
batchConfig.verbose = true;
batchConfig.stack = 0;
batchConfig.testsize = std::stoi(argsMap["ts"]);
batchConfig.testsize = 200;
batchConfig.lambda = 0.0001f;

//set up the grid config
gridConfig.level_ = std::stoi(argsMap["level"]);
gridConfig.level_ = 4;

//init the learner
sg::datadriven::BatchLearner learner(batchConfig, gridConfig, solverConfig, adaptConfig);
Expand Down
75 changes: 75 additions & 0 deletions datadriven/examples/learnerSGDETest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (C) 2008-today The SG++ project
// This file is part of the SG++ project. For conditions of distribution and
// use, please see the copyright notice provided with SG++ or at
// sgpp.sparsegrids.org

#include <sgpp/datadriven/tools/ARFFTools.hpp>
#include <sgpp/datadriven/application/LearnerSGDE.hpp>
#include <sgpp/base/grid/Grid.hpp>
#include <sgpp/pde/application/RegularizationConfiguration.hpp>
#include <sgpp/datadriven/application/LearnerSGDE.hpp>
#include <sgpp/datadriven/application/GaussianKDE.hpp>
#include <sgpp/globaldef.hpp>

using namespace std;

int main(int argc, char** argv) {
std::string filename = "../tests/data/friedman_4d_2000.arff";

cout << "# loading file: " << filename << endl;
SGPP::datadriven::Dataset dataset = SGPP::datadriven::ARFFTools::readARFF(filename);
SGPP::base::DataMatrix* samples = dataset.getTrainingData();

// configure grid
cout << "# create grid config" << endl;
SGPP::base::RegularGridConfiguration gridConfig;
gridConfig.dim_ = dataset.getDimension();
gridConfig.level_ = 4;
gridConfig.type_ = SGPP::base::GridType::Linear;

// configure adaptive refinement
cout << "# create adaptive refinement config" << endl;
SGPP::base::AdpativityConfiguration adaptConfig;
adaptConfig.numRefinements_ = 0;
adaptConfig.noPoints_ = 10;

// configure solver
cout << "# create solver config" << endl;
SGPP::solver::SLESolverConfiguration solverConfig;
solverConfig.maxIterations_ = 1000;
solverConfig.eps_ = 1e-10;
solverConfig.threshold_ = 1e-10;

// configure regularization
cout << "# create regularization config" << endl;
SGPP::pde::RegularizationConfiguration regularizationConfig;
regularizationConfig.regType_ = SGPP::pde::RegularizationType::Laplace;

// configure learner
cout << "# create learner config" << endl;
SGPP::datadriven::LearnerSGDEConfiguration learnerConfig;
learnerConfig.doCrossValidation_ = true;
learnerConfig.kfold_ = 3;
learnerConfig.lambdaStart_ = 1e-1;
learnerConfig.lambdaEnd_ = 1e-10;
learnerConfig.lambdaSteps_ = 3;
learnerConfig.logScale_ = true;
learnerConfig.shuffle_ = true;
learnerConfig.seed_ = 1234567;
learnerConfig.silent_ = false;

cout << "# creating the learner" << endl;
SGPP::datadriven::LearnerSGDE learner(gridConfig, adaptConfig, solverConfig, regularizationConfig, learnerConfig);
learner.initialize(*samples);

SGPP::datadriven::GaussianKDE kde(*samples);
SGPP::base::DataVector x(learner.getDim());
for (size_t i = 0; i < x.getSize(); i++) {
x[i] = 0.5;
}
cout << "--------------------------------------------------------" << endl;
cout << "pdf_SGDE(x) = " << learner.pdf(x) << " ~ " << kde.pdf(x) << " = pdf_KDE(x)" << endl;
cout << "mean_SGDE(x) = " << learner.mean() << " ~ " << kde.mean() << " = mean_KDE(x)" << endl;
cout << "var_SGDE(x) = " << learner.variance() << " ~ " << kde.variance() << " = var_KDE(x)" << endl;
}

3 changes: 1 addition & 2 deletions datadriven/examples/multTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ void doAllRefinements(SGPP::base::AdpativityConfiguration& adaptConfig,

int main(int argc, char** argv) {

std::string fileName = "friedman2_90000.arff";
// std::string fileName = "debugging.arff";
std::string fileName = "../tests/data/friedman_4d_2000.arff";

uint32_t level = 3;

Expand Down
3 changes: 1 addition & 2 deletions datadriven/examples/multTransposeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ void doAllRefinements(SGPP::base::AdpativityConfiguration& adaptConfig,

int main(int argc, char** argv) {

std::string fileName = "friedman2_90000.arff";
//std::string fileName = "debugging.arff";
std::string fileName = "../tests/data/friedman_4d_2000.arff";

uint32_t level = 3;

Expand Down
3 changes: 1 addition & 2 deletions datadriven/examples/multiEvalPerformance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

int main(int argc, char** argv) {

// std::string fileName = "friedman_4d_2000.arff";
std::string fileName = "debugging.arff";
std::string fileName = "../tests/data/friedman_4d_2000.arff";

SGPP::datadriven::ARFFTools arffTools;
SGPP::datadriven::Dataset dataset = arffTools.readARFF(fileName);
Expand Down
6 changes: 1 addition & 5 deletions datadriven/examples/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,9 @@

int main(int argc, char** argv) {

// int maxLevel = 9;
int maxLevel = 9;

// std::string fileName = "debugging.arff";
// std::string fileName = "friedman_4d_2000.arff";
std::string fileName = "friedman2_90000.arff";
// std::string fileName = "bigger.arff";
std::string fileName = "../tests/data/friedman_4d_2000.arff";

//sg::base::RegularGridConfiguration gridConfig;
sg::solver::SLESolverConfiguration SLESolverConfigRefine;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,23 @@
#include <string>


//using namespace sg::base;
using namespace std;


namespace SGPP {
namespace base {
/**
* structure to provide parameters for the BatchLearner
*/
struct BatchConfiguration {
string filename;//!< arff-file to be read
std::string filename;//!< arff-file to be read
size_t batchsize;//!< size of one batch
size_t samples;//!< number of samples for the Monte Carlo sampling (normalization) (0=don't sample) good: 1000
int seed;//!< seed for the sampling
int wMode;//!< number of weighting mode to use x = batch#, y = wArgument: 0 = all batches are equal, 1 = linear (x*y), 2 = pow(y,x), 3 = y/x, 4 = only the last batch counts, 5 = weigh new batch by proportion, but at least y
float wArgument;//!< argument for the weighting method
int wMode;//!< number of weighting mode to use x = batch#, y = wArgument: 0 = all batches are equal, 1 = linear (x*y), 2 = pow(y,x), 3 = y/x, 4 = only the last batch counts, 5 = weight new batch by proportion, but at least y
float_t wArgument;//!< argument for the weighting method
size_t refineEvery;//!< refine every xth batch (0=never)
bool verbose;//!< verbose flag
size_t stack;//!< number of last batches alphavectors to be saved (0=all)
size_t testsize;//!< how many items to test from the data following the batch (0=don't test after learned)
float lambda;//!< lambda for solving
float_t lambda;//!< lambda for solving
};


Expand Down
40 changes: 20 additions & 20 deletions datadriven/src/sgpp/datadriven/application/BatchLearner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include <sgpp/base/operation/hash/OperationEval.hpp>
#include <sgpp/base/exception/application_exception.hpp>

using namespace sg::base;
using namespace SGPP::base;
using namespace std;


Expand All @@ -61,7 +61,7 @@ namespace SGPP {
throw base::application_exception("BatchLearner: An unsupported SLE solver type was chosen!");

//open file
reader.open(batchConf.filename);
reader.open(batchConf.filename.c_str());

if (!reader) {
cout << "ERROR: file does not exist: " << batchConf.filename << endl;
Expand All @@ -74,7 +74,7 @@ namespace SGPP {
size_t cur_pos = 0;
size_t cur_find = 0;
string cur_value;
double dbl_cur_value;
float_t dbl_cur_value;

DataVector temprow(dimensions);

Expand Down Expand Up @@ -165,9 +165,9 @@ namespace SGPP {

//wMode 5: weigh old alpha with new alpha by occurences
if (batchConf.wMode == 5) {
double k = (double) dataInBatch.at(grid)->getNrows();
double n = (double) occurences.at(grid);
double wNew = max(k / (n + k), (double)batchConf.wArgument);
float_t k = (float_t) dataInBatch.at(grid)->getNrows();
float_t n = (float_t) occurences.at(grid);
float_t wNew = max(k / (n + k), (float_t)batchConf.wArgument);

if (batchConf.verbose)
cout << "old weight: " << 1.0 - wNew << " new weight: " << wNew << endl;
Expand All @@ -191,21 +191,21 @@ namespace SGPP {
}

size_t count = alphaStorage.at(grid).size();//count of old alphas available for calculation
vector<float> factors;
vector<float_t> factors;

//previous alphas exist
//calc factors
float sum = 0.0f;
float_t sum = 0.0f;

for (size_t i = 0; i < count; i++) {
if (batchConf.wMode == 0)
factors.push_back((float)1);//temp: all alphas are equal
factors.push_back((float_t)1);//temp: all alphas are equal
else if (batchConf.wMode == 1)
factors.push_back((float)(i + 1)*batchConf.wArgument); //linear
factors.push_back((float_t)(i + 1)*batchConf.wArgument); //linear
else if (batchConf.wMode == 2)
factors.push_back((float)pow(batchConf.wArgument, (i + 1))); //exp
factors.push_back((float_t)pow(batchConf.wArgument, (i + 1))); //exp
else if (batchConf.wMode == 3)
factors.push_back((float)batchConf.wArgument / (float)(i + 1)); //1/x bzw arg/x
factors.push_back((float_t)batchConf.wArgument / (float_t)(i + 1)); //1/x bzw arg/x
else if (batchConf.wMode != 4 && batchConf.wMode != 5) { //4 and 5 treated elsewhere
cout << "unsupported weighting mode (mode/arg): " << batchConf.wMode << "/" << batchConf.wArgument << endl;
throw 42;
Expand Down Expand Up @@ -238,7 +238,7 @@ namespace SGPP {
//update norm factors
for (auto const& p : grids) {
//for each grid
double evalsum = 0;
float_t evalsum = 0;

for (float x = 0; x < batchConf.samples; x++) {
//generate points per grid
Expand All @@ -250,15 +250,15 @@ namespace SGPP {

//add norm factor
OperationEval* opEval = SGPP::op_factory::createOperationEval(*grids.at(p.first));
double temp = opEval->eval(*alphaVectors.at(p.first), pt);
float_t temp = opEval->eval(*alphaVectors.at(p.first), pt);

if (batchConf.verbose && abs(temp) > 100)
cout << "warning abs>100: " << temp << " for " << pt.toString() << endl;

evalsum += temp;
}

evalsum = evalsum / (double) batchConf.samples;
evalsum = evalsum / (float_t) batchConf.samples;
//update the normFactor
normFactors.at(p.first) = evalsum;

Expand All @@ -276,12 +276,12 @@ namespace SGPP {
testDataset.getRow(i, pt);
//Compute maximum of all density functions:
int max_index = -1;
double max = -1.0f * numeric_limits<double>::max();
float_t max = -1.0f * numeric_limits<float_t>::max();

for (auto const& g : grids) {
SGPP::base::OperationEval* Eval = SGPP::op_factory::createOperationEval(*g.second);
//posterior = likelihood*prior
double res = Eval->eval(*alphaVectors.at(g.first), pt);
float_t res = Eval->eval(*alphaVectors.at(g.first), pt);
delete Eval;

if (batchConf.samples != 0)
Expand Down Expand Up @@ -322,7 +322,7 @@ namespace SGPP {

alphaVectors.insert(std::pair<int, DataVector*>(p.first, new DataVector(grids.at(p.first)->getSize())));
alphaVectors.at(p.first)->setAll(0.0);
normFactors.insert(std::pair<int, float>(p.first, 1));
normFactors.insert(std::pair<int, float_t>(p.first, 1));
}


Expand Down Expand Up @@ -434,8 +434,8 @@ namespace SGPP {
//calc accuracy for this batch and all tests
t_total += (int)result.getSize();
t_correct += correct;
acc_current = (double)(100.0 * correct / (double)result.getSize());
acc_global = (double)(100.0 * t_correct / (double)t_total);
acc_current = (float_t)(100.0 * correct / (float_t)result.getSize());
acc_global = (float_t)(100.0 * t_correct / (float_t)t_total);
//output accuracy
cout << "batch:\t" << acc_current << "% (" << correct << "/" << result.getSize() << ")" << endl;
cout << "total:\t" << acc_global << "% (" << t_correct << "/" << t_total << ")" << endl;
Expand Down
Loading

0 comments on commit 2239567

Please sign in to comment.