Skip to content

Commit

Permalink
examples in base and datadriven fixed; BatchLearner fixed; new Learne…
Browse files Browse the repository at this point in the history
…rSGDE added -> does sparse grid density estimation

git-svn-id: https://ipvs.informatik.uni-stuttgart.de/SGpp/repos/trunk@4679 4eea3252-f0fb-4393-894d-40516dce545b
  • Loading branch information
Fabian Franzelin committed Jul 31, 2015
1 parent eedfe93 commit 2239567
Show file tree
Hide file tree
Showing 17 changed files with 774 additions and 82 deletions.
4 changes: 2 additions & 2 deletions base/src/sgpp/base/operation/hash/OperationSecondMoment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace SGPP {
namespace base {

/**
* This class provides the first moment of a sparse grid function
* This class provides the second moment of a sparse grid function
*/
class OperationSecondMoment {
public:
Expand All @@ -41,4 +41,4 @@ namespace SGPP {
}
}

#endif /* OPERATIONSECONDMOMENT_HPP */
#endif /* OPERATIONSECONDMOMENT_HPP */
30 changes: 11 additions & 19 deletions datadriven/examples/batchLearnerExample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,18 @@
#include <sgpp/datadriven/application/BatchConfiguration.hpp>
#include <sgpp/datadriven/tools/Dataset.hpp>
#include <sgpp/datadriven/tools/ARFFTools.hpp>
#include <sgpp/globaldef.hpp>

/**
* This program demonstrates the usage of the BatchLearner class. After the parameters are set, the method trainBatch() is called until the end of the file has been reached.
*/

using namespace sg::base;
using namespace sg::datadriven;
using namespace SGPP::base;
using namespace SGPP::datadriven;
using namespace std;


int main (int argc, char** args) {
cout << "parameters: bs(batch size), ts (test size), input, mode(weighting), arg(for weighting), level, pts(to refine every refinement), ref(refine every xth batch, 0=never)" << endl;

std::map<string, string> argsMap;

for (int i = 1; i < argc; i += 2) {
argsMap[args[i]] = args[i + 1];
}


//set variables
sg::base::BatchConfiguration batchConfig;
sg::solver::SLESolverConfiguration solverConfig;
Expand All @@ -41,7 +33,7 @@ int main (int argc, char** args) {

// Set Adaptivity
adaptConfig.maxLevelType_ = false;//not used by BatchLearner
adaptConfig.noPoints_ = std::stoi(argsMap["pts"]);
adaptConfig.noPoints_ = 2;
adaptConfig.numRefinements_ = 1;//not used by BatchLearner
adaptConfig.percent_ = 10.0;//not used by BatchLearner
adaptConfig.threshold_ = 0.001;
Expand All @@ -53,20 +45,20 @@ int main (int argc, char** args) {
solverConfig.type_ = sg::solver::CG;

// Set parameters for the batchLearner
batchConfig.filename = argsMap["input"].c_str();
batchConfig.batchsize = std::stoi(argsMap["bs"]);
batchConfig.filename = "../tests/data/friedman_4d_2000.arff";
batchConfig.batchsize = 500;
batchConfig.samples = 500;
batchConfig.seed = 42;
batchConfig.wMode = std::stoi(argsMap["mode"]);;
batchConfig.wArgument = std::stof(argsMap["arg"]);
batchConfig.refineEvery = std::stoi(argsMap["ref"]);
batchConfig.wMode = 5;
batchConfig.wArgument = 1.0;
batchConfig.refineEvery = 0;
batchConfig.verbose = true;
batchConfig.stack = 0;
batchConfig.testsize = std::stoi(argsMap["ts"]);
batchConfig.testsize = 200;
batchConfig.lambda = 0.0001f;

//set up the grid config
gridConfig.level_ = std::stoi(argsMap["level"]);
gridConfig.level_ = 4;

//init the learner
sg::datadriven::BatchLearner learner(batchConfig, gridConfig, solverConfig, adaptConfig);
Expand Down
75 changes: 75 additions & 0 deletions datadriven/examples/learnerSGDETest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (C) 2008-today The SG++ project
// This file is part of the SG++ project. For conditions of distribution and
// use, please see the copyright notice provided with SG++ or at
// sgpp.sparsegrids.org

#include <sgpp/datadriven/tools/ARFFTools.hpp>
#include <sgpp/datadriven/application/LearnerSGDE.hpp>
#include <sgpp/base/grid/Grid.hpp>
#include <sgpp/pde/application/RegularizationConfiguration.hpp>
#include <sgpp/datadriven/application/LearnerSGDE.hpp>
#include <sgpp/datadriven/application/GaussianKDE.hpp>
#include <sgpp/globaldef.hpp>

using namespace std;

int main(int argc, char** argv) {
std::string filename = "../tests/data/friedman_4d_2000.arff";

cout << "# loading file: " << filename << endl;
SGPP::datadriven::Dataset dataset = SGPP::datadriven::ARFFTools::readARFF(filename);
SGPP::base::DataMatrix* samples = dataset.getTrainingData();

// configure grid
cout << "# create grid config" << endl;
SGPP::base::RegularGridConfiguration gridConfig;
gridConfig.dim_ = dataset.getDimension();
gridConfig.level_ = 4;
gridConfig.type_ = SGPP::base::GridType::Linear;

// configure adaptive refinement
cout << "# create adaptive refinement config" << endl;
SGPP::base::AdpativityConfiguration adaptConfig;
adaptConfig.numRefinements_ = 0;
adaptConfig.noPoints_ = 10;

// configure solver
cout << "# create solver config" << endl;
SGPP::solver::SLESolverConfiguration solverConfig;
solverConfig.maxIterations_ = 1000;
solverConfig.eps_ = 1e-10;
solverConfig.threshold_ = 1e-10;

// configure regularization
cout << "# create regularization config" << endl;
SGPP::pde::RegularizationConfiguration regularizationConfig;
regularizationConfig.regType_ = SGPP::pde::RegularizationType::Laplace;

// configure learner
cout << "# create learner config" << endl;
SGPP::datadriven::LearnerSGDEConfiguration learnerConfig;
learnerConfig.doCrossValidation_ = true;
learnerConfig.kfold_ = 3;
learnerConfig.lambdaStart_ = 1e-1;
learnerConfig.lambdaEnd_ = 1e-10;
learnerConfig.lambdaSteps_ = 3;
learnerConfig.logScale_ = true;
learnerConfig.shuffle_ = true;
learnerConfig.seed_ = 1234567;
learnerConfig.silent_ = false;

cout << "# creating the learner" << endl;
SGPP::datadriven::LearnerSGDE learner(gridConfig, adaptConfig, solverConfig, regularizationConfig, learnerConfig);
learner.initialize(*samples);

SGPP::datadriven::GaussianKDE kde(*samples);
SGPP::base::DataVector x(learner.getDim());
for (size_t i = 0; i < x.getSize(); i++) {
x[i] = 0.5;
}
cout << "--------------------------------------------------------" << endl;
cout << "pdf_SGDE(x) = " << learner.pdf(x) << " ~ " << kde.pdf(x) << " = pdf_KDE(x)" << endl;
cout << "mean_SGDE(x) = " << learner.mean() << " ~ " << kde.mean() << " = mean_KDE(x)" << endl;
cout << "var_SGDE(x) = " << learner.variance() << " ~ " << kde.variance() << " = var_KDE(x)" << endl;
}

3 changes: 1 addition & 2 deletions datadriven/examples/multTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ void doAllRefinements(SGPP::base::AdpativityConfiguration& adaptConfig,

int main(int argc, char** argv) {

std::string fileName = "friedman2_90000.arff";
// std::string fileName = "debugging.arff";
std::string fileName = "../tests/data/friedman_4d_2000.arff";

uint32_t level = 3;

Expand Down
3 changes: 1 addition & 2 deletions datadriven/examples/multTransposeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ void doAllRefinements(SGPP::base::AdpativityConfiguration& adaptConfig,

int main(int argc, char** argv) {

std::string fileName = "friedman2_90000.arff";
//std::string fileName = "debugging.arff";
std::string fileName = "../tests/data/friedman_4d_2000.arff";

uint32_t level = 3;

Expand Down
3 changes: 1 addition & 2 deletions datadriven/examples/multiEvalPerformance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@

int main(int argc, char** argv) {

// std::string fileName = "friedman_4d_2000.arff";
std::string fileName = "debugging.arff";
std::string fileName = "../tests/data/friedman_4d_2000.arff";

SGPP::datadriven::ARFFTools arffTools;
SGPP::datadriven::Dataset dataset = arffTools.readARFF(fileName);
Expand Down
6 changes: 1 addition & 5 deletions datadriven/examples/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,9 @@

int main(int argc, char** argv) {

// int maxLevel = 9;
int maxLevel = 9;

// std::string fileName = "debugging.arff";
// std::string fileName = "friedman_4d_2000.arff";
std::string fileName = "friedman2_90000.arff";
// std::string fileName = "bigger.arff";
std::string fileName = "../tests/data/friedman_4d_2000.arff";

//sg::base::RegularGridConfiguration gridConfig;
sg::solver::SLESolverConfiguration SLESolverConfigRefine;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,23 @@
#include <string>


//using namespace sg::base;
using namespace std;


namespace SGPP {
namespace base {
/**
* structure to provide parameters for the BatchLearner
*/
struct BatchConfiguration {
string filename;//!< arff-file to be read
std::string filename;//!< arff-file to be read
size_t batchsize;//!< size of one batch
size_t samples;//!< number of samples for the Monte Carlo sampling (normalization) (0=don't sample) good: 1000
int seed;//!< seed for the sampling
int wMode;//!< number of weighting mode to use x = batch#, y = wArgument: 0 = all batches are equal, 1 = linear (x*y), 2 = pow(y,x), 3 = y/x, 4 = only the last batch counts, 5 = weigh new batch by proportion, but at least y
float wArgument;//!< argument for the weighting method
int wMode;//!< number of weighting mode to use x = batch#, y = wArgument: 0 = all batches are equal, 1 = linear (x*y), 2 = pow(y,x), 3 = y/x, 4 = only the last batch counts, 5 = weight new batch by proportion, but at least y
float_t wArgument;//!< argument for the weighting method
size_t refineEvery;//!< refine every xth batch (0=never)
bool verbose;//!< verbose flag
size_t stack;//!< number of last batches alphavectors to be saved (0=all)
size_t testsize;//!< how many items to test from the data following the batch (0=don't test after learned)
float lambda;//!< lambda for solving
float_t lambda;//!< lambda for solving
};


Expand Down
40 changes: 20 additions & 20 deletions datadriven/src/sgpp/datadriven/application/BatchLearner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#include <sgpp/base/operation/hash/OperationEval.hpp>
#include <sgpp/base/exception/application_exception.hpp>

using namespace sg::base;
using namespace SGPP::base;
using namespace std;


Expand All @@ -61,7 +61,7 @@ namespace SGPP {
throw base::application_exception("BatchLearner: An unsupported SLE solver type was chosen!");

//open file
reader.open(batchConf.filename);
reader.open(batchConf.filename.c_str());

if (!reader) {
cout << "ERROR: file does not exist: " << batchConf.filename << endl;
Expand All @@ -74,7 +74,7 @@ namespace SGPP {
size_t cur_pos = 0;
size_t cur_find = 0;
string cur_value;
double dbl_cur_value;
float_t dbl_cur_value;

DataVector temprow(dimensions);

Expand Down Expand Up @@ -165,9 +165,9 @@ namespace SGPP {

//wMode 5: weigh old alpha with new alpha by occurences
if (batchConf.wMode == 5) {
double k = (double) dataInBatch.at(grid)->getNrows();
double n = (double) occurences.at(grid);
double wNew = max(k / (n + k), (double)batchConf.wArgument);
float_t k = (float_t) dataInBatch.at(grid)->getNrows();
float_t n = (float_t) occurences.at(grid);
float_t wNew = max(k / (n + k), (float_t)batchConf.wArgument);

if (batchConf.verbose)
cout << "old weight: " << 1.0 - wNew << " new weight: " << wNew << endl;
Expand All @@ -191,21 +191,21 @@ namespace SGPP {
}

size_t count = alphaStorage.at(grid).size();//count of old alphas available for calculation
vector<float> factors;
vector<float_t> factors;

//previous alphas exist
//calc factors
float sum = 0.0f;
float_t sum = 0.0f;

for (size_t i = 0; i < count; i++) {
if (batchConf.wMode == 0)
factors.push_back((float)1);//temp: all alphas are equal
factors.push_back((float_t)1);//temp: all alphas are equal
else if (batchConf.wMode == 1)
factors.push_back((float)(i + 1)*batchConf.wArgument); //linear
factors.push_back((float_t)(i + 1)*batchConf.wArgument); //linear
else if (batchConf.wMode == 2)
factors.push_back((float)pow(batchConf.wArgument, (i + 1))); //exp
factors.push_back((float_t)pow(batchConf.wArgument, (i + 1))); //exp
else if (batchConf.wMode == 3)
factors.push_back((float)batchConf.wArgument / (float)(i + 1)); //1/x bzw arg/x
factors.push_back((float_t)batchConf.wArgument / (float_t)(i + 1)); //1/x bzw arg/x
else if (batchConf.wMode != 4 && batchConf.wMode != 5) { //4 and 5 treated elsewhere
cout << "unsupported weighting mode (mode/arg): " << batchConf.wMode << "/" << batchConf.wArgument << endl;
throw 42;
Expand Down Expand Up @@ -238,7 +238,7 @@ namespace SGPP {
//update norm factors
for (auto const& p : grids) {
//for each grid
double evalsum = 0;
float_t evalsum = 0;

for (float x = 0; x < batchConf.samples; x++) {
//generate points per grid
Expand All @@ -250,15 +250,15 @@ namespace SGPP {

//add norm factor
OperationEval* opEval = SGPP::op_factory::createOperationEval(*grids.at(p.first));
double temp = opEval->eval(*alphaVectors.at(p.first), pt);
float_t temp = opEval->eval(*alphaVectors.at(p.first), pt);

if (batchConf.verbose && abs(temp) > 100)
cout << "warning abs>100: " << temp << " for " << pt.toString() << endl;

evalsum += temp;
}

evalsum = evalsum / (double) batchConf.samples;
evalsum = evalsum / (float_t) batchConf.samples;
//update the normFactor
normFactors.at(p.first) = evalsum;

Expand All @@ -276,12 +276,12 @@ namespace SGPP {
testDataset.getRow(i, pt);
//Compute maximum of all density functions:
int max_index = -1;
double max = -1.0f * numeric_limits<double>::max();
float_t max = -1.0f * numeric_limits<float_t>::max();

for (auto const& g : grids) {
SGPP::base::OperationEval* Eval = SGPP::op_factory::createOperationEval(*g.second);
//posterior = likelihood*prior
double res = Eval->eval(*alphaVectors.at(g.first), pt);
float_t res = Eval->eval(*alphaVectors.at(g.first), pt);
delete Eval;

if (batchConf.samples != 0)
Expand Down Expand Up @@ -322,7 +322,7 @@ namespace SGPP {

alphaVectors.insert(std::pair<int, DataVector*>(p.first, new DataVector(grids.at(p.first)->getSize())));
alphaVectors.at(p.first)->setAll(0.0);
normFactors.insert(std::pair<int, float>(p.first, 1));
normFactors.insert(std::pair<int, float_t>(p.first, 1));
}


Expand Down Expand Up @@ -434,8 +434,8 @@ namespace SGPP {
//calc accuracy for this batch and all tests
t_total += (int)result.getSize();
t_correct += correct;
acc_current = (double)(100.0 * correct / (double)result.getSize());
acc_global = (double)(100.0 * t_correct / (double)t_total);
acc_current = (float_t)(100.0 * correct / (float_t)result.getSize());
acc_global = (float_t)(100.0 * t_correct / (float_t)t_total);
//output accuracy
cout << "batch:\t" << acc_current << "% (" << correct << "/" << result.getSize() << ")" << endl;
cout << "total:\t" << acc_global << "% (" << t_correct << "/" << t_total << ")" << endl;
Expand Down
Loading

0 comments on commit 2239567

Please sign in to comment.