Skip to content

Commit

Permalink
Added cca() bindings
Browse files Browse the repository at this point in the history
  • Loading branch information
davisking committed Apr 28, 2013
1 parent 8770498 commit 2c2f955
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 2 deletions.
1 change: 1 addition & 0 deletions tools/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ add_python_module(dlib
src/decision_functions.cpp
src/other.cpp
src/basic.cpp
src/cca.cpp
)
127 changes: 127 additions & 0 deletions tools/python/src/cca.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@

#include <boost/python.hpp>
#include <boost/shared_ptr.hpp>
#include <dlib/statistics.h>
#include "pyassert.h"
#include <boost/python/args.hpp>

using namespace dlib;
using namespace boost::python;

typedef std::vector<std::pair<unsigned long,double> > sparse_vect;

// Bundle of everything produced by one call to the cca() wrapper (_cca1) so it
// can be handed back to Python as a single object.  bind_cca() exposes this
// struct to Python under the name "_cca_outputs".
struct cca_outputs
{
// Value returned by dlib's cca(); the Python docstring describes it as the
// estimated correlations between elements of the transformed vectors.
matrix<double,0,1> correlations;
// Transformation matrix for the L input vectors, filled in by cca().
matrix<double> Ltrans;
// Transformation matrix for the R input vectors, filled in by cca().
matrix<double> Rtrans;
};

// Python-facing wrapper around dlib's cca() for sparse input vectors.
//
// Validates the arguments, runs cca(), and packs the returned correlations
// together with the two transformation matrices into one cca_outputs object.
//
// Requirements checked here (a violation is reported through pyassert with the
// message "Invalid inputs"):
//   - num_correlations > 0
//   - L and R are both non-empty and contain the same number of vectors
//   - regularization >= 0
cca_outputs _cca1 (
    const std::vector<sparse_vect>& L,
    const std::vector<sparse_vect>& R,
    unsigned long num_correlations,
    unsigned long extra_rank,
    unsigned long q,
    double regularization
)
{
    const bool have_samples  = L.size() > 0 && R.size() > 0;
    const bool sizes_match   = L.size() == R.size();
    const bool inputs_are_ok = num_correlations > 0
                            && have_samples
                            && sizes_match
                            && regularization >= 0;
    pyassert(inputs_are_ok, "Invalid inputs");

    // cca() writes the two transforms through its output arguments and returns
    // the correlations, so all three results land in the same struct.
    cca_outputs result;
    result.correlations = cca(L, R,
                              result.Ltrans, result.Rtrans,
                              num_correlations, extra_rank, q, regularization);
    return result;
}

// ----------------------------------------------------------------------------------------

// Non-template forwarding wrapper around max_index_plus_one() for a single
// sparse vector, giving bind_cca() one concrete function to register with
// Python.  Returns whatever max_index_plus_one(v) returns.
unsigned long sparse_vector_max_index_plus_one (
    const sparse_vect& v
)
{
    const unsigned long dimensionality = max_index_plus_one(v);
    return dimensionality;
}

// Applies a CCA transformation matrix to one sparse vector.
//
// Requires max_index_plus_one(v) <= m.nr(); a violation is reported through
// pyassert with the message "Invalid Inputs".
// Returns trans(m)*v, computed with sparse_matrix_vector_multiply().
matrix<double,0,1> apply_cca_transform (
    const matrix<double>& m,
    const sparse_vect& v
)
{
    // Compare in unsigned arithmetic, exactly as the implicit conversion in a
    // mixed signed/unsigned comparison would.
    const unsigned long needed_rows    = max_index_plus_one(v);
    const unsigned long available_rows = static_cast<unsigned long>(m.nr());
    pyassert(needed_rows <= available_rows, "Invalid Inputs");

    return sparse_matrix_vector_multiply(trans(m), v);
}

// Registers the CCA-related Python bindings in the current Boost.Python scope:
//   - the "_cca_outputs" result class (correlations, Ltrans, Rtrans)
//   - max_index_plus_one(v)
//   - apply_cca_transform(m, v)
//   - cca(L, R, num_correlations, extra_rank=5, q=2, regularization=0)
//
// NOTE: the continuation lines inside the docstring literals must stay flush
// left.  Any leading whitespace on them would become part of the docstring
// text that Python users see.
void bind_cca()
{
    // Each field is registered with a getter only, so Python sees them as
    // read-only attributes.
    class_<cca_outputs>("_cca_outputs")
        .add_property("correlations", &cca_outputs::correlations)
        .add_property("Ltrans", &cca_outputs::Ltrans)
        .add_property("Rtrans", &cca_outputs::Rtrans);

    def("max_index_plus_one", sparse_vector_max_index_plus_one, arg("v"),
"ensures \n\
- returns the dimensionality of the given sparse vector. That is, returns a \n\
number one larger than the maximum index value in the vector. If the vector \n\
is empty then returns 0. "
    );


    def("apply_cca_transform", apply_cca_transform, (arg("m"), arg("v")),
"requires \n\
- max_index_plus_one(v) <= m.nr() \n\
ensures \n\
- returns trans(m)*v \n\
(i.e. multiply m by the vector v and return the result) "
    );


    // The "requires" list mirrors the checks performed in _cca1().  The size
    // precondition refers to the R argument (previously misspelled "r").
    def("cca", _cca1, (arg("L"), arg("R"), arg("num_correlations"), arg("extra_rank")=5, arg("q")=2, arg("regularization")=0),
"requires \n\
- num_correlations > 0 \n\
- len(L) > 0 \n\
- len(R) > 0 \n\
- len(L) == len(R) \n\
- regularization >= 0 \n\
ensures \n\
- This function performs a canonical correlation analysis between the vectors \n\
in L and R. That is, it finds two transformation matrices, Ltrans and \n\
Rtrans, such that row vectors in the transformed matrices L*Ltrans and \n\
R*Rtrans are as correlated as possible (note that in this notation we \n\
interpret L as a matrix with the input vectors in its rows). Note also that \n\
this function tries to find transformations which produce num_correlations \n\
dimensional output vectors. \n\
- Note that you can easily apply the transformation to a vector using \n\
apply_cca_transform(). So for example, like this: \n\
- apply_cca_transform(Ltrans, some_sparse_vector) \n\
- returns a structure containing the Ltrans and Rtrans transformation matrices \n\
as well as the estimated correlations between elements of the transformed \n\
vectors. \n\
- No centering is applied to the L and R matrices. Therefore, if you want a \n\
CCA relative to the centered vectors then you must apply centering yourself \n\
before calling cca(). \n\
- This function works with reduced rank approximations of the L and R matrices. \n\
This makes it fast when working with large matrices. In particular, we use \n\
the dlib::svd_fast() routine to find reduced rank representations of the input \n\
matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q) \n\
and similarly for R. This means that you can use the extra_rank and q \n\
arguments to cca() to influence the accuracy of the reduced rank \n\
approximation. However, the default values should work fine for most \n\
problems. \n\
- This function performs the ridge regression version of Canonical Correlation \n\
Analysis when regularization is set to a value > 0. In particular, larger \n\
values indicate the solution should be more heavily regularized. This can be \n\
useful when the dimensionality of the data is larger than the number of \n\
samples. \n\
- A good discussion of CCA can be found in the paper \"Canonical Correlation \n\
Analysis\" by David Weenink. In particular, this function is implemented \n\
using equations 29 and 30 from his paper. We also use the idea of doing CCA \n\
on a reduced rank approximation of L and R as suggested by Paramveer S. \n\
Dhillon in his paper \"Two Step CCA: A new spectral method for estimating \n\
vector models of words\". "

    );
}



7 changes: 7 additions & 0 deletions tools/python/src/dlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,22 @@ void bind_decision_functions();
void bind_basic_types();
void bind_other();
void bind_svm_rank_trainer();
void bind_cca();


// Initialisation body of the "dlib" Python extension module.  It configures
// docstring generation and then calls each bind_*() registration function
// declared above, in the order listed.
BOOST_PYTHON_MODULE(dlib)
{
// Disable printing of the C++ function signature in the python __doc__ string
// since it is full of huge amounts of template clutter.
// The three flags are, in order: show user-defined docstrings, show Python
// signatures, show C++ signatures.
boost::python::docstring_options options(true,true,false);

// NOTE(review): each bind_*() is defined in another source file; presumably
// each registers one group of bindings -- confirm before reordering.
bind_matrix();
bind_vector();
bind_svm_c_trainer();
bind_decision_functions();
bind_basic_types();
bind_other();
bind_svm_rank_trainer();
// Registers the CCA bindings (bind_cca() is defined in cca.cpp).
bind_cca();
}

8 changes: 6 additions & 2 deletions tools/python/src/matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
#include <dlib/matrix.h>
#include <dlib/string.h>
#include "serialize_pickle.h"
#include <boost/python/args.hpp>


using namespace dlib;
using namespace std;
using namespace boost::python;
using std::string;
using std::ostringstream;


void matrix_set_size(matrix<double>& m, long nr, long nc)
Expand Down Expand Up @@ -159,10 +161,12 @@ void bind_matrix()

class_<matrix<double> >("matrix", init<>())
.def("__init__", make_constructor(&make_matrix_from_size))
.def("set_size", &matrix_set_size)
.def("set_size", &matrix_set_size, (arg("rows"), arg("cols")), "Set the size of the matrix to the given number of rows and columns.")
.def("__init__", make_constructor(&from_object))
.def("__repr__", &matrix_double__repr__)
.def("__str__", &matrix_double__str__)
.def("nr", &matrix<double>::nr, "Return the number of rows in the matrix.")
.def("nc", &matrix<double>::nc, "Return the number of columns in the matrix.")
.def("__len__", &matrix_double__len__)
.def("__getitem__", &matrix_double__getitem__, with_custodian_and_ward_postcall<0,1>())
.add_property("shape", &get_matrix_size)
Expand Down

0 comments on commit 2c2f955

Please sign in to comment.