Transitioned to dynet and fixed various warnings
trevorcohn committed Nov 1, 2016
1 parent 349aba7 commit c263461
Showing 9 changed files with 118 additions and 108 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
@@ -1,3 +1,3 @@
[submodule "cnn"]
path = cnn
url = https://github.com/clab/cnn.git
[submodule "dynet"]
path = dynet
url = https://github.com/clab/dynet.git
8 changes: 4 additions & 4 deletions CMakeLists.txt
@@ -1,7 +1,7 @@
project(mantis)
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)

-set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cnn/cmake)
+set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/dynet/cmake)

# CNN uses Eigen which exploits modern CPU architectures. To get the
# best possible performance, the following are recommended:
@@ -29,7 +29,7 @@ endif()
enable_testing()

include_directories(${CMAKE_CURRENT_SOURCE_DIR}
-                    ${PROJECT_SOURCE_DIR}/cnn)
+                    ${PROJECT_SOURCE_DIR}/dynet)

function(find_cudnn)
set(CUDNN_ROOT "" CACHE PATH "CUDNN root path")
@@ -111,9 +111,9 @@ include_directories(${EIGEN3_INCLUDE_DIR})
FIND_PACKAGE(Threads REQUIRED)
set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT})

-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cnn/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/dynet/config.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config.h)
include_directories(${CMAKE_CURRENT_BINARY_DIR})

-add_subdirectory(cnn/cnn)
+add_subdirectory(dynet/dynet)
add_subdirectory(src)
enable_testing()
1 change: 0 additions & 1 deletion cnn
Submodule cnn deleted from ec75eb
1 change: 1 addition & 0 deletions dynet
Submodule dynet added at 890422
21 changes: 18 additions & 3 deletions src/CMakeLists.txt
@@ -2,14 +2,29 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8)

foreach(TARGET attentional biattentional)
  ADD_EXECUTABLE(${TARGET} ${TARGET}.cc)
-  target_link_libraries(${TARGET} cnn ${LIBS})
  if(UNIX AND NOT APPLE)
    target_link_libraries(${TARGET} rt)
  endif()
  if (WITH_CUDA_BACKEND)
-    add_dependencies(${TARGET} cnncuda)
-    target_link_libraries(${TARGET} cnncuda)
+    set(CUDA_SEPARABLE_COMPILATION ON)
+    list(APPEND CUDA_NVCC_FLAGS "-gencode;arch=compute_30,code=sm_30;-gencode;arch=compute_35,code=sm_35;-gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_52,code=sm_52;-gencode;arch=compute_52,code=compute_52;-std=c++11;-DVERBOSE;-DEIGEN_USE_GPU;-DHAVE_CUDA")
+    if(CMAKE_COMPILER_IS_GNUCXX)
+      if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9)
+        # gcc 4.9 or later versions raise SEGV due to the optimization problem.
+        # Use -O1 instead for now.
+        list(APPEND CUDA_NVCC_FLAGS "-O1")
+      else()
+        list(APPEND CUDA_NVCC_FLAGS "-O2")
+      endif()
+    else()
+      list(APPEND CUDA_NVCC_FLAGS "-O2")
+    endif()
+    add_dependencies(${TARGET} gdynet dynetcuda)
+    target_link_libraries(${TARGET} gdynet dynetcuda)
    CUDA_ADD_CUBLAS_TO_TARGET(${TARGET})
+  else()
+    add_dependencies(${TARGET} dynet)
+    target_link_libraries(${TARGET} dynet ${LIBS})
  endif (WITH_CUDA_BACKEND)
endforeach()

48 changes: 22 additions & 26 deletions src/attentional.cc
@@ -10,7 +10,7 @@
#include <boost/program_options/variables_map.hpp>

using namespace std;
-using namespace cnn;
+using namespace dynet;
using namespace boost::program_options;

unsigned LAYERS = 1; // 2
@@ -19,8 +19,8 @@ unsigned ALIGN_DIM = 32; // 128
unsigned SRC_VOCAB_SIZE = 0;
unsigned TGT_VOCAB_SIZE = 0;

-cnn::Dict sd;
-cnn::Dict td;
+dynet::Dict sd;
+dynet::Dict td;
int kSRC_SOS;
int kSRC_EOS;
int kTGT_SOS;
@@ -45,7 +45,7 @@ template <class rnn_t>
int main_body(variables_map vm);

int main(int argc, char** argv) {
-    cnn::initialize(argc, argv);
+    dynet::initialize(argc, argv);

// command line processing
variables_map vm;
@@ -112,8 +112,8 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
bool doco, float coverage, bool display, bool fert);

template <class AM_t> void test_rescore(Model &model, AM_t &am, Corpus &testing, bool doco);
-template <class AM_t> void test_decode(Model &model, AM_t &am, std::string test_file, bool doco, int beam);
-template <class AM_t> void test_kbest_arcs(Model &model, AM_t &am, string test_file, int top_k);
+template <class AM_t> void test_decode(Model &model, AM_t &am, std::string test_file, bool doco, unsigned beam);
+template <class AM_t> void test_kbest_arcs(Model &model, AM_t &am, string test_file, unsigned top_k);
template <class AM_t> void fert_stats(Model &model, AM_t &am, Corpus &devel, bool global_fert);

const Sentence* context(const Corpus &corpus, unsigned i);
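The int-to-unsigned switches in these declarations (and in the matching definitions further down) follow dynet's use of unsigned for sizes and counts, and they silence the signed/unsigned comparison warnings the commit message alludes to. A minimal illustration of that class of warning — hypothetical code, not from this repository:

    #include <vector>

    int main() {
        std::vector<int> sent = {7, 8, 9};
        // With -Wall, "for (int i = 0; i < sent.size(); ++i)" warns:
        // comparison between signed int and unsigned std::size_t.
        for (unsigned i = 0; i < sent.size(); ++i)
            sent[i] += 1;  // unsigned index compares cleanly against size()
        return 0;
    }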
@@ -146,8 +146,6 @@ int main_body(variables_map vm)
if (vm.count("lstm")) flavour = "LSTM";
else if (vm.count("gru")) flavour = "GRU";

-    typedef vector<int> Sentence;
-    typedef pair<Sentence, Sentence> SentencePair;
Corpus training, devel, testing;
string line;
cerr << "Reading training data from " << vm["train"].as<string>() << "...\n";
@@ -234,19 +232,19 @@ int main_body(variables_map vm)
vm["epochs"].as<int>(), doco, vm["coverage"].as<float>(), vm.count("display"),
fert);
else if (vm.count("kbest"))
-        test_kbest_arcs(model, am, vm["kbest"].as<string>(), vm["topk"].as<int>());
+        test_kbest_arcs(model, am, vm["kbest"].as<string>(), vm["topk"].as<unsigned>());
else if (vm.count("test")) {
if (vm.count("rescore"))
test_rescore(model, am, testing, doco);
else // test
-            test_decode(model, am, vm["test"].as<string>(), doco, vm["beam"].as<int>());
+            test_decode(model, am, vm["test"].as<string>(), doco, vm["beam"].as<unsigned>());
}
else if (vm.count("fert-stats"))
fert_stats(model, am, devel, vm.count("fertility"));

delete sgd;

-    //cnn::Free();
+    //dynet::Free();

return EXIT_SUCCESS;
}
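A caveat on the vm["topk"].as<unsigned>() and vm["beam"].as<unsigned>() reads above: boost::program_options throws boost::bad_any_cast when the requested type differs from the declared one, so the options_description (outside the hunks shown here) must declare these options as unsigned too. A hypothetical declaration consistent with these reads:

    #include <boost/program_options.hpp>
    #include <iostream>

    namespace po = boost::program_options;

    int main(int argc, char** argv) {
        po::options_description opts("Allowed options");
        opts.add_options()
            // declared as unsigned so the .as<unsigned>() casts succeed
            ("topk", po::value<unsigned>()->default_value(100), "top-k arcs")
            ("beam", po::value<unsigned>()->default_value(0), "beam width");
        po::variables_map vm;
        po::store(po::parse_command_line(argc, argv, opts), vm);
        po::notify(vm);
        std::cout << "beam = " << vm["beam"].as<unsigned>() << "\n";
        return 0;
    }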
@@ -264,9 +262,9 @@ void test_rescore(Model &model, AM_t &am, Corpus &testing, bool doco)
tie(ssent, tsent, docid) = testing[i];

ComputationGraph cg;
-        am.BuildGraph(ssent, tsent, cg, nullptr, (doco) ? context(testing, i) : nullptr);
+        auto iloss = am.BuildGraph(ssent, tsent, cg, nullptr, (doco) ? context(testing, i) : nullptr);

-        double loss = as_scalar(cg.forward());
+        double loss = as_scalar(cg.forward(iloss));
cout << i << " |||";
for (auto &w: ssent)
cout << " " << sd.convert(w);
@@ -286,10 +284,8 @@ void test_rescore(Model &model, AM_t &am, Corpus &testing, bool doco)
}

template <class AM_t>
-void test_decode(Model &model, AM_t &am, string test_file, bool doco, int beam)
+void test_decode(Model &model, AM_t &am, string test_file, bool doco, unsigned beam)
{
-    double tloss = 0;
-    int tchars = 0;
int lno = 0;

cerr << "Reading test examples from " << test_file << endl;
@@ -339,7 +335,7 @@ void test_decode(Model &model, AM_t &am, string test_file, bool doco, int beam)
}

template <class AM_t>
-void test_kbest_arcs(Model &model, AM_t &am, string test_file, int top_k)
+void test_kbest_arcs(Model &model, AM_t &am, string test_file, unsigned top_k)
{
// only suitable for monolingual setting, of predicting a sentence given preceeding sentence
cerr << "Reading test examples from " << test_file << endl;
@@ -369,7 +365,7 @@ void test_kbest_arcs(Model &model, AM_t &am, string test_file, int top_k)
errs.push_back(i_err);
}
Expression i_nerr = sum(errs);
-    double loss = as_scalar(cg.incremental_forward());
+    double loss = as_scalar(cg.incremental_forward(i_nerr));

//cout << last_last_id << ":" << last_id << " |||";
//for (auto &w: source) cout << " " << sd.convert(w);
@@ -483,9 +479,9 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
}

bool first = true;
-    int report = 0;
+    unsigned report = 0;
unsigned lines = 0;
-    int epoch = 0;
+    unsigned epoch = 0;
Sentence ssent, tsent;
int docid;

@@ -497,8 +493,8 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
tie(ssent, tsent, docid) = devel[i];
ComputationGraph cg;
Expression alignment;
-            am.BuildGraph(ssent, tsent, cg, &alignment, (doco) ? context(devel, i) : nullptr);
-            cg.forward();
+            auto iloss = am.BuildGraph(ssent, tsent, cg, &alignment, (doco) ? context(devel, i) : nullptr);
+            cg.forward(iloss);

cout << "\n====== SENTENCE " << i << " =========\n";
am.display_ascii(ssent, tsent, cg, alignment, sd, td);
@@ -586,7 +582,7 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
objective = objective + fertility_nll;

// perform forward computation for aggregate objective
-            cg.forward();
+            cg.forward(objective);

// grab the parts of the objective
loss += as_scalar(cg.get_value(xent.i));
@@ -595,7 +591,7 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
if (fert)
loss_fert += as_scalar(cg.get_value(fertility_nll.i));

-            cg.backward();
+            cg.backward(objective);
sgd.update();
++lines;

@@ -627,8 +623,8 @@ void train(Model &model, AM_t &am, Corpus &training, Corpus &devel,
for (unsigned i = 0; i < devel.size(); ++i) {
tie(ssent, tsent, docid) = devel[i];
ComputationGraph cg;
-            am.BuildGraph(ssent, tsent, cg, nullptr, (doco) ? context(devel, i) : nullptr, nullptr, nullptr);
-            dloss += as_scalar(cg.forward());
+            auto idloss = am.BuildGraph(ssent, tsent, cg, nullptr, (doco) ? context(devel, i) : nullptr, nullptr, nullptr);
+            dloss += as_scalar(cg.forward(idloss));
dchars += tsent.size() - 1;
}
if (dloss < best) {
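The pattern running through this file — keep the Expression returned by BuildGraph and pass it to forward, backward, or incremental_forward — is the core API change in the cnn-to-dynet transition: dynet takes the expression to evaluate explicitly rather than operating on the graph's last expression. A minimal sketch of the new convention, with hypothetical model code rather than anything from this repository (API as of late-2016 dynet; details vary across versions):

    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include <iostream>

    using namespace dynet;

    int main(int argc, char** argv) {
        dynet::initialize(argc, argv);

        Model model;                                     // ParameterCollection in later releases
        Parameter p_w = model.add_parameters({1, 3});

        ComputationGraph cg;
        Expression w = parameter(cg, p_w);               // load the parameter into this graph
        Expression x = input(cg, {3}, {1.f, 2.f, 3.f});  // fixed input vector
        Expression target = input(cg, {1}, {0.5f});
        Expression loss = squared_distance(w * x, target);

        // cnn:   cg.forward();      cg.backward();      (implicit last expression)
        // dynet: cg.forward(loss);  cg.backward(loss);  (explicit target expression)
        std::cout << "loss = " << as_scalar(cg.forward(loss)) << "\n";
        cg.backward(loss);
        return 0;
    }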