added compatibility for eval_scripts/train.sh without apex and updated README (#29)
castorini · Sep 18, 2019 · b302b34 · b302b34
1 parent 2dd0401
commit b302b34
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 12 deletions.
diff --git a/README.md b/README.md
@@ -2,7 +2,6 @@
 
 [ ![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3381673.svg)](https://doi.org/10.5281/zenodo.3381673)
 
-
 Document ranking via sentence modeling using BERT
 
 Note: 
@@ -26,16 +25,14 @@ pip install -r requirements.txt
 git clone https://github.com/NVIDIA/apex
 cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
 
-# Set up Anserini (last reproduced with commit id: f690b5b769d7b0a623e034b31438df126d81b791)
+# Set up Anserini (last reproduced with commit id: 5da46f610435be6364700bc5a6144253ed3f3b59)
 git clone https://github.com/castorini/anserini.git
 cd anserini && mvn clean package appassembler:assemble
 cd eval && tar xvfz trec_eval.9.0.4.tar.gz && cd trec_eval.9.0.4 && make && cd ../../..
 
 # Download data and models
-cd data
 wget https://zenodo.org/record/3381673/files/emnlp_bert4ir_v2.tar.gz
 tar -xzvf emnlp_bert4ir_v2.tar.gz
-cd ..
 ```
 
 Experiment Names:
@@ -45,7 +42,6 @@ Experiment Names:
 - robust04, car_core17, car_core18
 - msmarco_robust04, msmarco_core17, msmarco_core18
 
-
 ## Training
 
 For BERT(MB):
@@ -126,12 +122,12 @@ data_path=<path/to/data/root>
 ### Sentence Evidence
 
 ```
-# Tune hyperparameters
+# Tune hyperparameters (if you do not have apex working, run this script with an additional "NOAPEX" param at the end)
 ./eval_scripts/train.sh ${experiment} ${collection} ${anserini_path}
 
 # Run experiment
-./eval_scripts/test.sh #{experiment} ${collection} ${anserini_path}
+./eval_scripts/test.sh ${experiment} ${collection} ${anserini_path}
 
 # Evaluate with trec_eval
-./eval_scripts/eval.sh #{experiment} ${anserini_path} ${data_path}
+./eval_scripts/eval.sh ${experiment} ${collection} ${anserini_path} ${data_path}
 ```
diff --git a/eval_scripts/train.sh b/eval_scripts/train.sh
@@ -3,6 +3,7 @@
 experiment=$1
 collection=$2
 anserini_path=$3
+no_apex=$4
 
 if [ ! -d "run_logs/${experiment}" ] ; then
     mkdir -p "run_logs/${experiment}"
@@ -11,14 +12,26 @@ fi
 for i in $(seq 0 4)
     do
         python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 0.1 0.1 $i train > "run_logs/${experiment}/eval${i}a.txt"
-        cat "run_logs/${experiment}/eval${i}a.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}a_best.txt"
+        if [ "$no_apex" = "NOAPEX" ]; then  # quoted: with no 4th argument the unquoted test collapsed to '[ = "NOAPEX" ]' and printed an error
+            cat "run_logs/${experiment}/eval${i}a.txt" | tail -n +2 | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}a_best.txt"  # tail -n +2 skips the first log line before ranking (presumably a non-result line printed without apex; confirm)
+        else
+            cat "run_logs/${experiment}/eval${i}a.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}a_best.txt"
+        fi
         rm "runs/run.${experiment}.cv.train"
 
         python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 1.0 0.1 $i train > "run_logs/${experiment}/eval${i}ab.txt"
-        cat "run_logs/${experiment}/eval${i}ab.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}ab_best.txt"
+        if [ "$no_apex" = "NOAPEX" ]; then  # quoted so an omitted 4th argument compares as the empty string
+            cat "run_logs/${experiment}/eval${i}ab.txt" | tail -n +2 | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}ab_best.txt"
+        else
+            cat "run_logs/${experiment}/eval${i}ab.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}ab_best.txt"
+        fi
         rm "runs/run.${experiment}.cv.train"
 
         python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 1.0 1.0 $i train > "run_logs/${experiment}/eval${i}abc.txt"
-        cat "run_logs/${experiment}/eval${i}abc.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}abc_best.txt"
+        if [ "$no_apex" = "NOAPEX" ]; then  # quoted so an omitted 4th argument compares as the empty string
+            cat "run_logs/${experiment}/eval${i}abc.txt" | tail -n +2 | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}abc_best.txt"
+        else
+            cat "run_logs/${experiment}/eval${i}abc.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}abc_best.txt"
+        fi
         rm "runs/run.${experiment}.cv.train"
-    done
+    done