forked from castorini/birch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge EMNLP branch into master (castorini#25)
* Update codebase to train on all MB data
* Fix data format for robust04
* Fix minor errors in new inference code
* Update scripts and path to support core*
* Fix core* bug in data.py
* Add utility scripts
* Minor fixes in MB branch before merge
* Clean up to reproduce EMNLP results
* Add README for arXiv
* Add Anserini commit id
* Fix typo in Zenodo link
- Loading branch information
1 parent
3543e65
commit 7cec228
Showing
18 changed files
with
429 additions
and
358 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,20 @@ | ||
experiment=$1 | ||
anserini_path=$2 | ||
qrels_file=$3 | ||
collection=$2 | ||
anserini_path=$3 | ||
data_path=$4 | ||
|
||
echo "Experiment: ${experiment}" | ||
|
||
if [[ ${experiment} == *"bm25+rm3"* ]] ; then | ||
echo "BM25+RM3:" | ||
${anserini_path}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map -m P.20 "${anserini_path}/src/main/resources/topics-and-qrels/${qrels_file}" "runs/run.${experiment}.txt" | ||
${anserini_path}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map -m P.20 -m ndcg_cut.20 "${data_path}/qrels/qrels.${collection}.txt" "runs/run.${experiment}.txt" | ||
else | ||
echo "1S:" | ||
${anserini_path}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map -m P.20 "${anserini_path}/src/main/resources/topics-and-qrels/${qrels_file}" "runs/run.${experiment}.cv.a" | ||
${anserini_path}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map -m P.20 -m ndcg_cut.20 "${data_path}/qrels/qrels.${collection}.txt" "runs/run.${experiment}.cv.a" | ||
|
||
echo "2S:" | ||
${anserini_path}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map -m P.20 "${anserini_path}/src/main/resources/topics-and-qrels/${qrels_file}" "runs/run.${experiment}.cv.ab" | ||
${anserini_path}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map -m P.20 -m ndcg_cut.20 "${data_path}/qrels/qrels.${collection}.txt" "runs/run.${experiment}.cv.ab" | ||
|
||
echo "3S:" | ||
${anserini_path}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map -m P.20 "${anserini_path}/src/main/resources/topics-and-qrels/${qrels_file}" "runs/run.${experiment}.cv.abc" | ||
${anserini_path}/eval/trec_eval.9.0.4/trec_eval -M1000 -m map -m P.20 -m ndcg_cut.20 "${data_path}/qrels/qrels.${collection}.txt" "runs/run.${experiment}.cv.abc" | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,36 @@ | ||
#!/usr/bin/env bash | ||
|
||
experiment=$1 | ||
num_folds=$2 | ||
collection=$2 | ||
anserini_path=$3 | ||
tune_params=$4 | ||
|
||
if [ ${num_folds} == '5' ] ; then | ||
folds_file="robust04-paper2-folds.json" | ||
collection="robust04_5cv" | ||
else | ||
folds_file="robust04-paper1-folds.json" | ||
collection="robust04_2cv" | ||
fi | ||
declare -a sents=("a" "ab" "abc") | ||
|
||
if [ ${tune_params} ] ; then | ||
declare -a sents=("a" "ab" "abc") | ||
|
||
./eval_scripts/train.qqsh ${experiment} ${num_folds} ${anserini_path} | ||
|
||
for i in "${sents[@]}" | ||
do | ||
for j in $(seq 0 $((num_folds - 1))) | ||
for i in "${sents[@]}" | ||
do | ||
if [[ "${collection}" == "robust04" ]] ; then | ||
for j in $(seq 0 4) | ||
do | ||
while IFS= read -r line | ||
do | ||
alpha=$(echo ${line#?} | cut -d" " -f1) | ||
beta=$(echo ${line#?} | cut -d" " -f2) | ||
gamma=$(echo ${line#?} | cut -d" " -f3) | ||
done < "log/${experiment}/${j}${i}_best.txt" | ||
done < "run_logs/${experiment}/${j}${i}_best.txt" | ||
|
||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} --folds_file ${folds_file} 3 ${alpha} ${beta} ${gamma} ${j} test | ||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 ${alpha} ${beta} ${gamma} $j test | ||
done | ||
cat runs/run.${experiment}.cv.test.* > runs/run.${experiment}.cv.${i} | ||
done | ||
else | ||
./eval_scripts/${experiment}_eval.sh ${experiment} ${collection} ${anserini_path} ${folds_file} | ||
fi | ||
cat runs/run.${experiment}.cv.test.* > runs/run.${experiment}.cv.$i | ||
else | ||
while IFS= read -r line | ||
do | ||
alpha=$(echo ${line#?} | cut -d" " -f1) | ||
beta=$(echo ${line#?} | cut -d" " -f2) | ||
gamma=$(echo ${line#?} | cut -d" " -f3) | ||
done < "run_logs/${experiment}/${i}_best.txt" | ||
|
||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 ${alpha} ${beta} ${gamma} 0 all | ||
mv runs/run.${experiment}.cv.all runs/run.${experiment}.cv.$i | ||
fi | ||
done | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,38 @@ | ||
#!/usr/bin/env bash | ||
|
||
experiment=$1 | ||
num_folds=$2 | ||
collection=$2 | ||
anserini_path=$3 | ||
|
||
if [ ${num_folds} == '5' ] ; then | ||
folds_file="robust04-paper2-folds.json" | ||
collection="robust04_5cv" | ||
else | ||
folds_file="robust04-paper1-folds.json" | ||
collection="robust04_2cv" | ||
if [ ! -d "run_logs/${experiment}" ] ; then | ||
mkdir -p "run_logs/${experiment}" | ||
fi | ||
|
||
if [ ! -d "log/${experiment}" ] ; then | ||
mkdir -p "log/${experiment}" | ||
fi | ||
if [[ "${collection}" == "robust04" ]] ; then | ||
for i in $(seq 0 4) | ||
do | ||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 0.1 0.1 $i train > "run_logs/${experiment}/eval${i}a.txt" | ||
cat "run_logs/${experiment}/eval${i}a.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}a_best.txt" | ||
rm "runs/run.${experiment}.cv.train" | ||
|
||
for i in $(seq 0 $((num_folds - 1))) | ||
do | ||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --folds_file ${folds_file} --anserini_path ${anserini_path} --data_path data 3 1.0 0.1 0.1 $i train > "log/${experiment}/eval${i}a.txt" | ||
cat "log/${experiment}/eval${i}a.txt" | sort -k5r,5 -k3,3 | head -1 > "log/${experiment}/${i}a_best.txt" | ||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 1.0 0.1 $i train > "run_logs/${experiment}/eval${i}ab.txt" | ||
cat "run_logs/${experiment}/eval${i}ab.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}ab_best.txt" | ||
rm "runs/run.${experiment}.cv.train" | ||
|
||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 1.0 1.0 $i train > "run_logs/${experiment}/eval${i}abc.txt" | ||
cat "run_logs/${experiment}/eval${i}abc.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/${i}abc_best.txt" | ||
rm "runs/run.${experiment}.cv.train" | ||
done | ||
else | ||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 0.1 0.1 0 train > "run_logs/${experiment}/evala.txt" | ||
cat "run_logs/${experiment}/evala.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/a_best.txt" | ||
rm "runs/run.${experiment}.cv.train" | ||
|
||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --folds_file ${folds_file} --anserini_path ${anserini_path} --data_path data 3 1.0 1.0 0.1 ${i} train > "log/${experiment}/eval${i}ab.txt" | ||
cat "log/${experiment}/eval${i}ab.txt" | sort -k5r,5 -k3,3 | head -1 > "log/${experiment}/${i}ab_best.txt" | ||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 1.0 0.1 0 train > "run_logs/${experiment}/evalab.txt" | ||
cat "run_logs/${experiment}/evalab.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/ab_best.txt" | ||
rm "runs/run.${experiment}.cv.train" | ||
|
||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --folds_file ${folds_file} --anserini_path ${anserini_path} --data_path data 3 1.0 1.0 1.0 $i train > "log/${experiment}/eval${i}abc.txt" | ||
cat "log/${experiment}/eval${i}abc.txt" | sort -k5r,5 -k3,3 | head -1 > "log/${experiment}/${i}abc_best.txt" | ||
python src/main.py --mode retrieval --experiment ${experiment} --collection ${collection} --anserini_path ${anserini_path} 3 1.0 1.0 1.0 0 train > "run_logs/${experiment}/evalabc.txt" | ||
cat "run_logs/${experiment}/evalabc.txt" | sort -k5r,5 -k3,3 | head -1 > "run_logs/${experiment}/abc_best.txt" | ||
rm "runs/run.${experiment}.cv.train" | ||
done | ||
fi |
Oops, something went wrong.