From 760c22a3300a4fc3bfc83991140cdc1d6d7a35f9 Mon Sep 17 00:00:00 2001
From: Jimmy Lin
Date: Sat, 8 Jul 2023 06:21:43 -0400
Subject: [PATCH] Refactor Aggretriever 2CR regressions on MS MARCO (#1566)

More consistent naming of indexes and conditions.
---
 docs/2cr/msmarco-v1-doc.html | 222 +++++++++---------
 docs/2cr/msmarco-v1-passage.html | 332 +++++++++++++--------------
 docs/2cr/msmarco-v2-doc.html | 50 ++--
 docs/2cr/msmarco-v2-passage.html | 50 ++--
 pyserini/2cr/msmarco-v1-doc.yaml | 12 +-
 pyserini/2cr/msmarco-v1-passage.yaml | 84 +++----
 pyserini/2cr/msmarco-v2-doc.yaml | 8 +-
 pyserini/2cr/msmarco-v2-passage.yaml | 8 +-
 pyserini/2cr/msmarco.py | 39 ++--
 pyserini/prebuilt_index_info.py | 20 +-
 tests/test_prebuilt_index.py | 2 +-
 11 files changed, 426 insertions(+), 401 deletions(-)

diff --git a/docs/2cr/msmarco-v1-doc.html b/docs/2cr/msmarco-v1-doc.html
index d1a21ca2b..dda5f3db0 100644
--- a/docs/2cr/msmarco-v1-doc.html
+++ b/docs/2cr/msmarco-v1-doc.html
@@ -215,7 +215,7 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-doc \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-doc-default.dl19.txt \
   --bm25 --k1 0.9 --b 0.4
@@ -236,7 +236,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-doc \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-doc-default.dl20.txt \
   --bm25 --k1 0.9 --b 0.4
@@ -257,7 +257,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-doc \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-doc-default.dev.txt \
   --bm25 --k1 0.9 --b 0.4
@@ -319,7 +319,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-slim \
+  --index msmarco-v1-doc-segmented \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-doc-segmented-default.dl19.txt \
   --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -340,7 +340,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-slim \
+  --index msmarco-v1-doc-segmented \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-doc-segmented-default.dl20.txt \
   --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -361,7 +361,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-slim \
+  --index msmarco-v1-doc-segmented \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-doc-segmented-default.dev.txt \
   --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -423,7 +423,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-rm3-doc-default.dl19.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4
@@ -444,7 +444,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-rm3-doc-default.dl20.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4
@@ -465,7 +465,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-rm3-doc-default.dev.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4
@@ -527,7 +527,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-rm3-doc-segmented-default.dl19.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -548,7 +548,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-rm3-doc-segmented-default.dl20.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -569,7 +569,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-rm3-doc-segmented-default.dev.txt \
   --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -631,7 +631,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-default.dl19.txt \
   --bm25 --rocchio --k1 0.9 --b 0.4
@@ -652,7 +652,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-default.dl20.txt \
   --bm25 --rocchio --k1 0.9 --b 0.4
@@ -673,7 +673,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-default.dev.txt \
   --bm25 --rocchio --k1 0.9 --b 0.4
@@ -735,7 +735,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-segmented-default.dl19.txt \
   --bm25 --rocchio --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -756,7 +756,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-segmented-default.dl20.txt \
   --bm25 --rocchio --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -777,7 +777,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-segmented-default.dev.txt \
   --bm25 --rocchio --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -840,7 +840,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-doc \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-doc-tuned.dl19.txt \
   --bm25
@@ -861,7 +861,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-doc \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-doc-tuned.dl20.txt \
   --bm25
@@ -882,7 +882,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-slim \
+  --index msmarco-v1-doc \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-doc-tuned.dev.txt \
   --bm25
@@ -944,7 +944,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-slim \
+  --index msmarco-v1-doc-segmented \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-doc-segmented-tuned.dl19.txt \
   --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -965,7 +965,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-slim \
+  --index msmarco-v1-doc-segmented \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-doc-segmented-tuned.dl20.txt \
   --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -986,7 +986,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-slim \
+  --index msmarco-v1-doc-segmented \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-doc-segmented-tuned.dev.txt \
   --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -1048,7 +1048,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-rm3-doc-tuned.dl19.txt \
   --bm25 --rm3
@@ -1069,7 +1069,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-rm3-doc-tuned.dl20.txt \
   --bm25 --rm3
@@ -1090,7 +1090,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-rm3-doc-tuned.dev.txt \
   --bm25 --rm3
@@ -1152,7 +1152,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-rm3-doc-segmented-tuned.dl19.txt \
   --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -1173,7 +1173,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-rm3-doc-segmented-tuned.dl20.txt \
   --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -1194,7 +1194,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-rm3-doc-segmented-tuned.dev.txt \
   --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -1256,7 +1256,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-tuned.dl19.txt \
   --bm25 --rocchio
@@ -1277,7 +1277,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-tuned.dl20.txt \
   --bm25 --rocchio
@@ -1298,7 +1298,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-full \
+  --index msmarco-v1-doc \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-tuned.dev.txt \
   --bm25 --rocchio
@@ -1360,7 +1360,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics dl19-doc \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-segmented-tuned.dl19.txt \
   --bm25 --rocchio --hits 10000 --max-passage-hits 1000 --max-passage
@@ -1381,7 +1381,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics dl20 \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-segmented-tuned.dl20.txt \
   --bm25 --rocchio --hits 10000 --max-passage-hits 1000 --max-passage
@@ -1402,7 +1402,7 @@ 

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-full \
+  --index msmarco-v1-doc-segmented \
   --topics msmarco-doc-dev \
   --output run.msmarco-v1-doc.bm25-rocchio-doc-segmented-tuned.dev.txt \
   --bm25 --rocchio --hits 10000 --max-passage-hits 1000 --max-passage
@@ -2360,21 +2360,21 @@ 

MS MARCO V1 Document

- + -[1] — (3b) -uniCOIL (w/ doc2query-T5): pre-encoded queries -0.2789 -0.6396 -0.6652 + +uniCOIL (noexp): query inference with PyTorch +0.2661 +0.6347 +0.6385 -0.3882 -0.6033 -0.7869 +0.3698 +0.5906 +0.7621 -0.3531 -0.9546 +0.3410 +0.9420 @@ -2403,17 +2403,18 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-unicoil \
-  --topics dl19-doc-unicoil \
-  --output run.msmarco-v1-doc.unicoil.dl19.txt \
+  --index msmarco-v1-doc-segmented-unicoil-noexp \
+  --topics dl19-doc \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
+  --output run.msmarco-v1-doc.unicoil-noexp-pytorch.dl19.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m map dl19-doc run.msmarco-v1-doc.unicoil.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-doc run.msmarco-v1-doc.unicoil.dl19.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 dl19-doc run.msmarco-v1-doc.unicoil.dl19.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m map dl19-doc run.msmarco-v1-doc.unicoil-noexp-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-doc run.msmarco-v1-doc.unicoil-noexp-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 dl19-doc run.msmarco-v1-doc.unicoil-noexp-pytorch.dl19.txt
 
@@ -2424,17 +2425,18 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-unicoil \
-  --topics dl20-unicoil \
-  --output run.msmarco-v1-doc.unicoil.dl20.txt \
+  --index msmarco-v1-doc-segmented-unicoil-noexp \
+  --topics dl20 \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
+  --output run.msmarco-v1-doc.unicoil-noexp-pytorch.dl20.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m map dl20-doc run.msmarco-v1-doc.unicoil.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-doc run.msmarco-v1-doc.unicoil.dl20.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 dl20-doc run.msmarco-v1-doc.unicoil.dl20.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m map dl20-doc run.msmarco-v1-doc.unicoil-noexp-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-doc run.msmarco-v1-doc.unicoil-noexp-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 dl20-doc run.msmarco-v1-doc.unicoil-noexp-pytorch.dl20.txt
 
@@ -2445,16 +2447,17 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-unicoil \
-  --topics msmarco-doc-dev-unicoil \
-  --output run.msmarco-v1-doc.unicoil.dev.txt \
+  --index msmarco-v1-doc-segmented-unicoil-noexp \
+  --topics msmarco-doc-dev \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
+  --output run.msmarco-v1-doc.unicoil-noexp-pytorch.dev.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank msmarco-doc-dev run.msmarco-v1-doc.unicoil.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-doc-dev run.msmarco-v1-doc.unicoil.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank msmarco-doc-dev run.msmarco-v1-doc.unicoil-noexp-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-doc-dev run.msmarco-v1-doc.unicoil-noexp-pytorch.dev.txt
 
@@ -2465,21 +2468,21 @@

MS MARCO V1 Document

- + - -uniCOIL (noexp): on-the-fly query inference -0.2661 -0.6347 -0.6385 +[1] — (3b) +uniCOIL (w/ doc2query-T5): pre-encoded queries +0.2789 +0.6396 +0.6652 -0.3698 -0.5906 -0.7621 +0.3882 +0.6033 +0.7869 -0.3410 -0.9420 +0.3531 +0.9546 @@ -2508,17 +2511,17 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-unicoil-noexp \
-  --topics dl19-doc --encoder castorini/unicoil-noexp-msmarco-passage \
-  --output run.msmarco-v1-doc.unicoil-noexp-otf.dl19.txt \
+  --index msmarco-v1-doc-segmented-unicoil \
+  --topics dl19-doc-unicoil \
+  --output run.msmarco-v1-doc.unicoil.dl19.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m map dl19-doc run.msmarco-v1-doc.unicoil-noexp-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-doc run.msmarco-v1-doc.unicoil-noexp-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 dl19-doc run.msmarco-v1-doc.unicoil-noexp-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m map dl19-doc run.msmarco-v1-doc.unicoil.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-doc run.msmarco-v1-doc.unicoil.dl19.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 dl19-doc run.msmarco-v1-doc.unicoil.dl19.txt
 
@@ -2529,17 +2532,17 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-unicoil-noexp \
-  --topics dl20 --encoder castorini/unicoil-noexp-msmarco-passage \
-  --output run.msmarco-v1-doc.unicoil-noexp-otf.dl20.txt \
+  --index msmarco-v1-doc-segmented-unicoil \
+  --topics dl20-unicoil \
+  --output run.msmarco-v1-doc.unicoil.dl20.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m map dl20-doc run.msmarco-v1-doc.unicoil-noexp-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-doc run.msmarco-v1-doc.unicoil-noexp-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 dl20-doc run.msmarco-v1-doc.unicoil-noexp-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m map dl20-doc run.msmarco-v1-doc.unicoil.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-doc run.msmarco-v1-doc.unicoil.dl20.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 dl20-doc run.msmarco-v1-doc.unicoil.dl20.txt
 
@@ -2550,16 +2553,16 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v1-doc-segmented-unicoil-noexp \
-  --topics msmarco-doc-dev --encoder castorini/unicoil-noexp-msmarco-passage \
-  --output run.msmarco-v1-doc.unicoil-noexp-otf.dev.txt \
+  --index msmarco-v1-doc-segmented-unicoil \
+  --topics msmarco-doc-dev-unicoil \
+  --output run.msmarco-v1-doc.unicoil.dev.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank msmarco-doc-dev run.msmarco-v1-doc.unicoil-noexp-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-doc-dev run.msmarco-v1-doc.unicoil-noexp-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank msmarco-doc-dev run.msmarco-v1-doc.unicoil.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-doc-dev run.msmarco-v1-doc.unicoil.dev.txt
 
@@ -2569,11 +2572,11 @@

MS MARCO V1 Document

- + -uniCOIL (w/ doc2query-T5): on-the-fly query inference +uniCOIL (w/ doc2query-T5): query inference with PyTorch 0.2789 0.6396 0.6654 @@ -2613,16 +2616,17 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v1-doc-segmented-unicoil \
-  --topics dl19-doc --encoder castorini/unicoil-msmarco-passage \
-  --output run.msmarco-v1-doc.unicoil-otf.dl19.txt \
+  --topics dl19-doc \
+  --encoder castorini/unicoil-msmarco-passage \
+  --output run.msmarco-v1-doc.unicoil-pytorch.dl19.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m map dl19-doc run.msmarco-v1-doc.unicoil-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-doc run.msmarco-v1-doc.unicoil-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 dl19-doc run.msmarco-v1-doc.unicoil-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m map dl19-doc run.msmarco-v1-doc.unicoil-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-doc run.msmarco-v1-doc.unicoil-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 dl19-doc run.msmarco-v1-doc.unicoil-pytorch.dl19.txt
 
@@ -2634,16 +2638,17 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v1-doc-segmented-unicoil \
-  --topics dl20 --encoder castorini/unicoil-msmarco-passage \
-  --output run.msmarco-v1-doc.unicoil-otf.dl20.txt \
+  --topics dl20 \
+  --encoder castorini/unicoil-msmarco-passage \
+  --output run.msmarco-v1-doc.unicoil-pytorch.dl20.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m map dl20-doc run.msmarco-v1-doc.unicoil-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-doc run.msmarco-v1-doc.unicoil-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 dl20-doc run.msmarco-v1-doc.unicoil-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m map dl20-doc run.msmarco-v1-doc.unicoil-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-doc run.msmarco-v1-doc.unicoil-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 dl20-doc run.msmarco-v1-doc.unicoil-pytorch.dl20.txt
 
@@ -2655,15 +2660,16 @@

MS MARCO V1 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v1-doc-segmented-unicoil \
-  --topics msmarco-doc-dev --encoder castorini/unicoil-msmarco-passage \
-  --output run.msmarco-v1-doc.unicoil-otf.dev.txt \
+  --topics msmarco-doc-dev \
+  --encoder castorini/unicoil-msmarco-passage \
+  --output run.msmarco-v1-doc.unicoil-pytorch.dev.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank msmarco-doc-dev run.msmarco-v1-doc.unicoil-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-doc-dev run.msmarco-v1-doc.unicoil-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank msmarco-doc-dev run.msmarco-v1-doc.unicoil-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-doc-dev run.msmarco-v1-doc.unicoil-pytorch.dev.txt
 
diff --git a/docs/2cr/msmarco-v1-passage.html b/docs/2cr/msmarco-v1-passage.html
index e18001ad6..bc29d5ec8 100644
--- a/docs/2cr/msmarco-v1-passage.html
+++ b/docs/2cr/msmarco-v1-passage.html
@@ -1526,11 +1526,11 @@

MS MARCO V1 Passage

- + -uniCOIL (w/ doc2query-T5): on-the-fly query inference with PyTorch +uniCOIL (w/ doc2query-T5): query inference with PyTorch 0.4617 0.7027 0.8291 @@ -1572,15 +1572,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil \ --topics dl19-passage \ --encoder castorini/unicoil-msmarco-passage \ - --output run.msmarco-v1-passage.unicoil-otf.dl19.txt \ + --output run.msmarco-v1-passage.unicoil-pytorch.dl19.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.unicoil-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.unicoil-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.unicoil-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.unicoil-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.unicoil-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.unicoil-pytorch.dl19.txt
 
@@ -1594,15 +1594,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil \ --topics dl20 \ --encoder castorini/unicoil-msmarco-passage \ - --output run.msmarco-v1-passage.unicoil-otf.dl20.txt \ + --output run.msmarco-v1-passage.unicoil-pytorch.dl20.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.unicoil-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.unicoil-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.unicoil-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.unicoil-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.unicoil-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.unicoil-pytorch.dl20.txt
 
@@ -1616,14 +1616,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil \ --topics msmarco-passage-dev-subset \ --encoder castorini/unicoil-msmarco-passage \ - --output run.msmarco-v1-passage.unicoil-otf.dev.txt \ + --output run.msmarco-v1-passage.unicoil-pytorch.dev.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-pytorch.dev.txt
 
@@ -1633,11 +1633,11 @@

MS MARCO V1 Passage

- + -uniCOIL (w/ doc2query-T5): on-the-fly query inference with ONNX Runtime +uniCOIL (w/ doc2query-T5): query inference with ONNX 0.4617 0.7027 0.8291 @@ -1679,15 +1679,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil \ --topics dl19-passage \ --onnx-encoder UniCoil \ - --output run.msmarco-v1-passage.unicoil-otf-onnx.dl19.txt \ + --output run.msmarco-v1-passage.unicoil-onnx.dl19.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.unicoil-otf-onnx.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.unicoil-otf-onnx.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.unicoil-otf-onnx.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.unicoil-onnx.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.unicoil-onnx.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.unicoil-onnx.dl19.txt
 
@@ -1701,15 +1701,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil \ --topics dl20 \ --onnx-encoder UniCoil \ - --output run.msmarco-v1-passage.unicoil-otf-onnx.dl20.txt \ + --output run.msmarco-v1-passage.unicoil-onnx.dl20.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.unicoil-otf-onnx.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.unicoil-otf-onnx.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.unicoil-otf-onnx.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.unicoil-onnx.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.unicoil-onnx.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.unicoil-onnx.dl20.txt
 
@@ -1723,14 +1723,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil \ --topics msmarco-passage-dev-subset \ --onnx-encoder UniCoil \ - --output run.msmarco-v1-passage.unicoil-otf-onnx.dev.txt \ + --output run.msmarco-v1-passage.unicoil-onnx.dev.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-otf-onnx.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-otf-onnx.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-onnx.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-onnx.dev.txt
 
@@ -1844,11 +1844,11 @@

MS MARCO V1 Passage

- + -uniCOIL (noexp): on-the-fly query inference with PyTorch +uniCOIL (noexp): query inference with PyTorch 0.4033 0.6434 0.7752 @@ -1890,15 +1890,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil-noexp \ --topics dl19-passage \ --encoder castorini/unicoil-noexp-msmarco-passage \ - --output run.msmarco-v1-passage.unicoil-noexp-otf.dl19.txt \ + --output run.msmarco-v1-passage.unicoil-noexp-pytorch.dl19.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.unicoil-noexp-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.unicoil-noexp-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.unicoil-noexp-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.unicoil-noexp-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.unicoil-noexp-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.unicoil-noexp-pytorch.dl19.txt
 
@@ -1912,15 +1912,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil-noexp \ --topics dl20 \ --encoder castorini/unicoil-noexp-msmarco-passage \ - --output run.msmarco-v1-passage.unicoil-noexp-otf.dl20.txt \ + --output run.msmarco-v1-passage.unicoil-noexp-pytorch.dl20.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.unicoil-noexp-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.unicoil-noexp-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.unicoil-noexp-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.unicoil-noexp-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.unicoil-noexp-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.unicoil-noexp-pytorch.dl20.txt
 
@@ -1934,14 +1934,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil-noexp \ --topics msmarco-passage-dev-subset \ --encoder castorini/unicoil-noexp-msmarco-passage \ - --output run.msmarco-v1-passage.unicoil-noexp-otf.dev.txt \ + --output run.msmarco-v1-passage.unicoil-noexp-pytorch.dev.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-noexp-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-noexp-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-noexp-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-noexp-pytorch.dev.txt
 
@@ -1951,11 +1951,11 @@

MS MARCO V1 Passage

- + -uniCOIL (noexp): on-the-fly query inference with ONNX Runtime +uniCOIL (noexp): query inference with ONNX 0.4061 0.6531 0.7809 @@ -1997,15 +1997,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil-noexp \ --topics dl19-passage \ --onnx-encoder UniCoil \ - --output run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dl19.txt \ + --output run.msmarco-v1-passage.unicoil-noexp-onnx.dl19.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.unicoil-noexp-onnx.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.unicoil-noexp-onnx.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.unicoil-noexp-onnx.dl19.txt
 
@@ -2019,15 +2019,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil-noexp \ --topics dl20 \ --onnx-encoder UniCoil \ - --output run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dl20.txt \ + --output run.msmarco-v1-passage.unicoil-noexp-onnx.dl20.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.unicoil-noexp-onnx.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.unicoil-noexp-onnx.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.unicoil-noexp-onnx.dl20.txt
 
@@ -2041,14 +2041,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-unicoil-noexp \ --topics msmarco-passage-dev-subset \ --onnx-encoder UniCoil \ - --output run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dev.txt \ + --output run.msmarco-v1-passage.unicoil-noexp-onnx.dev.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-noexp-otf-onnx.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-noexp-onnx.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.unicoil-noexp-onnx.dev.txt
 
@@ -2059,11 +2059,11 @@

MS MARCO V1 Passage

- + [2] -SPLADE++ CoCondenser-EnsembleDistil with ONNX Runtime +SPLADE++ EnsembleDistil: query inference with ONNX 0.5054 0.7320 0.8724 @@ -2105,15 +2105,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-splade-pp-ed \ --topics dl19-passage \ --onnx-encoder SpladePlusPlusEnsembleDistil \ - --output run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dl19.txt \ + --output run.msmarco-v1-passage.splade-pp-ed-onnx.dl19.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.splade-pp-ed-onnx.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.splade-pp-ed-onnx.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.splade-pp-ed-onnx.dl19.txt
 
@@ -2127,15 +2127,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-splade-pp-ed \ --topics dl20 \ --onnx-encoder SpladePlusPlusEnsembleDistil \ - --output run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dl20.txt \ + --output run.msmarco-v1-passage.splade-pp-ed-onnx.dl20.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.splade-pp-ed-onnx.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.splade-pp-ed-onnx.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.splade-pp-ed-onnx.dl20.txt
 
@@ -2149,14 +2149,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-splade-pp-ed \ --topics msmarco-passage-dev-subset \ --onnx-encoder SpladePlusPlusEnsembleDistil \ - --output run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dev.txt \ + --output run.msmarco-v1-passage.splade-pp-ed-onnx.dev.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.splade-pp-ed-otf-onnx.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.splade-pp-ed-onnx.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.splade-pp-ed-onnx.dev.txt
 
@@ -2166,11 +2166,11 @@

MS MARCO V1 Passage

- + [2] -SPLADE++ CoCondenser-SelfDistil with ONNX Runtime +SPLADE++ SelfDistil: query inference with ONNX 0.4997 0.7356 0.8758 @@ -2212,15 +2212,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-splade-pp-sd \ --topics dl19-passage \ --onnx-encoder SpladePlusPlusSelfDistil \ - --output run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dl19.txt \ + --output run.msmarco-v1-passage.splade-pp-sd-onnx.dl19.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.splade-pp-sd-onnx.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.splade-pp-sd-onnx.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.splade-pp-sd-onnx.dl19.txt
 
@@ -2234,15 +2234,15 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-splade-pp-sd \ --topics dl20 \ --onnx-encoder SpladePlusPlusSelfDistil \ - --output run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dl20.txt \ + --output run.msmarco-v1-passage.splade-pp-sd-onnx.dl20.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.splade-pp-sd-onnx.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.splade-pp-sd-onnx.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.splade-pp-sd-onnx.dl20.txt
 
@@ -2256,14 +2256,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage-splade-pp-sd \ --topics msmarco-passage-dev-subset \ --onnx-encoder SpladePlusPlusSelfDistil \ - --output run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dev.txt \ + --output run.msmarco-v1-passage.splade-pp-sd-onnx.dev.txt \ --hits 1000 --impact
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.splade-pp-sd-otf-onnx.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.splade-pp-sd-onnx.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.splade-pp-sd-onnx.dev.txt
 
@@ -2375,11 +2375,11 @@

MS MARCO V1 Passage

- + [3] -ANCE: on-the-fly query inference with PyTorch +ANCE: query inference with PyTorch 0.3710 0.6452 0.7554 @@ -2421,14 +2421,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.ance \ --topics dl19-passage \ --encoder castorini/ance-msmarco-passage \ - --output run.msmarco-v1-passage.ance-otf.dl19.txt + --output run.msmarco-v1-passage.ance-pytorch.dl19.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.ance-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.ance-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.ance-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.ance-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.ance-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.ance-pytorch.dl19.txt
 
@@ -2442,14 +2442,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.ance \ --topics dl20 \ --encoder castorini/ance-msmarco-passage \ - --output run.msmarco-v1-passage.ance-otf.dl20.txt + --output run.msmarco-v1-passage.ance-pytorch.dl20.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.ance-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.ance-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.ance-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.ance-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.ance-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.ance-pytorch.dl20.txt
 
@@ -2463,13 +2463,13 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.ance \ --topics msmarco-passage-dev-subset \ --encoder castorini/ance-msmarco-passage \ - --output run.msmarco-v1-passage.ance-otf.dev.txt + --output run.msmarco-v1-passage.ance-pytorch.dev.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.ance-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.ance-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.ance-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.ance-pytorch.dev.txt
 
@@ -2581,11 +2581,11 @@

MS MARCO V1 Passage

- + [4] -DistilBERT KD: on-the-fly query inference with PyTorch +DistilBERT KD: query inference with PyTorch 0.4053 0.6994 0.7653 @@ -2627,14 +2627,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.distilbert-dot-margin-mse-t2 \ --topics dl19-passage \ --encoder sebastian-hofstaetter/distilbert-dot-margin_mse-T2-msmarco \ - --output run.msmarco-v1-passage.distilbert-kd-otf.dl19.txt + --output run.msmarco-v1-passage.distilbert-kd-pytorch.dl19.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.distilbert-kd-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.distilbert-kd-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.distilbert-kd-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.distilbert-kd-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.distilbert-kd-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.distilbert-kd-pytorch.dl19.txt
 
@@ -2648,14 +2648,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.distilbert-dot-margin-mse-t2 \ --topics dl20 \ --encoder sebastian-hofstaetter/distilbert-dot-margin_mse-T2-msmarco \ - --output run.msmarco-v1-passage.distilbert-kd-otf.dl20.txt + --output run.msmarco-v1-passage.distilbert-kd-pytorch.dl20.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.distilbert-kd-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.distilbert-kd-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.distilbert-kd-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.distilbert-kd-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.distilbert-kd-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.distilbert-kd-pytorch.dl20.txt
 
@@ -2669,13 +2669,13 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.distilbert-dot-margin-mse-t2 \ --topics msmarco-passage-dev-subset \ --encoder sebastian-hofstaetter/distilbert-dot-margin_mse-T2-msmarco \ - --output run.msmarco-v1-passage.distilbert-kd-otf.dev.txt + --output run.msmarco-v1-passage.distilbert-kd-pytorch.dev.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.distilbert-kd-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.distilbert-kd-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.distilbert-kd-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.distilbert-kd-pytorch.dev.txt
 
@@ -2786,11 +2786,11 @@

MS MARCO V1 Passage

- + [5] -DistilBERT KD TASB: on-the-fly query inference with PyTorch +DistilBERT KD TASB: query inference with PyTorch 0.4590 0.7210 0.8406 @@ -2832,14 +2832,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.distilbert-dot-tas_b-b256 \ --topics dl19-passage \ --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco \ - --output run.msmarco-v1-passage.distilbert-kd-tasb-otf.dl19.txt + --output run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dl19.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.distilbert-kd-tasb-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.distilbert-kd-tasb-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.distilbert-kd-tasb-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dl19.txt
 
@@ -2853,14 +2853,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.distilbert-dot-tas_b-b256 \ --topics dl20 \ --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco \ - --output run.msmarco-v1-passage.distilbert-kd-tasb-otf.dl20.txt + --output run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dl20.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.distilbert-kd-tasb-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.distilbert-kd-tasb-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.distilbert-kd-tasb-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dl20.txt
 
@@ -2874,13 +2874,13 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.distilbert-dot-tas_b-b256 \ --topics msmarco-passage-dev-subset \ --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco \ - --output run.msmarco-v1-passage.distilbert-kd-tasb-otf.dev.txt + --output run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dev.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.distilbert-kd-tasb-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.distilbert-kd-tasb-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.distilbert-kd-tasb-pytorch.dev.txt
 
@@ -2992,11 +2992,11 @@

MS MARCO V1 Passage

- + [6] -TCT_ColBERT-V2-HN+: on-the-fly query inference with PyTorch +TCT_ColBERT-V2-HN+: query inference with PyTorch 0.4469 0.7204 0.8261 @@ -3038,14 +3038,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.tct_colbert-v2-hnp \ --topics dl19-passage \ --encoder castorini/tct_colbert-v2-hnp-msmarco \ - --output run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dl19.txt + --output run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dl19.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dl19.txt
 
@@ -3059,14 +3059,14 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.tct_colbert-v2-hnp \ --topics dl20 \ --encoder castorini/tct_colbert-v2-hnp-msmarco \ - --output run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dl20.txt + --output run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dl20.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dl20.txt
 
@@ -3080,13 +3080,13 @@

MS MARCO V1 Passage

--index msmarco-v1-passage.tct_colbert-v2-hnp \ --topics msmarco-passage-dev-subset \ --encoder castorini/tct_colbert-v2-hnp-msmarco \ - --output run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dev.txt + --output run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dev.txt
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.tct_colbert-v2-hnp-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.tct_colbert-v2-hnp-pytorch.dev.txt
 
@@ -3097,11 +3097,11 @@

MS MARCO V1 Passage

- + [7] -SLIM: on-the-fly query inference with PyTorch +SLIM: query inference with PyTorch 0.4509 0.7010 0.8241 @@ -3207,11 +3207,11 @@

MS MARCO V1 Passage

- + [7] -SLIM++: on-the-fly query inference with PyTorch +SLIM++: query inference with PyTorch 0.4687 0.7140 0.8415 @@ -3318,11 +3318,11 @@

MS MARCO V1 Passage

- + [8] -Aggretriever-DistilBERT: on-the-fly query inference with PyTorch +Aggretriever-DistilBERT: query inference with PyTorch 0.4301 0.6816 0.8023 @@ -3361,17 +3361,17 @@

MS MARCO V1 Passage

python -m pyserini.search.faiss \
   --threads 16 --batch-size 512 \
-  --index msmarco-passage.aggretriever-distilbert \
+  --index msmarco-v1-passage.aggretriever-distilbert \
   --topics dl19-passage \
   --encoder castorini/aggretriever-distilbert \
-  --output run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dl19.txt
+  --output run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dl19.txt
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dl19.txt
 
@@ -3382,17 +3382,17 @@

MS MARCO V1 Passage

python -m pyserini.search.faiss \
   --threads 16 --batch-size 512 \
-  --index msmarco-passage.aggretriever-distilbert \
+  --index msmarco-v1-passage.aggretriever-distilbert \
   --topics dl20 \
   --encoder castorini/aggretriever-distilbert \
-  --output run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dl20.txt
+  --output run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dl20.txt
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dl20.txt
 
@@ -3403,16 +3403,16 @@

MS MARCO V1 Passage

python -m pyserini.search.faiss \
   --threads 16 --batch-size 512 \
-  --index msmarco-passage.aggretriever-distilbert \
+  --index msmarco-v1-passage.aggretriever-distilbert \
   --topics msmarco-passage-dev-subset \
   --encoder castorini/aggretriever-distilbert \
-  --output run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dev.txt
+  --output run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dev.txt
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.Aggretriever-Distilbert-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.aggretriever-distilbert-pytorch.dev.txt
 
@@ -3422,11 +3422,11 @@

MS MARCO V1 Passage

- + [8] -Aggretriever-coCondenser: on-the-fly query inference with PyTorch +Aggretriever-coCondenser: query inference with PyTorch 0.4350 0.6837 0.8078 @@ -3465,17 +3465,17 @@

MS MARCO V1 Passage

python -m pyserini.search.faiss \
   --threads 16 --batch-size 512 \
-  --index msmarco-passage.aggretriever-cocondenser \
+  --index msmarco-v1-passage.aggretriever-cocondenser \
   --topics dl19-passage \
   --encoder castorini/aggretriever-cocondenser \
-  --output run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dl19.txt
+  --output run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dl19.txt
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dl19.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dl19.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dl19.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dl19.txt
 
@@ -3486,17 +3486,17 @@

MS MARCO V1 Passage

python -m pyserini.search.faiss \
   --threads 16 --batch-size 512 \
-  --index msmarco-passage.aggretriever-cocondenser \
+  --index msmarco-v1-passage.aggretriever-cocondenser \
   --topics dl20 \
   --encoder castorini/aggretriever-cocondenser \
-  --output run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dl20.txt
+  --output run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dl20.txt
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dl20.txt
-python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dl20.txt
+
python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dl20.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dl20.txt
 
@@ -3507,16 +3507,16 @@

MS MARCO V1 Passage

python -m pyserini.search.faiss \
   --threads 16 --batch-size 512 \
-  --index msmarco-passage.aggretriever-cocondenser \
+  --index msmarco-v1-passage.aggretriever-cocondenser \
   --topics msmarco-passage-dev-subset \
   --encoder castorini/aggretriever-cocondenser \
-  --output run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dev.txt
+  --output run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dev.txt
 
Evaluation commands:
-
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dev.txt
-python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.Aggretriever-coCondenser-otf.dev.txt
+
python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset run.msmarco-v1-passage.aggretriever-cocondenser-pytorch.dev.txt
 
diff --git a/docs/2cr/msmarco-v2-doc.html b/docs/2cr/msmarco-v2-doc.html index 5e5e05fdd..6e2691f1e 100644 --- a/docs/2cr/msmarco-v2-doc.html +++ b/docs/2cr/msmarco-v2-doc.html @@ -222,7 +222,7 @@

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-slim \
+  --index msmarco-v2-doc \
   --topics dl21 \
   --output run.msmarco-v2-doc.bm25-doc-default.dl21.txt \
   --bm25
@@ -245,7 +245,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-slim \
+  --index msmarco-v2-doc \
   --topics msmarco-v2-doc-dev \
   --output run.msmarco-v2-doc.bm25-doc-default.dev.txt \
   --bm25
@@ -265,7 +265,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-slim \
+  --index msmarco-v2-doc \
   --topics msmarco-v2-doc-dev2 \
   --output run.msmarco-v2-doc.bm25-doc-default.dev2.txt \
   --bm25
@@ -328,7 +328,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-segmented-slim \
+  --index msmarco-v2-doc-segmented \
   --topics dl21 \
   --output run.msmarco-v2-doc.bm25-doc-segmented-default.dl21.txt \
   --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -351,7 +351,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-segmented-slim \
+  --index msmarco-v2-doc-segmented \
   --topics msmarco-v2-doc-dev \
   --output run.msmarco-v2-doc.bm25-doc-segmented-default.dev.txt \
   --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -371,7 +371,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-segmented-slim \
+  --index msmarco-v2-doc-segmented \
   --topics msmarco-v2-doc-dev2 \
   --output run.msmarco-v2-doc.bm25-doc-segmented-default.dev2.txt \
   --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -434,7 +434,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-full \
+  --index msmarco-v2-doc \
   --topics dl21 \
   --output run.msmarco-v2-doc.bm25-rm3-doc-default.dl21.txt \
   --bm25 --rm3
@@ -457,7 +457,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-full \
+  --index msmarco-v2-doc \
   --topics msmarco-v2-doc-dev \
   --output run.msmarco-v2-doc.bm25-rm3-doc-default.dev.txt \
   --bm25 --rm3
@@ -477,7 +477,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-full \
+  --index msmarco-v2-doc \
   --topics msmarco-v2-doc-dev2 \
   --output run.msmarco-v2-doc.bm25-rm3-doc-default.dev2.txt \
   --bm25 --rm3
@@ -540,7 +540,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-segmented-full \
+  --index msmarco-v2-doc-segmented \
   --topics dl21 \
   --output run.msmarco-v2-doc.bm25-rm3-doc-segmented-default.dl21.txt \
   --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -563,7 +563,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-segmented-full \
+  --index msmarco-v2-doc-segmented \
   --topics msmarco-v2-doc-dev \
   --output run.msmarco-v2-doc.bm25-rm3-doc-segmented-default.dev.txt \
   --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -583,7 +583,7 @@ 

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-doc-segmented-full \
+  --index msmarco-v2-doc-segmented \
   --topics msmarco-v2-doc-dev2 \
   --output run.msmarco-v2-doc.bm25-rm3-doc-segmented-default.dev2.txt \
   --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
@@ -1241,11 +1241,11 @@ 

MS MARCO V2 Document

- + -uniCOIL (noexp): on-the-fly query inference +uniCOIL (noexp): query inference with PyTorch 0.2589 0.6501 0.9282 @@ -1286,7 +1286,8 @@

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-doc-segmented-unicoil-noexp-0shot \
-  --topics dl21 --encoder castorini/unicoil-noexp-msmarco-passage \
+  --topics dl21 \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
   --output run.msmarco-v2-doc.unicoil-noexp-otf.dl21.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
@@ -1309,7 +1310,8 @@

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-doc-segmented-unicoil-noexp-0shot \
-  --topics msmarco-v2-doc-dev --encoder castorini/unicoil-noexp-msmarco-passage \
+  --topics msmarco-v2-doc-dev \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
   --output run.msmarco-v2-doc.unicoil-noexp-otf.dev.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
@@ -1329,7 +1331,8 @@

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-doc-segmented-unicoil-noexp-0shot \
-  --topics msmarco-v2-doc-dev2 --encoder castorini/unicoil-noexp-msmarco-passage \
+  --topics msmarco-v2-doc-dev2 \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
   --output run.msmarco-v2-doc.unicoil-noexp-otf.dev2.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
@@ -1347,11 +1350,11 @@

MS MARCO V2 Document

- + -uniCOIL (w/ doc2query-T5): on-the-fly query inference +uniCOIL (w/ doc2query-T5): query inference with PyTorch 0.2720 0.6782 0.9684 @@ -1392,7 +1395,8 @@

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-doc-segmented-unicoil-0shot \
-  --topics dl21 --encoder castorini/unicoil-msmarco-passage \
+  --topics dl21 \
+  --encoder castorini/unicoil-msmarco-passage \
   --output run.msmarco-v2-doc.unicoil-otf.dl21.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
@@ -1415,7 +1419,8 @@

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-doc-segmented-unicoil-0shot \
-  --topics msmarco-v2-doc-dev --encoder castorini/unicoil-msmarco-passage \
+  --topics msmarco-v2-doc-dev \
+  --encoder castorini/unicoil-msmarco-passage \
   --output run.msmarco-v2-doc.unicoil-otf.dev.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
@@ -1435,7 +1440,8 @@

MS MARCO V2 Document

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-doc-segmented-unicoil-0shot \
-  --topics msmarco-v2-doc-dev2 --encoder castorini/unicoil-msmarco-passage \
+  --topics msmarco-v2-doc-dev2 \
+  --encoder castorini/unicoil-msmarco-passage \
   --output run.msmarco-v2-doc.unicoil-otf.dev2.txt \
   --impact --hits 10000 --max-passage-hits 1000 --max-passage
 
diff --git a/docs/2cr/msmarco-v2-passage.html b/docs/2cr/msmarco-v2-passage.html index 28b8e32f2..a878ceb88 100644 --- a/docs/2cr/msmarco-v2-passage.html +++ b/docs/2cr/msmarco-v2-passage.html @@ -222,7 +222,7 @@

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-slim \
+  --index msmarco-v2-passage \
   --topics dl21 \
   --output run.msmarco-v2-passage.bm25-default.dl21.txt \
   --bm25
@@ -245,7 +245,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-slim \
+  --index msmarco-v2-passage \
   --topics msmarco-v2-passage-dev \
   --output run.msmarco-v2-passage.bm25-default.dev.txt \
   --bm25
@@ -265,7 +265,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-slim \
+  --index msmarco-v2-passage \
   --topics msmarco-v2-passage-dev2 \
   --output run.msmarco-v2-passage.bm25-default.dev2.txt \
   --bm25
@@ -328,7 +328,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-augmented-slim \
+  --index msmarco-v2-passage-augmented \
   --topics dl21 \
   --output run.msmarco-v2-passage.bm25-augmented-default.dl21.txt \
   --bm25
@@ -351,7 +351,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-augmented-slim \
+  --index msmarco-v2-passage-augmented \
   --topics msmarco-v2-passage-dev \
   --output run.msmarco-v2-passage.bm25-augmented-default.dev.txt \
   --bm25
@@ -371,7 +371,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-augmented-slim \
+  --index msmarco-v2-passage-augmented \
   --topics msmarco-v2-passage-dev2 \
   --output run.msmarco-v2-passage.bm25-augmented-default.dev2.txt \
   --bm25
@@ -434,7 +434,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-full \
+  --index msmarco-v2-passage \
   --topics dl21 \
   --output run.msmarco-v2-passage.bm25-rm3-default.dl21.txt \
   --bm25 --rm3
@@ -457,7 +457,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-full \
+  --index msmarco-v2-passage \
   --topics msmarco-v2-passage-dev \
   --output run.msmarco-v2-passage.bm25-rm3-default.dev.txt \
   --bm25 --rm3
@@ -477,7 +477,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-full \
+  --index msmarco-v2-passage \
   --topics msmarco-v2-passage-dev2 \
   --output run.msmarco-v2-passage.bm25-rm3-default.dev2.txt \
   --bm25 --rm3
@@ -540,7 +540,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-augmented-full \
+  --index msmarco-v2-passage-augmented \
   --topics dl21 \
   --output run.msmarco-v2-passage.bm25-rm3-augmented-default.dl21.txt \
   --bm25 --rm3
@@ -563,7 +563,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-augmented-full \
+  --index msmarco-v2-passage-augmented \
   --topics msmarco-v2-passage-dev \
   --output run.msmarco-v2-passage.bm25-rm3-augmented-default.dev.txt \
   --bm25 --rm3
@@ -583,7 +583,7 @@ 

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
-  --index msmarco-v2-passage-augmented-full \
+  --index msmarco-v2-passage-augmented \
   --topics msmarco-v2-passage-dev2 \
   --output run.msmarco-v2-passage.bm25-rm3-augmented-default.dev2.txt \
   --bm25 --rm3
@@ -1241,11 +1241,11 @@ 

MS MARCO V2 Passage

- + -uniCOIL (noexp): on-the-fly query inference +uniCOIL (noexp): query inference with PyTorch 0.2194 0.5759 0.6991 @@ -1286,7 +1286,8 @@

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-passage-unicoil-noexp-0shot \
-  --topics dl21 --encoder castorini/unicoil-noexp-msmarco-passage \
+  --topics dl21 \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
   --output run.msmarco-v2-passage.unicoil-noexp-otf.dl21.txt \
   --hits 1000 --impact
 
@@ -1309,7 +1310,8 @@

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-passage-unicoil-noexp-0shot \
-  --topics msmarco-v2-passage-dev --encoder castorini/unicoil-noexp-msmarco-passage \
+  --topics msmarco-v2-passage-dev \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
   --output run.msmarco-v2-passage.unicoil-noexp-otf.dev.txt \
   --hits 1000 --impact
 
@@ -1329,7 +1331,8 @@

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-passage-unicoil-noexp-0shot \
-  --topics msmarco-v2-passage-dev2 --encoder castorini/unicoil-noexp-msmarco-passage \
+  --topics msmarco-v2-passage-dev2 \
+  --encoder castorini/unicoil-noexp-msmarco-passage \
   --output run.msmarco-v2-passage.unicoil-noexp-otf.dev2.txt \
   --hits 1000 --impact
 
@@ -1347,11 +1350,11 @@

MS MARCO V2 Passage

- + -uniCOIL (w/ doc2query-T5): on-the-fly query inference +uniCOIL (w/ doc2query-T5): query inference with PyTorch 0.2539 0.6160 0.7311 @@ -1392,7 +1395,8 @@

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-passage-unicoil-0shot \
-  --topics dl21 --encoder castorini/unicoil-msmarco-passage \
+  --topics dl21 \
+  --encoder castorini/unicoil-msmarco-passage \
   --output run.msmarco-v2-passage.unicoil-otf.dl21.txt \
   --hits 1000 --impact
 
@@ -1415,7 +1419,8 @@

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-passage-unicoil-0shot \
-  --topics msmarco-v2-passage-dev --encoder castorini/unicoil-msmarco-passage \
+  --topics msmarco-v2-passage-dev \
+  --encoder castorini/unicoil-msmarco-passage \
   --output run.msmarco-v2-passage.unicoil-otf.dev.txt \
   --hits 1000 --impact
 
@@ -1435,7 +1440,8 @@

MS MARCO V2 Passage

python -m pyserini.search.lucene \
   --threads 16 --batch-size 128 \
   --index msmarco-v2-passage-unicoil-0shot \
-  --topics msmarco-v2-passage-dev2 --encoder castorini/unicoil-msmarco-passage \
+  --topics msmarco-v2-passage-dev2 \
+  --encoder castorini/unicoil-msmarco-passage \
   --output run.msmarco-v2-passage.unicoil-otf.dev2.txt \
   --hits 1000 --impact
 
diff --git a/pyserini/2cr/msmarco-v1-doc.yaml b/pyserini/2cr/msmarco-v1-doc.yaml index bd9d0b4e6..ca015d38c 100644 --- a/pyserini/2cr/msmarco-v1-doc.yaml +++ b/pyserini/2cr/msmarco-v1-doc.yaml @@ -447,9 +447,9 @@ conditions: - MAP: 0.4271 nDCG@10: 0.5851 R@1K: 0.8266 - - name: unicoil-noexp-otf - display: "uniCOIL (noexp): otf" - display-html: "uniCOIL (noexp): on-the-fly query inference" + - name: unicoil-noexp-pytorch + display: "uniCOIL (noexp): query inference with PyTorch" + display-html: "uniCOIL (noexp): query inference with PyTorch" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil-noexp --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage topics: - topic_key: msmarco-doc-dev @@ -492,9 +492,9 @@ conditions: - MAP: 0.3698 nDCG@10: 0.5893 R@1K: 0.7623 - - name: unicoil-otf - display: "uniCOIL (w/ doc2query-T5): otf" - display-html: "uniCOIL (w/ doc2query-T5): on-the-fly query inference" + - name: unicoil-pytorch + display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-doc-segmented-unicoil --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage topics: - topic_key: msmarco-doc-dev diff --git a/pyserini/2cr/msmarco-v1-passage.yaml b/pyserini/2cr/msmarco-v1-passage.yaml index 3b5d25f70..ef3932cb3 100644 --- a/pyserini/2cr/msmarco-v1-passage.yaml +++ b/pyserini/2cr/msmarco-v1-passage.yaml @@ -87,9 +87,9 @@ conditions: - MAP: 0.3120 nDCG@10: 0.4908 R@1K: 0.8327 - - name: distilbert-kd-tasb-otf - display: "DistilBERT KD TASB: otf with PyTorch" - display-html: "DistilBERT KD TASB: on-the-fly query inference with PyTorch" + - name: distilbert-kd-tasb-pytorch + 
display: "DistilBERT KD TASB: query inference with PyTorch" + display-html: "DistilBERT KD TASB: query inference with PyTorch" display-row: "[5]" command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.distilbert-dot-tas_b-b256 --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco --output $output topics: @@ -133,9 +133,9 @@ conditions: - MAP: 0.4698 nDCG@10: 0.6854 R@1K: 0.8727 - - name: distilbert-kd-otf - display: "DistilBERT KD: otf with PyTorch" - display-html: "DistilBERT KD: on-the-fly query inference with PyTorch" + - name: distilbert-kd-pytorch + display: "DistilBERT KD: query inference with PyTorch" + display-html: "DistilBERT KD: query inference with PyTorch" display-row: "[4]" command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.distilbert-dot-margin-mse-t2 --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-margin_mse-T2-msmarco --output $output topics: @@ -179,9 +179,9 @@ conditions: - MAP: 0.4159 nDCG@10: 0.6447 R@1K: 0.7953 - - name: ance-otf - display: "ANCE: otf with PyTorch" - display-html: "ANCE: on-the-fly query inference with PyTorch" + - name: ance-pytorch + display: "ANCE: query inference with PyTorch" + display-html: "ANCE: query inference with PyTorch" display-row: "[3]" command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.ance --topics $topics --encoder castorini/ance-msmarco-passage --output $output topics: @@ -405,9 +405,9 @@ conditions: - MAP: 0.4286 nDCG@10: 0.6131 R@1K: 0.8700 - - name: unicoil-otf - display: "uniCOIL (w/ doc2query-T5): otf with PyTorch" - display-html: "uniCOIL (w/ doc2query-T5): on-the-fly query inference with PyTorch" + - name: unicoil-pytorch + display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" command: python -m pyserini.search.lucene --threads 16 
--batch-size 128 --index msmarco-v1-passage-unicoil --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --hits 1000 --impact topics: - topic_key: msmarco-passage-dev-subset @@ -427,9 +427,9 @@ conditions: - MAP: 0.4429 nDCG@10: 0.6745 R@1K: 0.8433 - - name: unicoil-otf-onnx - display: "uniCOIL (w/ doc2query-T5): otf with ONNX Runtime" - display-html: "uniCOIL (w/ doc2query-T5): on-the-fly query inference with ONNX Runtime" + - name: unicoil-onnx + display: "uniCOIL (w/ doc2query-T5): query inference with ONNX" + display-html: "uniCOIL (w/ doc2query-T5): query inference with ONNX" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil --topics $topics --onnx-encoder UniCoil --output $output --hits 1000 --impact topics: - topic_key: msmarco-passage-dev-subset @@ -472,9 +472,9 @@ conditions: - MAP: 0.4430 nDCG@10: 0.6745 R@1K: 0.8430 - - name: unicoil-noexp-otf - display: "uniCOIL (noexp): otf with PyTorch" - display-html: "uniCOIL (noexp): on-the-fly query inference with PyTorch" + - name: unicoil-noexp-pytorch + display: "uniCOIL (noexp): query inference with PyTorch" + display-html: "uniCOIL (noexp): query inference with PyTorch" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil-noexp --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --hits 1000 --impact topics: - topic_key: msmarco-passage-dev-subset @@ -494,9 +494,9 @@ conditions: - MAP: 0.4022 nDCG@10: 0.6524 R@1K: 0.7861 - - name: unicoil-noexp-otf-onnx - display: "uniCOIL (noexp): otf with ONNX Runtime" - display-html: "uniCOIL (noexp): on-the-fly query inference with ONNX Runtime" + - name: unicoil-noexp-onnx + display: "uniCOIL (noexp): query inference with ONNX" + display-html: "uniCOIL (noexp): query inference with ONNX" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-unicoil-noexp --topics 
$topics --onnx-encoder UniCoil --output $output --hits 1000 --impact topics: - topic_key: msmarco-passage-dev-subset @@ -539,9 +539,9 @@ conditions: - MAP: 0.4021 nDCG@10: 0.6523 R@1K: 0.7861 - - name: splade-pp-ed-otf-onnx - display: "SPLADE++ CoCondenser-EnsembleDistil with ONNX Runtime" - display-html: "SPLADE++ CoCondenser-EnsembleDistil with ONNX Runtime" + - name: splade-pp-ed-onnx + display: "SPLADE++ EnsembleDistil: query inference with ONNX" + display-html: "SPLADE++ EnsembleDistil: query inference with ONNX" display-row: "[2]" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-splade-pp-ed --topics $topics --onnx-encoder SpladePlusPlusEnsembleDistil --output $output --hits 1000 --impact topics: @@ -562,9 +562,9 @@ conditions: - MAP: 0.5002 nDCG@10: 0.7198 R@1K: 0.8995 - - name: splade-pp-sd-otf-onnx - display: "SPLADE++ CoCondenser-SelfDistil with ONNX Runtime" - display-html: "SPLADE++ CoCondenser-SelfDistil with ONNX Runtime" + - name: splade-pp-sd-onnx + display: "SPLADE++ SelfDistil: query inference with ONNX" + display-html: "SPLADE++ SelfDistil: query inference with ONNX" display-row: "[2]" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v1-passage-splade-pp-sd --topics $topics --onnx-encoder SpladePlusPlusSelfDistil --output $output --hits 1000 --impact topics: @@ -585,9 +585,9 @@ conditions: - MAP: 0.5140 nDCG@10: 0.7285 R@1K: 0.9023 - - name: tct_colbert-v2-hnp-otf - display: "TCT_ColBERT-V2-HN+: otf with PyTorch" - display-html: "TCT_ColBERT-V2-HN+: on-the-fly query inference with PyTorch" + - name: tct_colbert-v2-hnp-pytorch + display: "TCT_ColBERT-V2-HN+: query inference with PyTorch" + display-html: "TCT_ColBERT-V2-HN+: query inference with PyTorch" display-row: "[6]" command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.tct_colbert-v2-hnp --topics $topics --encoder castorini/tct_colbert-v2-hnp-msmarco --output 
$output topics: @@ -632,8 +632,8 @@ conditions: nDCG@10: 0.6882 R@1K: 0.8429 - name: slimr - display: "SLIM: on-the-fly query inference with PyTorch" - display-html: "SLIM: on-the-fly query inference with PyTorch" + display: "SLIM: query inference with PyTorch" + display-html: "SLIM: query inference with PyTorch" display-row: "[7]" command: python -m pyserini.search.lucene --threads 16 --batch 128 --index msmarco-v1-passage-slimr --topics $topics --encoder castorini/slimr-msmarco-passage --encoded-corpus scipy-sparse-vectors.msmarco-v1-passage-slimr --output $output --output-format msmarco --hits 1000 --impact --min-idf 3 topics: @@ -655,8 +655,8 @@ conditions: nDCG@10: 0.6403 R@1K: 0.8543 - name: slimr-pp - display: "SLIM++: on-the-fly query inference with PyTorch" - display-html: "SLIM++: on-the-fly query inference with PyTorch" + display: "SLIM++: query inference with PyTorch" + display-html: "SLIM++: query inference with PyTorch" display-row: "[7]" command: python -m pyserini.search.lucene --threads 16 --batch 128 --index msmarco-v1-passage-slimr-pp --topics $topics --encoder castorini/slimr-pp-msmarco-passage --encoded-corpus scipy-sparse-vectors.msmarco-v1-passage-slimr-pp --output $output --output-format msmarco --hits 1000 --impact --min-idf 3 topics: @@ -677,11 +677,11 @@ conditions: - MAP: 0.4906 nDCG@10: 0.7021 R@1K: 0.8551 - - name: Aggretriever-Distilbert-otf - display: "Aggretriever-DistilBERT: otf with PyTorch" - display-html: "Aggretriever-DistilBERT: on-the-fly query inference with PyTorch" + - name: aggretriever-distilbert-pytorch + display: "Aggretriever-DistilBERT: query inference with PyTorch" + display-html: "Aggretriever-DistilBERT: query inference with PyTorch" display-row: "[8]" - command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage.aggretriever-distilbert --topics $topics --encoder castorini/aggretriever-distilbert --output $output + command: python -m pyserini.search.faiss --threads 16 
--batch-size 512 --index msmarco-v1-passage.aggretriever-distilbert --topics $topics --encoder castorini/aggretriever-distilbert --output $output topics: - topic_key: msmarco-passage-dev-subset eval_key: msmarco-passage-dev-subset @@ -700,11 +700,11 @@ conditions: - MAP: 0.4329 nDCG@10: 0.6726 R@1K: 0.8351 - - name: Aggretriever-coCondenser-otf - display: "Aggretriever-coCondenser: otf with PyTorch" - display-html: "Aggretriever-coCondenser: on-the-fly query inference with PyTorch" + - name: aggretriever-cocondenser-pytorch + display: "Aggretriever-coCondenser: query inference with PyTorch" + display-html: "Aggretriever-coCondenser: query inference with PyTorch" display-row: "[8]" - command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-passage.aggretriever-cocondenser --topics $topics --encoder castorini/aggretriever-cocondenser --output $output + command: python -m pyserini.search.faiss --threads 16 --batch-size 512 --index msmarco-v1-passage.aggretriever-cocondenser --topics $topics --encoder castorini/aggretriever-cocondenser --output $output topics: - topic_key: msmarco-passage-dev-subset eval_key: msmarco-passage-dev-subset diff --git a/pyserini/2cr/msmarco-v2-doc.yaml b/pyserini/2cr/msmarco-v2-doc.yaml index 593cfbb80..6287f8a3b 100644 --- a/pyserini/2cr/msmarco-v2-doc.yaml +++ b/pyserini/2cr/msmarco-v2-doc.yaml @@ -216,8 +216,8 @@ conditions: R@100: 0.3563 R@1K: 0.6787 - name: unicoil-noexp-otf - display: "uniCOIL (noexp): otf" - display-html: "uniCOIL (noexp): on-the-fly query inference" + display: "uniCOIL (noexp): query inference with PyTorch" + display-html: "uniCOIL (noexp): query inference with PyTorch" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented-unicoil-noexp-0shot --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage topics: - topic_key: msmarco-v2-doc-dev @@ -263,8 
+263,8 @@ conditions: R@100: 0.3700 R@1K: 0.7069 - name: unicoil-otf - display: "uniCOIL (w/ doc2query-T5): otf" - display-html: "uniCOIL (w/ doc2query-T5): on-the-fly query inference" + display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-doc-segmented-unicoil-0shot --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage topics: - topic_key: msmarco-v2-doc-dev diff --git a/pyserini/2cr/msmarco-v2-passage.yaml b/pyserini/2cr/msmarco-v2-passage.yaml index bbf87680b..06383d365 100644 --- a/pyserini/2cr/msmarco-v2-passage.yaml +++ b/pyserini/2cr/msmarco-v2-passage.yaml @@ -216,8 +216,8 @@ conditions: R@100: 0.4731 R@1K: 0.7551 - name: unicoil-otf - display: "uniCOIL (w/ doc2query-T5): otf" - display-html: "uniCOIL (w/ doc2query-T5): on-the-fly query inference" + display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" + display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-unicoil-0shot --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --hits 1000 --impact topics: - topic_key: msmarco-v2-passage-dev @@ -263,8 +263,8 @@ conditions: R@100: 0.4246 R@1K: 0.6897 - name: unicoil-noexp-otf - display: "uniCOIL (noexp): otf" - display-html: "uniCOIL (noexp): on-the-fly query inference" + display: "uniCOIL (noexp): query inference with PyTorch" + display-html: "uniCOIL (noexp): query inference with PyTorch" command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-unicoil-noexp-0shot --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --hits 1000 --impact topics: - topic_key: msmarco-v2-passage-dev 
diff --git a/pyserini/2cr/msmarco.py b/pyserini/2cr/msmarco.py index df0d92ba4..acc0e5e31 100644 --- a/pyserini/2cr/msmarco.py +++ b/pyserini/2cr/msmarco.py @@ -30,6 +30,7 @@ # The models: the rows of the results table will be ordered this way. models = { + # MS MARCO v1 passage 'msmarco-v1-passage': ['bm25-default', 'bm25-rm3-default', @@ -48,34 +49,36 @@ 'bm25-rocchio-d2q-t5-tuned', '', 'unicoil', - 'unicoil-otf', - 'unicoil-otf-onnx', + 'unicoil-pytorch', + 'unicoil-onnx', 'unicoil-noexp', - 'unicoil-noexp-otf', - 'unicoil-noexp-otf-onnx', + 'unicoil-noexp-pytorch', + 'unicoil-noexp-onnx', '', - 'splade-pp-ed-otf-onnx', - 'splade-pp-sd-otf-onnx', + 'splade-pp-ed-onnx', + 'splade-pp-sd-onnx', '', 'ance', - 'ance-otf', + 'ance-pytorch', '', 'distilbert-kd', - 'distilbert-kd-otf', + 'distilbert-kd-pytorch', 'distilbert-kd-tasb', - 'distilbert-kd-tasb-otf', + 'distilbert-kd-tasb-pytorch', '', 'tct_colbert-v2-hnp', - 'tct_colbert-v2-hnp-otf', + 'tct_colbert-v2-hnp-pytorch', '', 'slimr', 'slimr-pp', '', - 'Aggretriever-Distilbert-otf', - 'Aggretriever-coCondenser-otf', + 'aggretriever-distilbert-pytorch', + 'aggretriever-cocondenser-pytorch', '', 'openai-ada2', 'openai-ada2-hyde'], + + # MS MARCO v1 doc 'msmarco-v1-doc': ['bm25-doc-default', 'bm25-doc-segmented-default', @@ -102,10 +105,12 @@ 'bm25-rm3-d2q-t5-doc-segmented-tuned', '', 'unicoil-noexp', - 'unicoil', + 'unicoil-noexp-pytorch', '', - 'unicoil-noexp-otf', - 'unicoil-otf'], + 'unicoil', + 'unicoil-pytorch'], + + # MS MARCO v2 passage 'msmarco-v2-passage': ['bm25-default', 'bm25-augmented-default', @@ -122,6 +127,8 @@ '', 'unicoil-noexp-otf', 'unicoil-otf'], + + # MS MARCO v2 doc 'msmarco-v2-doc': ['bm25-doc-default', 'bm25-doc-segmented-default', @@ -479,7 +486,7 @@ def run_conditions(args): result_str = ok_str # Flaky tests elif args.collection == 'msmarco-v1-passage' \ - and topic_key == 'msmarco-passage-dev-subset' and name == 'ance-otf' \ + and topic_key == 'msmarco-passage-dev-subset' and name == 
'ance-pytorch' \ and metric == 'MRR@10' and abs(score-float(expected[metric])) <= 0.0001: result_str = okish_str else: diff --git a/pyserini/prebuilt_index_info.py b/pyserini/prebuilt_index_info.py index 709b91b1c..5333779c4 100644 --- a/pyserini/prebuilt_index_info.py +++ b/pyserini/prebuilt_index_info.py @@ -3053,26 +3053,26 @@ FAISS_INDEX_INFO_MSMARCO = { # Aggretriever indexes - "msmarco-passage.aggretriever-cocondenser": { + "msmarco-v1-passage.aggretriever-cocondenser": { "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by aggretriever-cocondenser encoder.", - "filename": "faiss.msmarco-passage.aggretriever-cocondenser.20230407.f627ef.tar.gz", + "filename": "faiss.msmarco-v1-passage.aggretriever-cocondenser.20230407.f627ef.tar.gz", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-passage.aggretriever-cocondenser.20230407.f627ef.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.aggretriever-cocondenser.20230407.f627ef.tar.gz" ], - "md5": "7d5f33b1b350f6cac6a02f3d1c4670ca", - "size compressed (bytes)": 26053474787, + "md5": "58da608d5b31b28001b3aa1cf33479f6", + "size compressed (bytes)": 26053474943, "documents": 8841823, "downloaded": False, "texts": "msmarco-v1-passage" }, - "msmarco-passage.aggretriever-distilbert": { + "msmarco-v1-passage.aggretriever-distilbert": { "description": "Faiss FlatIP index of the MS MARCO passage corpus encoded by aggretriever-distilbert encoder.", - "filename": "faiss.msmarco-passage.aggretriever-distilbert.20230407.f627ef.tar.gz", + "filename": "faiss.msmarco-v1-passage.aggretriever-distilbert.20230407.f627ef.tar.gz", "urls": [ - "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-passage.aggretriever-distilbert.20230407.f627ef.tar.gz" + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.msmarco-v1-passage.aggretriever-distilbert.20230407.f627ef.tar.gz" ], - "md5": "e9c48c36d1c2a7b3da0ab39f58e10ffc", - "size compressed (bytes)": 25963140897, 
+ "md5": "ed1492be0ce7539aacd5db5028404989", + "size compressed (bytes)": 25963140666, "documents": 8841823, "downloaded": False, "texts": "msmarco-v1-passage" diff --git a/tests/test_prebuilt_index.py b/tests/test_prebuilt_index.py index 01add7353..9d1d634c7 100644 --- a/tests/test_prebuilt_index.py +++ b/tests/test_prebuilt_index.py @@ -145,7 +145,7 @@ def test_faiss_msmarco(self): for url in FAISS_INDEX_INFO[key]['urls']: urls.append(url) - self.assertEqual(cnt, 13) + self.assertEqual(cnt, 15) self._test_urls(urls) def test_faiss_wikipedia(self):