diff --git a/docs/2cr/beir.html b/docs/2cr/beir.html index 07b374777..8c482e81c 100644 --- a/docs/2cr/beir.html +++ b/docs/2cr/beir.html @@ -149,6 +149,7 @@

BEIR

BM25 Flat BM25 Multifield SPLADE + Contriever @@ -161,6 +162,8 @@

BEIR

nDCG@10 R@100 + nDCG@10 + R@100 @@ -177,10 +180,12 @@

BEIR

0.7109 0.1308 + 0.2732 + 0.0368 - +
@@ -194,6 +199,9 @@

BEIR

+ @@ -285,6 +293,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.trec-covid.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-trec-covid.contriever \
+  --topics beir-v1.0.0-trec-covid-test \
+  --output run.beir-contriever.trec-covid.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-trec-covid-test \
+  run.beir-contriever.trec-covid.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-trec-covid-test \
+  run.beir-contriever.trec-covid.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-trec-covid-test \
+  run.beir-contriever.trec-covid.txt
+
+
@@ -303,10 +339,12 @@

BEIR

0.5035 0.7422 + 0.3016 + 0.5412 - +
@@ -320,6 +358,9 @@

BEIR

+ @@ -411,6 +452,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.bioasq.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-bioasq.contriever \
+  --topics beir-v1.0.0-bioasq-test \
+  --output run.beir-contriever.bioasq.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-bioasq-test \
+  run.beir-contriever.bioasq.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-bioasq-test \
+  run.beir-contriever.bioasq.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-bioasq-test \
+  run.beir-contriever.bioasq.txt
+
+
@@ -429,10 +498,12 @@

BEIR

0.3454 0.2891 + 0.3173 + 0.2943 - +
@@ -446,6 +517,9 @@

BEIR

+ @@ -537,6 +611,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.nfcorpus.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-nfcorpus.contriever \
+  --topics beir-v1.0.0-nfcorpus-test \
+  --output run.beir-contriever.nfcorpus.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-nfcorpus-test \
+  run.beir-contriever.nfcorpus.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-nfcorpus-test \
+  run.beir-contriever.nfcorpus.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-nfcorpus-test \
+  run.beir-contriever.nfcorpus.txt
+
+
@@ -555,10 +657,12 @@

BEIR

0.5442 0.9285 + 0.2536 + 0.7712 - +
@@ -572,6 +676,9 @@

BEIR

+ @@ -663,6 +770,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.nq.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-nq.contriever \
+  --topics beir-v1.0.0-nq-test \
+  --output run.beir-contriever.nq.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-nq-test \
+  run.beir-contriever.nq.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-nq-test \
+  run.beir-contriever.nq.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-nq-test \
+  run.beir-contriever.nq.txt
+
+
@@ -681,10 +816,12 @@

BEIR

0.6860 0.8144 + 0.4807 + 0.7046 - +
@@ -698,6 +835,9 @@

BEIR

+ @@ -789,6 +929,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.hotpotqa.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-hotpotqa.contriever \
+  --topics beir-v1.0.0-hotpotqa-test \
+  --output run.beir-contriever.hotpotqa.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-hotpotqa-test \
+  run.beir-contriever.hotpotqa.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-hotpotqa-test \
+  run.beir-contriever.hotpotqa.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-hotpotqa-test \
+  run.beir-contriever.hotpotqa.txt
+
+
@@ -807,10 +975,12 @@

BEIR

0.3514 0.6298 + 0.2449 + 0.5619 - +
@@ -824,6 +994,9 @@

BEIR

+ @@ -915,6 +1088,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.fiqa.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-fiqa.contriever \
+  --topics beir-v1.0.0-fiqa-test \
+  --output run.beir-contriever.fiqa.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-fiqa-test \
+  run.beir-contriever.fiqa.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-fiqa-test \
+  run.beir-contriever.fiqa.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-fiqa-test \
+  run.beir-contriever.fiqa.txt
+
+
@@ -933,10 +1134,12 @@

BEIR

0.2957 0.3311 + 0.2338 + 0.2568 - +
@@ -950,6 +1153,9 @@

BEIR

+ @@ -1041,6 +1247,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.signal1m.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-signal1m.contriever \
+  --topics beir-v1.0.0-signal1m-test \
+  --output run.beir-contriever.signal1m.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-signal1m-test \
+  run.beir-contriever.signal1m.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-signal1m-test \
+  run.beir-contriever.signal1m.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-signal1m-test \
+  run.beir-contriever.signal1m.txt
+
+
@@ -1059,10 +1293,12 @@

BEIR

0.3936 0.4323 + 0.3484 + 0.4234 - +
@@ -1076,6 +1312,9 @@

BEIR

+ @@ -1167,6 +1406,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.trec-news.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-trec-news.contriever \
+  --topics beir-v1.0.0-trec-news-test \
+  --output run.beir-contriever.trec-news.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-trec-news-test \
+  run.beir-contriever.trec-news.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-trec-news-test \
+  run.beir-contriever.trec-news.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-trec-news-test \
+  run.beir-contriever.trec-news.txt
+
+
@@ -1185,10 +1452,12 @@

BEIR

0.4581 0.3773 + 0.3155 + 0.2757 - +
@@ -1202,6 +1471,9 @@

BEIR

+ @@ -1293,6 +1565,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.robust04.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-robust04.contriever \
+  --topics beir-v1.0.0-robust04-test \
+  --output run.beir-contriever.robust04.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-robust04-test \
+  run.beir-contriever.robust04.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-robust04-test \
+  run.beir-contriever.robust04.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-robust04-test \
+  run.beir-contriever.robust04.txt
+
+
@@ -1311,10 +1611,12 @@

BEIR

0.5210 0.9822 + 0.3791 + 0.9011 - +
@@ -1328,6 +1630,9 @@

BEIR

+ @@ -1419,6 +1724,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.arguana.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-arguana.contriever \
+  --topics beir-v1.0.0-arguana-test \
+  --output run.beir-contriever.arguana.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-arguana-test \
+  run.beir-contriever.arguana.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-arguana-test \
+  run.beir-contriever.arguana.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-arguana-test \
+  run.beir-contriever.arguana.txt
+
+
@@ -1437,10 +1770,12 @@

BEIR

0.2435 0.4723 + 0.1668 + 0.3736 - +
@@ -1454,6 +1789,9 @@

BEIR

+ @@ -1545,6 +1883,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.webis-touche2020.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-webis-touche2020.contriever \
+  --topics beir-v1.0.0-webis-touche2020-test \
+  --output run.beir-contriever.webis-touche2020.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-webis-touche2020-test \
+  run.beir-contriever.webis-touche2020.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-webis-touche2020-test \
+  run.beir-contriever.webis-touche2020.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-webis-touche2020-test \
+  run.beir-contriever.webis-touche2020.txt
+
+
@@ -1563,10 +1929,12 @@

BEIR

0.3954 0.7405 + 0.3771 + 0.7436 - +
@@ -1580,6 +1948,9 @@

BEIR

+ @@ -1671,6 +2042,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-android.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-android.contriever \
+  --topics beir-v1.0.0-cqadupstack-android-test \
+  --output run.beir-contriever.cqadupstack-android.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-android-test \
+  run.beir-contriever.cqadupstack-android.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-android-test \
+  run.beir-contriever.cqadupstack-android.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-android-test \
+  run.beir-contriever.cqadupstack-android.txt
+
+
@@ -1689,10 +2088,12 @@

BEIR

0.4026 0.6768 + 0.3571 + 0.6442 - +
@@ -1706,6 +2107,9 @@

BEIR

+ @@ -1797,6 +2201,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-english.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-english.contriever \
+  --topics beir-v1.0.0-cqadupstack-english-test \
+  --output run.beir-contriever.cqadupstack-english.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-english-test \
+  run.beir-contriever.cqadupstack-english.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-english-test \
+  run.beir-contriever.cqadupstack-english.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-english-test \
+  run.beir-contriever.cqadupstack-english.txt
+
+
@@ -1815,10 +2247,12 @@

BEIR

0.5061 0.8138 + 0.4597 + 0.8092 - +
@@ -1832,6 +2266,9 @@

BEIR

+ @@ -1923,6 +2360,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-gaming.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-gaming.contriever \
+  --topics beir-v1.0.0-cqadupstack-gaming-test \
+  --output run.beir-contriever.cqadupstack-gaming.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-gaming-test \
+  run.beir-contriever.cqadupstack-gaming.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-gaming-test \
+  run.beir-contriever.cqadupstack-gaming.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-gaming-test \
+  run.beir-contriever.cqadupstack-gaming.txt
+
+
@@ -1941,10 +2406,12 @@

BEIR

0.3223 0.6419 + 0.2411 + 0.5792 - +
@@ -1958,6 +2425,9 @@

BEIR

+ @@ -2049,6 +2519,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-gis.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-gis.contriever \
+  --topics beir-v1.0.0-cqadupstack-gis-test \
+  --output run.beir-contriever.cqadupstack-gis.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-gis-test \
+  run.beir-contriever.cqadupstack-gis.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-gis-test \
+  run.beir-contriever.cqadupstack-gis.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-gis-test \
+  run.beir-contriever.cqadupstack-gis.txt
+
+
@@ -2067,10 +2565,12 @@

BEIR

0.2423 0.5732 + 0.1841 + 0.5127 - +
@@ -2084,6 +2584,9 @@

BEIR

+ @@ -2175,6 +2678,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-mathematica.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-mathematica.contriever \
+  --topics beir-v1.0.0-cqadupstack-mathematica-test \
+  --output run.beir-contriever.cqadupstack-mathematica.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-mathematica-test \
+  run.beir-contriever.cqadupstack-mathematica.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-mathematica-test \
+  run.beir-contriever.cqadupstack-mathematica.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-mathematica-test \
+  run.beir-contriever.cqadupstack-mathematica.txt
+
+
@@ -2193,10 +2724,12 @@

BEIR

0.3668 0.7286 + 0.3430 + 0.7013 - +
@@ -2210,6 +2743,9 @@

BEIR

+ @@ -2301,6 +2837,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-physics.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-physics.contriever \
+  --topics beir-v1.0.0-cqadupstack-physics-test \
+  --output run.beir-contriever.cqadupstack-physics.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-physics-test \
+  run.beir-contriever.cqadupstack-physics.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-physics-test \
+  run.beir-contriever.cqadupstack-physics.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-physics-test \
+  run.beir-contriever.cqadupstack-physics.txt
+
+
@@ -2319,10 +2883,12 @@

BEIR

0.3412 0.6653 + 0.3029 + 0.6402 - +
@@ -2336,6 +2902,9 @@

BEIR

+ @@ -2427,6 +2996,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-programmers.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-programmers.contriever \
+  --topics beir-v1.0.0-cqadupstack-programmers-test \
+  --output run.beir-contriever.cqadupstack-programmers.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-programmers-test \
+  run.beir-contriever.cqadupstack-programmers.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-programmers-test \
+  run.beir-contriever.cqadupstack-programmers.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-programmers-test \
+  run.beir-contriever.cqadupstack-programmers.txt
+
+
@@ -2445,10 +3042,12 @@

BEIR

0.3142 0.5889 + 0.2483 + 0.5269 - +
@@ -2462,6 +3061,9 @@

BEIR

+ @@ -2553,6 +3155,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-stats.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-stats.contriever \
+  --topics beir-v1.0.0-cqadupstack-stats-test \
+  --output run.beir-contriever.cqadupstack-stats.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-stats-test \
+  run.beir-contriever.cqadupstack-stats.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-stats-test \
+  run.beir-contriever.cqadupstack-stats.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-stats-test \
+  run.beir-contriever.cqadupstack-stats.txt
+
+
@@ -2571,10 +3201,12 @@

BEIR

0.2575 0.5231 + 0.1540 + 0.4333 - +
@@ -2588,6 +3220,9 @@

BEIR

+ @@ -2679,6 +3314,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-tex.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-tex.contriever \
+  --topics beir-v1.0.0-cqadupstack-tex-test \
+  --output run.beir-contriever.cqadupstack-tex.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-tex-test \
+  run.beir-contriever.cqadupstack-tex.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-tex-test \
+  run.beir-contriever.cqadupstack-tex.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-tex-test \
+  run.beir-contriever.cqadupstack-tex.txt
+
+
@@ -2697,10 +3360,12 @@

BEIR

0.3292 0.6192 + 0.2636 + 0.5879 - +
@@ -2714,6 +3379,9 @@

BEIR

+ @@ -2805,6 +3473,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-unix.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-unix.contriever \
+  --topics beir-v1.0.0-cqadupstack-unix-test \
+  --output run.beir-contriever.cqadupstack-unix.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-unix-test \
+  run.beir-contriever.cqadupstack-unix.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-unix-test \
+  run.beir-contriever.cqadupstack-unix.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-unix-test \
+  run.beir-contriever.cqadupstack-unix.txt
+
+
@@ -2823,10 +3519,12 @@

BEIR

0.3343 0.6404 + 0.2878 + 0.6485 - +
@@ -2840,6 +3538,9 @@

BEIR

+ @@ -2931,6 +3632,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-webmasters.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-webmasters.contriever \
+  --topics beir-v1.0.0-cqadupstack-webmasters-test \
+  --output run.beir-contriever.cqadupstack-webmasters.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-webmasters-test \
+  run.beir-contriever.cqadupstack-webmasters.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-webmasters-test \
+  run.beir-contriever.cqadupstack-webmasters.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-webmasters-test \
+  run.beir-contriever.cqadupstack-webmasters.txt
+
+
@@ -2949,10 +3678,12 @@

BEIR

0.2839 0.5974 + 0.1914 + 0.5364 - +
@@ -2966,6 +3697,9 @@

BEIR

+ @@ -3057,6 +3791,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.cqadupstack-wordpress.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-cqadupstack-wordpress.contriever \
+  --topics beir-v1.0.0-cqadupstack-wordpress-test \
+  --output run.beir-contriever.cqadupstack-wordpress.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-cqadupstack-wordpress-test \
+  run.beir-contriever.cqadupstack-wordpress.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-cqadupstack-wordpress-test \
+  run.beir-contriever.cqadupstack-wordpress.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-cqadupstack-wordpress-test \
+  run.beir-contriever.cqadupstack-wordpress.txt
+
+
@@ -3075,10 +3837,12 @@

BEIR

0.8136 0.9817 + 0.8349 + 0.9871 - +
@@ -3092,6 +3856,9 @@

BEIR

+ @@ -3183,6 +3950,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.quora.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-quora.contriever \
+  --topics beir-v1.0.0-quora-test \
+  --output run.beir-contriever.quora.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-quora-test \
+  run.beir-contriever.quora.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-quora-test \
+  run.beir-contriever.quora.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-quora-test \
+  run.beir-contriever.quora.txt
+
+
@@ -3201,10 +3996,12 @@

BEIR

0.4416 0.5636 + 0.2916 + 0.4529 - +
@@ -3218,6 +4015,9 @@

BEIR

+ @@ -3309,6 +4109,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.dbpedia-entity.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-dbpedia-entity.contriever \
+  --topics beir-v1.0.0-dbpedia-entity-test \
+  --output run.beir-contriever.dbpedia-entity.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-dbpedia-entity-test \
+  run.beir-contriever.dbpedia-entity.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-dbpedia-entity-test \
+  run.beir-contriever.dbpedia-entity.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-dbpedia-entity-test \
+  run.beir-contriever.dbpedia-entity.txt
+
+
@@ -3327,10 +4155,12 @@

BEIR

0.1590 0.3671 + 0.1491 + 0.3601 - +
@@ -3344,6 +4174,9 @@

BEIR

+ @@ -3435,6 +4268,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.scidocs.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-scidocs.contriever \
+  --topics beir-v1.0.0-scidocs-test \
+  --output run.beir-contriever.scidocs.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-scidocs-test \
+  run.beir-contriever.scidocs.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-scidocs-test \
+  run.beir-contriever.scidocs.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-scidocs-test \
+  run.beir-contriever.scidocs.txt
+
+
@@ -3453,10 +4314,12 @@

BEIR

0.7962 0.9550 + 0.6821 + 0.9356 - +
@@ -3470,6 +4333,9 @@

BEIR

+ @@ -3561,6 +4427,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.fever.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-fever.contriever \
+  --topics beir-v1.0.0-fever-test \
+  --output run.beir-contriever.fever.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-fever-test \
+  run.beir-contriever.fever.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-fever-test \
+  run.beir-contriever.fever.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-fever-test \
+  run.beir-contriever.fever.txt
+
+
@@ -3579,10 +4473,12 @@

BEIR

0.2276 0.5140 + 0.1550 + 0.4422 - +
@@ -3596,6 +4492,9 @@

BEIR

+ @@ -3687,6 +4586,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.climate-fever.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-climate-fever.contriever \
+  --topics beir-v1.0.0-climate-fever-test \
+  --output run.beir-contriever.climate-fever.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-climate-fever-test \
+  run.beir-contriever.climate-fever.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-climate-fever-test \
+  run.beir-contriever.climate-fever.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-climate-fever-test \
+  run.beir-contriever.climate-fever.txt
+
+
@@ -3705,10 +4632,12 @@

BEIR

0.6992 0.9270 + 0.6493 + 0.9260 - +
@@ -3722,6 +4651,9 @@

BEIR

+ @@ -3813,6 +4745,34 @@

BEIR

run.beir-splade-distil-cocodenser-medium.scifact.txt +
+
+Command to generate run: + +
+
python -m pyserini.search.faiss \
+ --encoder-class contriever --encoder facebook/contriever \
+  --index beir-v1.0.0-scifact.contriever \
+  --topics beir-v1.0.0-scifact-test \
+  --output run.beir-contriever.scifact.txt --batch 128 --threads 16 \
+  --hits 1000 --remove-query
+
+Evaluation commands: + +
+
python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.10 beir-v1.0.0-scifact-test \
+  run.beir-contriever.scifact.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 beir-v1.0.0-scifact-test \
+  run.beir-contriever.scifact.txt
+
+python -m pyserini.eval.trec_eval \
+  -c -m recall.1000 beir-v1.0.0-scifact-test \
+  run.beir-contriever.scifact.txt
+
+
diff --git a/pyserini/prebuilt_index_info.py b/pyserini/prebuilt_index_info.py index 5745d3c2b..4c770f156 100644 --- a/pyserini/prebuilt_index_info.py +++ b/pyserini/prebuilt_index_info.py @@ -1887,6 +1887,385 @@ "downloaded": False }, + # BEIR (v1.0.0) contriever indexes + "beir-v1.0.0-trec-covid.contriever": { + "description": "Faiss index for BEIR v1.0.0 (TREC-COVID) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-trec-covid.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-trec-covid.contriever.20230124.tar.gz" + ], + "md5": "5b5baf557979e30e943180627fe31340", + "size compressed (bytes)": 488100317, + "documents": 171332, + "downloaded": False, + "texts": "beir-v1.0.0-trec-covid.flat", + }, + "beir-v1.0.0-bioasq.contriever": { + "description": "Faiss index for BEIR v1.0.0 (BioASQ) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-bioasq.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-bioasq.contriever.20230124.tar.gz" + ], + "md5": "c0cbca535d38c1f1f78ff1bd6d91af5d", + "size compressed (bytes)": 42417202460, + "documents": 14914603, + "downloaded": False, + "texts": "beir-v1.0.0-bioasq.flat", + }, + "beir-v1.0.0-nfcorpus.contriever": { + "description": "Faiss index for BEIR v1.0.0 (NFCorpus) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-nfcorpus.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-nfcorpus.contriever.20230124.tar.gz" + ], + "md5": "5eff0107f7953ebe7658c3a6400e7027", + "size compressed (bytes)": 10322409, + "documents": 3633, + "downloaded": False, + "texts": "beir-v1.0.0-nfcorpus.flat", + }, + "beir-v1.0.0-nq.contriever": { + "description": "Faiss index for BEIR v1.0.0 (NQ) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-nq.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-nq.contriever.20230124.tar.gz" + ], + "md5": "e1825fe0ce5c8000b63b1499374adb0e", + "size compressed (bytes)": 7617697503, + "documents": 2681468, + "downloaded": False, + "texts": "beir-v1.0.0-nq.flat", + }, + "beir-v1.0.0-hotpotqa.contriever": { + "description": "Faiss index for BEIR v1.0.0 (HotpotQA) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-hotpotqa.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-hotpotqa.contriever.20230124.tar.gz" + ], + "md5": "51445960e00a18264ae3947b3af2bc80", + "size compressed (bytes)": 14874721901, + "documents": 5233329, + "downloaded": False, + "texts": "beir-v1.0.0-hotpotqa.flat", + }, + "beir-v1.0.0-fiqa.contriever": { + "description": "Faiss index for BEIR v1.0.0 (FiQA-2018) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-fiqa.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-fiqa.contriever.20230124.tar.gz" + ], + "md5": "a03cc30459b1a1928b93ad1aa51a7849", + "size compressed (bytes)": 164024764, + "documents": 57638, + "downloaded": False, + "texts": "beir-v1.0.0-fiqa.flat", + }, + "beir-v1.0.0-signal1m.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Signal-1M) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-signal1m.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-signal1m.contriever.20230124.tar.gz" + ], + "md5": "19e3e324b7b87e55fb9f6b6b1e72c464", + "size compressed (bytes)": 8142533760, + "documents": 2866316, + "downloaded": False, + "texts": "beir-v1.0.0-signal1m.flat", + }, + "beir-v1.0.0-trec-news.contriever": { + "description": "Faiss index for BEIR v1.0.0 (TREC-NEWS) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-trec-news.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-trec-news.contriever.20230124.tar.gz" + ], + "md5": "20db6299b57b3e78ea2f8b7a2b649770", + "size compressed (bytes)": 1629958623, + "documents": 594977, + "downloaded": False, + "texts": "beir-v1.0.0-trec-news.flat", + }, + "beir-v1.0.0-robust04.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Robust04) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-robust04.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-robust04.contriever.20230124.tar.gz" + ], + "md5": "81c730b68e066baf18d5b46918b8c830", + "size compressed (bytes)": 1501110333, + "documents": 528155, + "downloaded": False, + "texts": "beir-v1.0.0-robust04.flat", + }, + "beir-v1.0.0-arguana.contriever": { + "description": "Faiss index for BEIR v1.0.0 (ArguAna) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-arguana.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-arguana.contriever.20230124.tar.gz" + ], + "md5": "03f701916d49dd86b9c8989796d2dcc4", + "size compressed (bytes)": 24710561, + "documents": 8674, + "downloaded": False, + "texts": "beir-v1.0.0-arguana.flat", + }, + "beir-v1.0.0-webis-touche2020.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Webis-Touche2020) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-webis-touche2020.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-webis-touche2020.contriever.20230124.tar.gz" + ], + "md5": "dfff9bc58521f09542f0affa3069f9a7", + "size compressed (bytes)": 1091320704, + "documents": 382545, + "downloaded": False, + "texts": "beir-v1.0.0-webis-touche2020.flat", + }, + "beir-v1.0.0-cqadupstack-android.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-android) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-android.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-android.contriever.20230124.tar.gz" + ], + "md5": "4f03c0238f0e8f77e6365b61108042ed", + "size compressed (bytes)": 65447231, + "documents": 22998, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-android.flat", + }, + "beir-v1.0.0-cqadupstack-english.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-english) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-english.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-english.contriever.20230124.tar.gz" + ], + "md5": "319e3cba8f5f5d5175aad92c99c4b0fd", + "size compressed (bytes)": 114460495, + "documents": 40221, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-english.flat", + }, + "beir-v1.0.0-cqadupstack-gaming.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-gaming) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-gaming.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-gaming.contriever.20230124.tar.gz" + ], + "md5": "049f2cb22adfb5803a5f7f762f578bce", + "size compressed (bytes)": 128906099, + "documents": 45301, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-gaming.flat", + }, + "beir-v1.0.0-cqadupstack-gis.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-gis) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-gis.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-gis.contriever.20230124.tar.gz" + ], + "md5": "13fdfa5a13634c10c1e7e6179bb4c376", + "size compressed (bytes)": 107128974, + "documents": 37637, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-gis.flat", + }, + "beir-v1.0.0-cqadupstack-mathematica.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-mathematica) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-mathematica.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-mathematica.contriever.20230124.tar.gz" + ], + "md5": "e4f756eede3ae5f9228d32096c1bd5b4", + "size compressed (bytes)": 47544559, + "documents": 16705, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-mathematica.flat", + }, + "beir-v1.0.0-cqadupstack-physics.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-physics) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-physics.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-physics.contriever.20230124.tar.gz" + ], + "md5": "b92ec0c233a1112d6f8782fb0f2bc9c1", + "size compressed (bytes)": 109048286, + "documents": 38316, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-physics.flat", + }, + "beir-v1.0.0-cqadupstack-programmers.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-programmers) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-programmers.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-programmers.contriever.20230124.tar.gz" + ], + "md5": "f180240f35e2a3c27d39361a20533205", + "size compressed (bytes)": 91583135, + "documents": 32176, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-programmers.flat", + }, + "beir-v1.0.0-cqadupstack-stats.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-stats) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-stats.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-stats.contriever.20230124.tar.gz" + ], + "md5": "64737df62b4e03b93356ba234cefe0e6", + "size compressed (bytes)": 120288620, + "documents": 42269, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-stats.flat", + }, + "beir-v1.0.0-cqadupstack-tex.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-tex) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-tex.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-tex.contriever.20230124.tar.gz" + ], + "md5": "ef087faff49e5bae0799e8576e387c0d", + "size compressed (bytes)": 194080724, + "documents": 68184, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-tex.flat", + }, + "beir-v1.0.0-cqadupstack-unix.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-unix) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-unix.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-unix.contriever.20230124.tar.gz" + ], + "md5": "9279884bfc3a14c2896276b679a58dbf", + "size compressed (bytes)": 134860159, + "documents": 47382, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-unix.flat", + }, + "beir-v1.0.0-cqadupstack-webmasters.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-webmasters) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-webmasters.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-webmasters.contriever.20230124.tar.gz" + ], + "md5": "f1a46fc6f6586c716d2a6239753c9573", + "size compressed (bytes)": 49531545, + "documents": 17405, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-webmasters.flat", + }, + "beir-v1.0.0-cqadupstack-wordpress.contriever": { + "description": "Faiss index for BEIR v1.0.0 (CQADupStack-wordpress) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-cqadupstack-wordpress.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-cqadupstack-wordpress.contriever.20230124.tar.gz" + ], + "md5": "27480c7a4c8d437af30618bf98b10969", + "size compressed (bytes)": 138348184, + "documents": 48605, + "downloaded": False, + "texts": "beir-v1.0.0-cqadupstack-wordpress.flat", + }, + "beir-v1.0.0-quora.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Quora) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-quora.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-quora.contriever.20230124.tar.gz" + ], + "md5": "4876145908b7af946593df6dbb8af600", + "size compressed (bytes)": 1485866217, + "documents": 522931, + "downloaded": False, + "texts": "beir-v1.0.0-quora.flat", + }, + "beir-v1.0.0-dbpedia-entity.contriever": { + "description": "Faiss index for BEIR v1.0.0 (DBPedia) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-dbpedia-entity.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-dbpedia-entity.contriever.20230124.tar.gz" + ], + "md5": "ee88a23de31d3faf403673c08ea0c844", + "size compressed (bytes)": 13214316305, + "documents": 4635922, + "downloaded": False, + "texts": "beir-v1.0.0-dbpedia-entity.flat", + }, + "beir-v1.0.0-scidocs.contriever": { + "description": "Faiss index for BEIR v1.0.0 (SCIDOCS) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-scidocs.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-scidocs.contriever.20230124.tar.gz" + ], + "md5": "dd1555b714c482a22cbb74d8c72599c9", + "size compressed (bytes)": 73532556, + "documents": 25657, + "downloaded": False, + "texts": "beir-v1.0.0-scidocs.flat", + }, + "beir-v1.0.0-fever.contriever": { + "description": "Faiss index for BEIR v1.0.0 (FEVER) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-fever.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-fever.contriever.20230124.tar.gz" + ], + "md5": "d5b738dc38e56857a987bdb1eb4ce5c1", + "size compressed (bytes)": 15437918827, + "documents": 5416568, + "downloaded": False, + "texts": "beir-v1.0.0-fever.flat", + }, + "beir-v1.0.0-climate-fever.contriever": { + "description": "Faiss index for BEIR v1.0.0 (Climate-FEVER) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-climate-fever.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-climate-fever.contriever.20230124.tar.gz" + ], + "md5": "1e169cf6a8baaa4909f6823e3c23a80f", + "size compressed (bytes)": 15437988868, + "documents": 5416593, + "downloaded": False, + "texts": "beir-v1.0.0-climate-fever.flat", + }, + "beir-v1.0.0-scifact.contriever": { + "description": "Faiss index for BEIR v1.0.0 (SciFact) corpus encoded by Contriever encoder.", + "filename": "faiss.beir-v1.0.0-scifact.contriever.20230124.tar.gz", + "readme": "faiss.beir-v1.0.0.contriever.20230124.README.md", + "urls": [ + "https://rgw.cs.uwaterloo.ca/pyserini/indexes/faiss.beir-v1.0.0-scifact.contriever.20230124.tar.gz" + ], + "md5": "61eb253aa08c9c97fa2f82ef2a96ca7b", + "size compressed (bytes)": 14753553, + "documents": 5183, + "downloaded": False, + "texts": "beir-v1.0.0-scifact.flat", + }, + "hc4-v1.0-fa": { "description": "Lucene index for HC4 v1.0 (Persian). (Lucene 9)", "filename": "lucene-index.hc4-v1.0-fa.20221025.c4a8d0.tar.gz", diff --git a/pyserini/resources/beir.yaml b/pyserini/resources/beir.yaml index 3247305d9..4ab41a8e1 100644 --- a/pyserini/resources/beir.yaml +++ b/pyserini/resources/beir.yaml @@ -443,3 +443,151 @@ conditions: - nDCG@10: 0.6992 R@100: 0.9270 R@1000: 0.9767 + - name: contriever + command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/contriever --index beir-v1.0.0-${dataset}.contriever --topics beir-v1.0.0-${dataset}-test --output $output --batch 128 --threads 16 --hits 1000 --remove-query + datasets: + - dataset: trec-covid + scores: + - nDCG@10: 0.2732 + R@100: 0.0368 + R@1000: 0.1675 + - dataset: bioasq + scores: + - nDCG@10: 0.3016 + R@100: 0.5412 + R@1000: 0.7396 + - dataset: nfcorpus + scores: + - nDCG@10: 0.3173 + R@100: 0.2943 + R@1000: 0.6232 + - dataset: nq + scores: + - nDCG@10: 0.2536 + R@100: 0.7712 + R@1000: 0.9286 + - dataset: hotpotqa + scores: + - nDCG@10: 0.4807 + R@100: 0.7046 + R@1000: 0.8294 + - dataset: fiqa + scores: + - nDCG@10: 0.2449 + R@100: 0.5619 + R@1000: 0.8215 + - dataset: signal1m + scores: + - nDCG@10: 0.2338 + R@100: 0.2568 + R@1000: 0.4757 + - dataset: trec-news + scores: + - nDCG@10: 0.3484 + R@100: 0.4234 + R@1000: 0.7389 + - dataset: robust04 + scores: + - nDCG@10: 0.3155 + R@100: 0.2757 + R@1000: 0.5097 + - dataset: arguana + scores: + - nDCG@10: 0.3791 + R@100: 0.9011 + R@1000: 0.9851 + - dataset: webis-touche2020 + scores: + - nDCG@10: 0.1668 + R@100: 0.3736 + R@1000: 0.7144 + - dataset: cqadupstack-android + scores: + - nDCG@10: 0.3771 + R@100: 0.7436 + R@1000: 0.9173 + - dataset: cqadupstack-english + scores: + - nDCG@10: 0.3571 + R@100: 0.6442 + R@1000: 0.8042 + - dataset: cqadupstack-gaming + scores: + - nDCG@10: 0.4597 + R@100: 0.8092 + R@1000: 0.9354 + - dataset: cqadupstack-gis + scores: + - nDCG@10: 0.2411 + R@100: 0.5792 + R@1000: 0.8018 + - dataset: cqadupstack-mathematica + scores: + - nDCG@10: 0.1841 + R@100: 0.5127 + R@1000: 0.7757 + - dataset: cqadupstack-physics + scores: + - nDCG@10: 0.3430 + R@100: 0.7013 + R@1000: 0.8980 + - dataset: cqadupstack-programmers + scores: + - nDCG@10: 0.3029 + R@100: 0.6402 + R@1000: 0.8434 + - dataset: cqadupstack-stats + scores: + - nDCG@10: 0.2483 + R@100: 0.5269 + R@1000: 0.7417 + - dataset: cqadupstack-tex + scores: + - nDCG@10: 0.1540 + R@100: 0.4333 + R@1000: 0.6870 + - dataset: cqadupstack-unix + scores: + - nDCG@10: 0.2636 + R@100: 0.5879 + R@1000: 0.8212 + - dataset: cqadupstack-webmasters + scores: + - nDCG@10: 0.2878 + R@100: 0.6485 + R@1000: 0.8800 + - dataset: cqadupstack-wordpress + scores: + - nDCG@10: 0.1914 + R@100: 0.5364 + R@1000: 0.7551 + - dataset: quora + scores: + - nDCG@10: 0.8349 + R@100: 0.9871 + R@1000: 0.9981 + - dataset: dbpedia-entity + scores: + - nDCG@10: 0.2916 + R@100: 0.4529 + R@1000: 0.7142 + - dataset: scidocs + scores: + - nDCG@10: 0.1491 + R@100: 0.3601 + R@1000: 0.6105 + - dataset: fever + scores: + - nDCG@10: 0.6821 + R@100: 0.9356 + R@1000: 0.9655 + - dataset: climate-fever + scores: + - nDCG@10: 0.1550 + R@100: 0.4422 + R@1000: 0.7232 + - dataset: scifact + scores: + - nDCG@10: 0.6493 + R@100: 0.9260 + R@1000: 0.9967 \ No newline at end of file diff --git a/scripts/beir/gather_beir_index_stats.py b/scripts/beir/gather_beir_index_stats.py index bb7f62413..3ea9fefcc 100644 --- a/scripts/beir/gather_beir_index_stats.py +++ b/scripts/beir/gather_beir_index_stats.py @@ -74,3 +74,23 @@ print(f' "unique_terms": {stats["unique_terms"]},') print(f' "downloaded": False') print(f' }},') + +# Stats for "contriever" indexes +for key in beir_keys: + index_reader = IndexReader(f'indexes/faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}') + stats = index_reader.stats() + md5 = compute_md5(f'indexes/faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.tar.gz') + size = os.path.getsize(f'indexes/faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.tar.gz') + print(f' "beir-v1.0.0-{key}.contriever": {{') + print(f' "description": "Faiss index for BEIR v1.0.0 ({beir_keys[key]}) corpus encoded by Contriever encoder.",') + print(f' "filename": "faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.tar.gz",') + print(f' "readme": "faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.README.md",') + print(f' "urls": [') + print(f' "https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/pyserini-indexes/faiss.beir-v1.0.0-{key}.contriever.{date}.{commitid}.tar.gz"') + print(f' ],') + print(f' "md5": "{md5}",') + print(f' "size compressed (bytes)": {size},') + print(f' "documents": {stats["documents"]},') + print(f' "downloaded": False,') + print(f' "texts": "beir-v1.0.0-{key}.flat"') + print(f' }},') diff --git a/scripts/beir/run_beir_baselines.py b/scripts/beir/run_beir_baselines.py index edbbab385..726569d25 100644 --- a/scripts/beir/run_beir_baselines.py +++ b/scripts/beir/run_beir_baselines.py @@ -92,3 +92,16 @@ os.system(cmd) cmd = f'python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100,1000 beir-v1.0.0-{key}-test runs/run.beir-v1.0.0-{key}-splade_distil_cocodenser_medium.trec' os.system(cmd) + +# Runs on Contriever index +for key in beir_keys: + cmd = f'python -m pyserini.search.faiss \ + --encoder-class contriever --encoder facebook/contriever \ + --index beir-v1.0.0-{key}.contriever \ + --topics beir-v1.0.0-{key}-test \ + --output runs/run.beir.contriever.{key}.txt \ + --batch 128 --threads 16 \ + --remove-query --hits 1000' + os.system(cmd) + cmd = f'python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 -m recall.100,1000 beir-v1.0.0-{key}-test runs/run.beir.contriever.{key}.txt' + os.system(cmd) diff --git a/scripts/repro_matrix/beir_html.template b/scripts/repro_matrix/beir_html.template index b89eef10b..4e265a209 100644 --- a/scripts/repro_matrix/beir_html.template +++ b/scripts/repro_matrix/beir_html.template @@ -149,6 +149,7 @@ pre[class*="prettyprint"] { BM25 Flat BM25 Multifield SPLADE + Contriever @@ -161,6 +162,8 @@ pre[class*="prettyprint"] { nDCG@10 R@100 + nDCG@10 + R@100 diff --git a/scripts/repro_matrix/beir_html_row.template b/scripts/repro_matrix/beir_html_row.template index 7f7bfafe8..0fa843cd7 100644 --- a/scripts/repro_matrix/beir_html_row.template +++ b/scripts/repro_matrix/beir_html_row.template @@ -10,10 +10,12 @@ $s5 $s6 +$s7 +$s8 - +
@@ -27,6 +29,9 @@ + @@ -70,6 +75,19 @@ Evaluation commands:
${eval_cmd3}
+
+
+Command to generate run: + +
+
$cmd4
+
+Evaluation commands: + +
+
${eval_cmd4}
+
+
diff --git a/scripts/repro_matrix/generate_html_beir.py b/scripts/repro_matrix/generate_html_beir.py old mode 100644 new mode 100755 index 2f05679db..8ae8fc581 --- a/scripts/repro_matrix/generate_html_beir.py +++ b/scripts/repro_matrix/generate_html_beir.py @@ -25,6 +25,7 @@ def format_run_command(raw): return raw.replace('--topics', '\\\n --topics')\ .replace('--index', '\\\n --index')\ + .replace('--encoder-class', '\\\n --encoder-class')\ .replace('--output ', '\\\n --output ')\ .replace('--output-format trec', '\\\n --output-format trec \\\n ') \ .replace('--hits ', '\\\n --hits ') @@ -84,12 +85,16 @@ def read_file(f): s4=f'{table[dataset]["multifield"]["R@100"]:8.4f}', s5=f'{table[dataset]["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}', s6=f'{table[dataset]["splade-distil-cocodenser-medium"]["R@100"]:8.4f}', + s7=f'{table[dataset]["contriever"]["nDCG@10"]:8.4f}', + s8=f'{table[dataset]["contriever"]["R@100"]:8.4f}', cmd1=commands[dataset]["flat"], cmd2=commands[dataset]["multifield"], cmd3=commands[dataset]["splade-distil-cocodenser-medium"], + cmd4=commands[dataset]["contriever"], eval_cmd1=eval_commands[dataset]["flat"].rstrip(), eval_cmd2=eval_commands[dataset]["multifield"].rstrip(), eval_cmd3=eval_commands[dataset]["splade-distil-cocodenser-medium"].rstrip(), + eval_cmd4=eval_commands[dataset]["contriever"].rstrip(), ) html_rows.append(s) diff --git a/scripts/repro_matrix/run_all_beir.py b/scripts/repro_matrix/run_all_beir.py index 146ee5d9e..79b27e0a5 100644 --- a/scripts/repro_matrix/run_all_beir.py +++ b/scripts/repro_matrix/run_all_beir.py @@ -96,18 +96,20 @@ final_score = (top_level_sums[model][metric] + cqa_score) / 18 final_scores[model][metric] = final_score - print(' ' * 30 + 'BM25-flat' + ' ' * 10 + 'BM25-mf' + ' ' * 11 + 'SPLADE') + print(' ' * 30 + 'BM25-flat' + ' ' * 10 + 'BM25-mf' + ' ' * 11 + 'SPLADE' + ' ' * 11 + 'Contriever') print(' ' * 26 + 'nDCG@10 R@100 ' * 3) - print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14) + print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14) for dataset in beir_keys: print(f'{dataset:25}' + f'{table[dataset]["bm25-flat"]["nDCG@10"]:8.4f}{table[dataset]["bm25-flat"]["R@100"]:8.4f} ' + f'{table[dataset]["bm25-multifield"]["nDCG@10"]:8.4f}{table[dataset]["bm25-multifield"]["R@100"]:8.4f} ' + - f'{table[dataset]["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{table[dataset]["splade-distil-cocodenser-medium"]["R@100"]:8.4f}') - print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14) - print('avg' + ' ' * 22 + f'{final_scores["bm25-flat"]["nDCG@10"]:8.4f}{final_scores["bm25-flat"]["R@100"]:8.4f} ' + - f'{final_scores["bm25-multifield"]["nDCG@10"]:8.4f}{final_scores["bm25-multifield"]["R@100"]:8.4f} ' + - f'{final_scores["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{final_scores["splade-distil-cocodenser-medium"]["R@100"]:8.4f} ') + f'{table[dataset]["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{table[dataset]["splade-distil-cocodenser-medium"]["R@100"]:8.4f}' + + f'{table[dataset]["contriever"]["nDCG@10"]:8.4f}{table[dataset]["contriever"]["R@100"]:8.4f} ') + print(' ' * 27 + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14 + ' ' + '-' * 14) + print('avg' + ' ' * 22 + f'{final_scores["bm25-flat"]["nDCG@10"]:8.4f}{final_scores["bm25-flat"]["R@100"]:8.4f} ' + + f'{final_scores["bm25-multifield"]["nDCG@10"]:8.4f}{final_scores["bm25-multifield"]["R@100"]:8.4f} ' + + f'{final_scores["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}{final_scores["splade-distil-cocodenser-medium"]["R@100"]:8.4f} ' + + f'{final_scores["contriever"]["nDCG@10"]:8.4f}{final_scores["contriever"]["R@100"]:8.4f} ') end = time.time()