diff --git a/docs/2cr/msmarco-v1-doc.html b/docs/2cr/msmarco-v1-doc.html index 9777abfc3..4da6c2b7f 100644 --- a/docs/2cr/msmarco-v1-doc.html +++ b/docs/2cr/msmarco-v1-doc.html @@ -131,7 +131,7 @@ ">
[1] Xueguang Ma, Ronak Pradeep, Rodrigo Nogueira, and Jimmy Lin. diff --git a/docs/2cr/msmarco-v1-passage.html b/docs/2cr/msmarco-v1-passage.html index 98cb7cbb5..652691ab8 100644 --- a/docs/2cr/msmarco-v1-passage.html +++ b/docs/2cr/msmarco-v1-passage.html @@ -131,7 +131,7 @@ ">
Evaluation commands:python -m pyserini.search.faiss \ --threads 16 --batch-size 512 \ - --index msmarco-v1-passage.cosdpr-distil \ + --index msmarco-v1-passage.cosdpr-distil \ --topics dl19-passage \ --encoder castorini/cosdpr-distil \ - --output run.msmarco-v1-passage.cosdpr-distil-pytorch.dl19.txt + --output run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dl19.txt
@@ -6120,20 +6120,20 @@python -m pyserini.eval.trec_eval -c -l 2 -m map dl19-passage \ - run.msmarco-v1-passage.cosdpr-distil-pytorch.dl19.txt + run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dl19.txt python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl19-passage \ - run.msmarco-v1-passage.cosdpr-distil-pytorch.dl19.txt + run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dl19.txt python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl19-passage \ - run.msmarco-v1-passage.cosdpr-distil-pytorch.dl19.txt + run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dl19.txt
Evaluation commands:python -m pyserini.search.faiss \ --threads 16 --batch-size 512 \ - --index msmarco-v1-passage.cosdpr-distil \ + --index msmarco-v1-passage.cosdpr-distil \ --topics dl20 \ --encoder castorini/cosdpr-distil \ - --output run.msmarco-v1-passage.cosdpr-distil-pytorch.dl20.txt + --output run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dl20.txt
@@ -6144,18 +6144,18 @@python -m pyserini.eval.trec_eval -c -l 2 -m map dl20-passage \ - run.msmarco-v1-passage.cosdpr-distil-pytorch.dl20.txt + run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dl20.txt python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl20-passage \ - run.msmarco-v1-passage.cosdpr-distil-pytorch.dl20.txt + run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dl20.txt python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl20-passage \ - run.msmarco-v1-passage.cosdpr-distil-pytorch.dl20.txt + run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dl20.txt
Evaluation commands:python -m pyserini.search.faiss \ --threads 16 --batch-size 512 \ - --index msmarco-v1-passage.cosdpr-distil \ + --index msmarco-v1-passage.cosdpr-distil \ --topics msmarco-passage-dev-subset \ --encoder castorini/cosdpr-distil \ - --output run.msmarco-v1-passage.cosdpr-distil-pytorch.dev.txt + --output run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dev.txt
@@ -6165,12 +6165,242 @@python -m pyserini.eval.trec_eval -c -M 10 -m recip_rank msmarco-passage-dev-subset \ - run.msmarco-v1-passage.cosdpr-distil-pytorch.dev.txt + run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dev.txt python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-passage-dev-subset \ - run.msmarco-v1-passage.cosdpr-distil-pytorch.dev.txt + run.msmarco-v1-passage.cosdpr-distil.faiss-flat.pytorch.dev.txt
msmarco-v1-passage.bge-base-en-v1.5.hnsw
+[readme]
+msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8
+[readme]
+msmarco-v1-passage.cosdpr-distil.hnsw
+[readme]
+msmarco-v1-passage.cosdpr-distil.hnsw-int8
+[readme]
+beir-v1.0.0-trec-covid.bge-base-en-v1.5.hnsw
diff --git a/pyserini/2cr/msmarco-v1-doc.yaml b/pyserini/2cr/msmarco-v1-doc.yaml
index db1844789..4a6c39648 100644
--- a/pyserini/2cr/msmarco-v1-doc.yaml
+++ b/pyserini/2cr/msmarco-v1-doc.yaml
@@ -45,8 +45,8 @@ conditions:
nDCG@10: 0.5286
R@1K: 0.8085
- name: bm25-doc-segmented-tuned
- display: BM25 doc segmented (k1=2.16, b=0.61)
- display-html: BM25 doc segmented (k1=2.16, b=0.61)
+ display: BM25 doc seg (k1=2.16, b=0.61)
+ display-html: BM25 doc seg (k1=2.16, b=0.61)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-doc-dev
@@ -67,8 +67,8 @@ conditions:
nDCG@10: 0.5213
R@1K: 0.7725
- name: bm25-doc-segmented-default
- display: BM25 doc segmented (k1=0.9, b=0.4)
- display-html: BM25 doc segmented (k1=0.9, b=0.4)
+ display: BM25 doc seg (k1=0.9, b=0.4)
+ display-html: BM25 doc seg (k1=0.9, b=0.4)
display-row: "[1] (1b)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
@@ -135,8 +135,8 @@ conditions:
nDCG@10: 0.5254
R@1K: 0.8259
- name: bm25-rm3-doc-segmented-tuned
- display: BM25+RM3 doc segmented (k1=2.16, b=0.61)
- display-html: BM25+RM3 doc segmented (k1=2.16, b=0.61)
+ display: BM25+RM3 doc seg (k1=2.16, b=0.61)
+ display-html: BM25+RM3 doc seg (k1=2.16, b=0.61)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-doc-dev
@@ -157,8 +157,8 @@ conditions:
nDCG@10: 0.5030
R@1K: 0.8056
- name: bm25-rm3-doc-segmented-default
- display: BM25+RM3 doc segmented (k1=0.9, b=0.4)
- display-html: BM25+RM3 doc segmented (k1=0.9, b=0.4)
+ display: BM25+RM3 doc seg (k1=0.9, b=0.4)
+ display-html: BM25+RM3 doc seg (k1=0.9, b=0.4)
display-row: "[1] (1d)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
@@ -224,8 +224,8 @@ conditions:
nDCG@10: 0.5192
R@1K: 0.8273
- name: bm25-rocchio-doc-segmented-tuned
- display: BM25+Rocchio doc segmented (k1=2.16, b=0.61)
- display-html: BM25+Rocchio doc segmented (k1=2.16, b=0.61)
+ display: BM25+Rocchio doc seg (k1=2.16, b=0.61)
+ display-html: BM25+Rocchio doc seg (k1=2.16, b=0.61)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --rocchio --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-doc-dev
@@ -246,8 +246,8 @@ conditions:
nDCG@10: 0.4997
R@1K: 0.8042
- name: bm25-rocchio-doc-segmented-default
- display: BM25+Rocchio doc segmented (k1=0.9, b=0.4)
- display-html: BM25+Rocchio doc segmented (k1=0.9, b=0.4)
+ display: BM25+Rocchio doc seg (k1=0.9, b=0.4)
+ display-html: BM25+Rocchio doc seg (k1=0.9, b=0.4)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented --topics $topics --output $output --bm25 --rocchio --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-doc-dev
@@ -313,8 +313,8 @@ conditions:
nDCG@10: 0.5885
R@1K: 0.8403
- name: bm25-d2q-t5-doc-segmented-tuned
- display: BM25 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59)
- display-html: BM25 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59)
+ display: BM25 w/ doc2query-T5 doc seg (k1=2.56, b=0.59)
+ display-html: BM25 w/ doc2query-T5 doc seg (k1=2.56, b=0.59)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented.d2q-t5 --topics $topics --output $output --bm25 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-doc-dev
@@ -335,8 +335,8 @@ conditions:
nDCG@10: 0.5943
R@1K: 0.7968
- name: bm25-d2q-t5-doc-segmented-default
- display: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
- display-html: BM25 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
+ display: BM25 w/ doc2query-T5 doc seg (k1=0.9, b=0.4)
+ display-html: BM25 w/ doc2query-T5 doc seg (k1=0.9, b=0.4)
display-row: "[1] (2b)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented.d2q-t5 --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
@@ -403,8 +403,8 @@ conditions:
nDCG@10: 0.5427
R@1K: 0.8631
- name: bm25-rm3-d2q-t5-doc-segmented-tuned
- display: BM25+RM3 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59)
- display-html: BM25+RM3 w/ doc2query-T5 doc segmented (k1=2.56, b=0.59)
+ display: BM25+RM3 w/ doc2query-T5 doc seg (k1=2.56, b=0.59)
+ display-html: BM25+RM3 w/ doc2query-T5 doc seg (k1=2.56, b=0.59)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented.d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-doc-dev
@@ -425,8 +425,8 @@ conditions:
nDCG@10: 0.5711
R@1K: 0.8156
- name: bm25-rm3-d2q-t5-doc-segmented-default
- display: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
- display-html: BM25+RM3 w/ doc2query-T5 doc segmented (k1=0.9, b=0.4)
+ display: BM25+RM3 w/ doc2query-T5 doc seg (k1=0.9, b=0.4)
+ display-html: BM25+RM3 w/ doc2query-T5 doc seg (k1=0.9, b=0.4)
display-row: "[1] (2d)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented.d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4 --hits 10000 --max-passage-hits 1000 --max-passage
topics:
@@ -448,8 +448,8 @@ conditions:
nDCG@10: 0.5851
R@1K: 0.8266
- name: unicoil-noexp-pytorch
- display: "uniCOIL (noexp): query inference with PyTorch"
- display-html: "uniCOIL (noexp): query inference with PyTorch"
+ display: "uniCOIL (noexp): PyTorch"
+ display-html: "uniCOIL (noexp): PyTorch"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented.unicoil-noexp --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-doc-dev
@@ -470,8 +470,8 @@ conditions:
nDCG@10: 0.5893
R@1K: 0.7623
- name: unicoil-noexp
- display: "uniCOIL (noexp): pre-encoded"
- display-html: "uniCOIL (noexp): pre-encoded queries"
+ display: "uniCOIL (noexp): cached queries"
+ display-html: "uniCOIL (noexp): cached queries"
display-row: "[1] (3a)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented.unicoil-noexp --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
@@ -493,8 +493,8 @@ conditions:
nDCG@10: 0.5893
R@1K: 0.7623
- name: unicoil-pytorch
- display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch"
- display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch"
+ display: "uniCOIL (w/ doc2query-T5): PyTorch"
+ display-html: "uniCOIL (w/ doc2query-T5): PyTorch"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented.unicoil --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-doc-dev
@@ -515,8 +515,8 @@ conditions:
nDCG@10: 0.6033
R@1K: 0.7869
- name: unicoil
- display: "uniCOIL (w/ doc2query-T5): pre-encoded"
- display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries"
+ display: "uniCOIL (w/ doc2query-T5): cached queries"
+ display-html: "uniCOIL (w/ doc2query-T5): cached queries"
display-row: "[1] (3b)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-doc-segmented.unicoil --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
diff --git a/pyserini/2cr/msmarco-v1-passage.yaml b/pyserini/2cr/msmarco-v1-passage.yaml
index 8f08e53a2..d4da2d3f7 100644
--- a/pyserini/2cr/msmarco-v1-passage.yaml
+++ b/pyserini/2cr/msmarco-v1-passage.yaml
@@ -1,7 +1,53 @@
conditions:
- - name: bge-base-en-v1.5-pytorch
- display: "BGE-base-en-v1.5: PyTorch"
- display-html: "BGE-base-en-v1.5: PyTorch"
+ - name: bge-base-en-v1.5.lucene-hnsw-int8.onnx
+ display: "BGE-base-en-v1.5: Lucene quantized HNSW, ONNX"
+ display-html: "BGE-base-en-v1.5: Lucene quantized HNSW, ONNX"
+ display-row: "[14]"
+ command: python -m pyserini.search.lucene --threads ${dense_threads} --batch-size ${dense_batch_size} --dense --hnsw --index msmarco-v1-passage.bge-base-en-v1.5.hnsw-int8 --topics $topics --onnx-encoder BgeBaseEn15 --output $output --hits 1000 --ef-search 1000
+ topics:
+ - topic_key: msmarco-passage-dev-subset
+ eval_key: msmarco-passage-dev-subset
+ scores:
+ - MRR@10: 0.3575
+ R@1K: 0.9772
+ - topic_key: dl19-passage
+ eval_key: dl19-passage
+ scores:
+ - MAP: 0.4454
+ nDCG@10: 0.7017
+ R@1K: 0.8436
+ - topic_key: dl20
+ eval_key: dl20-passage
+ scores:
+ - MAP: 0.4596
+ nDCG@10: 0.6767
+ R@1K: 0.8468
+ - name: bge-base-en-v1.5.lucene-hnsw.onnx
+ display: "BGE-base-en-v1.5: Lucene HNSW, ONNX"
+ display-html: "BGE-base-en-v1.5: Lucene HNSW, ONNX"
+ display-row: "[14]"
+ command: python -m pyserini.search.lucene --threads ${dense_threads} --batch-size ${dense_batch_size} --dense --hnsw --index msmarco-v1-passage.bge-base-en-v1.5.hnsw --topics $topics --onnx-encoder BgeBaseEn15 --output $output --hits 1000 --ef-search 1000
+ topics:
+ - topic_key: msmarco-passage-dev-subset
+ eval_key: msmarco-passage-dev-subset
+ scores:
+ - MRR@10: 0.3575
+ R@1K: 0.9788
+ - topic_key: dl19-passage
+ eval_key: dl19-passage
+ scores:
+ - MAP: 0.4486
+ nDCG@10: 0.7016
+ R@1K: 0.8441
+ - topic_key: dl20
+ eval_key: dl20-passage
+ scores:
+ - MAP: 0.4626
+ nDCG@10: 0.6768
+ R@1K: 0.8526
+ - name: bge-base-en-v1.5.faiss-flat.pytorch
+ display: "BGE-base-en-v1.5: Faiss flat, PyTorch"
+ display-html: "BGE-base-en-v1.5: Faiss flat, PyTorch"
display-row: "[14]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --encoder-class auto --encoder BAAI/bge-base-en-v1.5 --l2-norm --query-prefix "Represent this sentence for searching relevant passages:" --index msmarco-v1-passage.bge-base-en-v1.5 --topics $topics --output $output --hits 1000
topics:
@@ -22,11 +68,57 @@ conditions:
- MAP: 0.4628
nDCG@10: 0.6768
R@1K: 0.8547
- - name: cosdpr-distil-pytorch
- display: "cosDPR-distil: PyTorch"
- display-html: "cosDPR-distil: PyTorch"
+ - name: cosdpr-distil.lucene-hnsw-int8.onnx
+ display: "cosDPR-distil: Lucene quantized HNSW, ONNX"
+ display-html: "cosDPR-distil: Lucene quantized HNSW, ONNX"
+ display-row: "[13]"
+ command: python -m pyserini.search.lucene --threads ${dense_threads} --batch-size ${dense_batch_size} --dense --hnsw --index msmarco-v1-passage.cosdpr-distil.hnsw-int8 --topics $topics --onnx-encoder CosDprDistil --output $output --hits 1000 --ef-search 1000
+ topics:
+ - topic_key: msmarco-passage-dev-subset
+ eval_key: msmarco-passage-dev-subset
+ scores:
+ - MRR@10: 0.3899
+ R@1K: 0.9764
+ - topic_key: dl19-passage
+ eval_key: dl19-passage
+ scores:
+ - MAP: 0.4664
+ nDCG@10: 0.7247
+ R@1K: 0.8218
+ - topic_key: dl20
+ eval_key: dl20-passage
+ scores:
+ - MAP: 0.4871
+ nDCG@10: 0.6996
+ R@1K: 0.8538
+ - name: cosdpr-distil.lucene-hnsw.onnx
+ display: "cosDPR-distil: Lucene HNSW, ONNX"
+ display-html: "cosDPR-distil: Lucene HNSW, ONNX"
+ display-row: "[13]"
+ command: python -m pyserini.search.lucene --threads ${dense_threads} --batch-size ${dense_batch_size} --dense --hnsw --index msmarco-v1-passage.cosdpr-distil.hnsw --topics $topics --onnx-encoder CosDprDistil --output $output --hits 1000 --ef-search 1000
+ topics:
+ - topic_key: msmarco-passage-dev-subset
+ eval_key: msmarco-passage-dev-subset
+ scores:
+ - MRR@10: 0.3887
+ R@1K: 0.9765
+ - topic_key: dl19-passage
+ eval_key: dl19-passage
+ scores:
+ - MAP: 0.4660
+ nDCG@10: 0.7250
+ R@1K: 0.8222
+ - topic_key: dl20
+ eval_key: dl20-passage
+ scores:
+ - MAP: 0.4876
+ nDCG@10: 0.7025
+ R@1K: 0.8540
+ - name: cosdpr-distil.faiss-flat.pytorch
+ display: "cosDPR-distil: Faiss flat, PyTorch"
+ display-html: "cosDPR-distil: Faiss flat, PyTorch"
display-row: "[13]"
- command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.cosdpr-distil --topics $topics --encoder castorini/cosdpr-distil --output $output
+ command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.cosdpr-distil --topics $topics --encoder castorini/cosdpr-distil --output $output
topics:
- topic_key: msmarco-passage-dev-subset
eval_key: msmarco-passage-dev-subset
@@ -46,8 +138,8 @@ conditions:
nDCG@10: 0.7025
R@1K: 0.8533
- name: splade-pp-ed-rocchio-pytorch
- display: "SPLADE++ EnsembleDistil w/ Rocchio: PyTorch"
- display-html: "SPLADE++ EnsembleDistil w/ Rocchio: PyTorch"
+ display: "SPLADE++ EnsembleDistil w/ Rocchio: Lucene, PyTorch"
+ display-html: "SPLADE++ EnsembleDistil w/ Rocchio: Lucene, PyTorch"
display-row: ""
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.splade-pp-ed-text --topics $topics --encoder naver/splade-cocondenser-ensembledistil --output $output --hits 1000 --impact --rocchio
topics:
@@ -69,8 +161,8 @@ conditions:
nDCG@10: 0.7280
R@1K: 0.9069
- name: splade-pp-sd-rocchio-pytorch
- display: "SPLADE++ SelfDistil w/ Rocchio: PyTorch"
- display-html: "SPLADE++ SelfDistil w/ Rocchio: PyTorch"
+ display: "SPLADE++ SelfDistil w/ Rocchio: Lucene, PyTorch"
+ display-html: "SPLADE++ SelfDistil w/ Rocchio: Lucene, PyTorch"
display-row: ""
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.splade-pp-sd-text --topics $topics --encoder naver/splade-cocondenser-selfdistil --output $output --hits 1000 --impact --rocchio
topics:
@@ -92,8 +184,8 @@ conditions:
nDCG@10: 0.7388
R@1K: 0.9120
- name: splade-pp-ed-pytorch
- display: "SPLADE++ EnsembleDistil: PyTorch"
- display-html: "SPLADE++ EnsembleDistil: PyTorch"
+ display: "SPLADE++ EnsembleDistil: Lucene, PyTorch"
+ display-html: "SPLADE++ EnsembleDistil: Lucene, PyTorch"
display-row: "[2]"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.splade-pp-ed --topics $topics --encoder naver/splade-cocondenser-ensembledistil --output $output --hits 1000 --impact
topics:
@@ -115,8 +207,8 @@ conditions:
nDCG@10: 0.7197
R@1K: 0.8998
- name: splade-pp-sd-pytorch
- display: "SPLADE++ SelfDistil: PyTorch"
- display-html: "SPLADE++ SelfDistil: PyTorch"
+ display: "SPLADE++ SelfDistil: Lucene, PyTorch"
+ display-html: "SPLADE++ SelfDistil: Lucene, PyTorch"
display-row: "[2]"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.splade-pp-sd --topics $topics --encoder naver/splade-cocondenser-selfdistil --output $output --hits 1000 --impact
topics:
@@ -138,8 +230,8 @@ conditions:
nDCG@10: 0.7282
R@1K: 0.9024
- name: splade-pp-ed-rocchio-onnx
- display: "SPLADE++ EnsembleDistil w/ Rocchio: ONNX"
- display-html: "SPLADE++ EnsembleDistil w/ Rocchio: ONNX"
+ display: "SPLADE++ EnsembleDistil w/ Rocchio: Lucene, ONNX"
+ display-html: "SPLADE++ EnsembleDistil w/ Rocchio: Lucene, ONNX"
display-row: ""
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.splade-pp-ed-text --topics $topics --onnx-encoder SpladePlusPlusEnsembleDistil --output $output --hits 1000 --impact --rocchio
topics:
@@ -161,8 +253,8 @@ conditions:
nDCG@10: 0.7280
R@1K: 0.9069
- name: splade-pp-sd-rocchio-onnx
- display: "SPLADE++ SelfDistil w/ Rocchio: ONNX"
- display-html: "SPLADE++ SelfDistil w/ Rocchio: ONNX"
+ display: "SPLADE++ SelfDistil w/ Rocchio: Lucene, ONNX"
+ display-html: "SPLADE++ SelfDistil w/ Rocchio: Lucene, ONNX"
display-row: ""
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.splade-pp-sd-text --topics $topics --onnx-encoder SpladePlusPlusSelfDistil --output $output --hits 1000 --impact --rocchio
topics:
@@ -184,8 +276,8 @@ conditions:
nDCG@10: 0.7388
R@1K: 0.9120
- name: bm25-rocchio-d2q-t5-tuned
- display: BM25+Rocchio w/ doc2query-T5 (k1=2.18, b=0.86)
- display-html: BM25+Rocchio w/ doc2query-T5 (k1=2.18, b=0.86)
+ display: "BM25+Rocchio w/ doc2query-T5 (k1=2.18, b=0.86): Lucene"
+ display-html: "BM25+Rocchio w/ doc2query-T5 (k1=2.18, b=0.86): Lucene"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.d2q-t5-docvectors --topics $topics --output $output --bm25 --rocchio
topics:
- topic_key: msmarco-passage-dev-subset
@@ -206,8 +298,8 @@ conditions:
nDCG@10: 0.6224
R@1K: 0.8641
- name: bm25-rocchio-d2q-t5-default
- display: BM25+Rocchio w/ doc2query-T5 (k1=0.9, b=0.4)
- display-html: BM25+Rocchio w/ doc2query-T5 (k1=0.9, b=0.4)
+ display: "BM25+Rocchio w/ doc2query-T5 (k1=0.9, b=0.4): Lucene"
+ display-html: "BM25+Rocchio w/ doc2query-T5 (k1=0.9, b=0.4): Lucene"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.d2q-t5-docvectors --topics $topics --output $output --bm25 --rocchio --k1 0.9 --b 0.4
topics:
- topic_key: msmarco-passage-dev-subset
@@ -228,8 +320,8 @@ conditions:
nDCG@10: 0.6102
R@1K: 0.8675
- name: bm25-rocchio-default
- display: BM25+Rocchio (k1=0.9, b=0.4)
- display-html: BM25+Rocchio (k1=0.9, b=0.4)
+ display: "BM25+Rocchio (k1=0.9, b=0.4): Lucene"
+ display-html: "BM25+Rocchio (k1=0.9, b=0.4): Lucene"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --rocchio
topics:
- topic_key: msmarco-passage-dev-subset
@@ -250,8 +342,8 @@ conditions:
nDCG@10: 0.4910
R@1K: 0.8156
- name: bm25-rocchio-tuned
- display: BM25+Rocchio (k1=0.82, b=0.68)
- display-html: BM25+Rocchio (k1=0.82, b=0.68)
+ display: "BM25+Rocchio (k1=0.82, b=0.68): Lucene"
+ display-html: "BM25+Rocchio (k1=0.82, b=0.68): Lucene"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage --topics $topics --output $output --bm25 --rocchio
topics:
- topic_key: msmarco-passage-dev-subset
@@ -272,8 +364,8 @@ conditions:
nDCG@10: 0.4908
R@1K: 0.8327
- name: distilbert-kd-tasb
- display: "DistilBERT KD TASB: cached queries"
- display-html: "DistilBERT KD TASB: cached queries"
+ display: "DistilBERT KD TASB: Faiss flat, cached queries"
+ display-html: "DistilBERT KD TASB: Faiss flat, cached queries"
display-row: "[5]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.distilbert-dot-tas_b-b256 --topics $topics --encoded-queries distilbert_tas_b-$topics --output $output
topics:
@@ -295,8 +387,8 @@ conditions:
nDCG@10: 0.6854
R@1K: 0.8727
- name: distilbert-kd-tasb-pytorch
- display: "DistilBERT KD TASB: PyTorch"
- display-html: "DistilBERT KD TASB: PyTorch"
+ display: "DistilBERT KD TASB: Faiss flat, PyTorch"
+ display-html: "DistilBERT KD TASB: Faiss flat, PyTorch"
display-row: "[5]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.distilbert-dot-tas_b-b256 --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco --output $output
topics:
@@ -318,8 +410,8 @@ conditions:
nDCG@10: 0.6854
R@1K: 0.8727
- name: distilbert-kd-tasb-avg-prf-pytorch
- display: "DistilBERT KD TASB w/ Average PRF: PyTorch"
- display-html: "DistilBERT KD TASB w/ Average PRF: PyTorch"
+ display: "DistilBERT KD TASB w/ Average PRF: Faiss flat, PyTorch"
+ display-html: "DistilBERT KD TASB w/ Average PRF: Faiss flat, PyTorch"
display-row: "[9]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.distilbert-dot-tas_b-b256 --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco --output $output --prf-method avg --prf-depth 3
topics:
@@ -341,8 +433,8 @@ conditions:
nDCG@10: 0.7086
R@1K: 0.9030
- name: distilbert-kd-tasb-rocchio-prf-pytorch
- display: "DistilBERT KD TASB w/ Rocchio PRF: PyTorch"
- display-html: "DistilBERT KD TASB w/ Rocchio PRF: PyTorch"
+ display: "DistilBERT KD TASB w/ Rocchio PRF: Faiss flat, PyTorch"
+ display-html: "DistilBERT KD TASB w/ Rocchio PRF: Faiss flat, PyTorch"
display-row: "[9]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.distilbert-dot-tas_b-b256 --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-tas_b-b256-msmarco --output $output --prf-method rocchio --prf-depth 5 --rocchio-alpha 0.4 --rocchio-beta 0.6 --rocchio-topk 5
topics:
@@ -364,8 +456,8 @@ conditions:
nDCG@10: 0.7083
R@1K: 0.8926
- name: distilbert-kd
- display: "DistilBERT KD: cached queries"
- display-html: "DistilBERT KD: cached queries"
+ display: "DistilBERT KD: Faiss flat, cached queries"
+ display-html: "DistilBERT KD: Faiss flat, cached queries"
display-row: "[4]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.distilbert-dot-margin-mse-t2 --topics $topics --encoded-queries distilbert_kd-$topics --output $output
topics:
@@ -387,8 +479,8 @@ conditions:
nDCG@10: 0.6447
R@1K: 0.7953
- name: distilbert-kd-pytorch
- display: "DistilBERT KD: PyTorch"
- display-html: "DistilBERT KD: PyTorch"
+ display: "DistilBERT KD: Faiss flat, PyTorch"
+ display-html: "DistilBERT KD: Faiss flat, PyTorch"
display-row: "[4]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.distilbert-dot-margin-mse-t2 --topics $topics --encoder sebastian-hofstaetter/distilbert-dot-margin_mse-T2-msmarco --output $output
topics:
@@ -410,8 +502,8 @@ conditions:
nDCG@10: 0.6447
R@1K: 0.7953
- name: ance
- display: "ANCE: cached queries"
- display-html: "ANCE: cached queries"
+ display: "ANCE: Faiss flat, cached queries"
+ display-html: "ANCE: Faiss flat, cached queries"
display-row: "[3]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.ance --topics $topics --encoded-queries ance-$topics --output $output
topics:
@@ -433,8 +525,8 @@ conditions:
nDCG@10: 0.6458
R@1K: 0.7764
- name: ance-pytorch
- display: "ANCE: PyTorch"
- display-html: "ANCE: PyTorch"
+ display: "ANCE: Faiss flat, PyTorch"
+ display-html: "ANCE: Faiss flat, PyTorch"
display-row: "[3]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.ance --topics $topics --encoder castorini/ance-msmarco-passage --output $output
topics:
@@ -456,8 +548,8 @@ conditions:
nDCG@10: 0.6458
R@1K: 0.7764
- name: ance-avg-prf-pytorch
- display: "ANCE w/ Average PRF: PyTorch"
- display-html: "ANCE w/ Average PRF: PyTorch"
+ display: "ANCE w/ Average PRF: Faiss flat, PyTorch"
+ display-html: "ANCE w/ Average PRF: Faiss flat, PyTorch"
display-row: "[9]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.ance --topics $topics --encoder castorini/ance-msmarco-passage --output $output --prf-method avg --prf-depth 3
topics:
@@ -479,8 +571,8 @@ conditions:
nDCG@10: 0.6573
R@1K: 0.7909
- name: ance-rocchio-prf-pytorch
- display: "ANCE w/ Rocchio PRF: PyTorch"
- display-html: "ANCE w/ Rocchio PRF: PyTorch"
+ display: "ANCE w/ Rocchio PRF: Faiss flat, PyTorch"
+ display-html: "ANCE w/ Rocchio PRF: Faiss flat, PyTorch"
display-row: "[9]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.ance --topics $topics --encoder castorini/ance-msmarco-passage --output $output --prf-method rocchio --prf-depth 5 --rocchio-alpha 0.4 --rocchio-beta 0.6 --rocchio-topk 5
topics:
@@ -502,8 +594,8 @@ conditions:
nDCG@10: 0.6471
R@1K: 0.7957
- name: sbert-pytorch
- display: "SBERT: PyTorch"
- display-html: "SBERT: PyTorch"
+ display: "SBERT: Faiss flat, PyTorch"
+ display-html: "SBERT: Faiss flat, PyTorch"
display-row: "[10]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.sbert --topics $topics --encoder sentence-transformers/msmarco-distilbert-base-v3 --output $output
topics:
@@ -525,8 +617,8 @@ conditions:
nDCG@10: 0.6344
R@1K: 0.7937
- name: sbert-avg-prf-pytorch
- display: "SBERT w/ Average PRF: PyTorch"
- display-html: "SBERT w/ Average PRF: PyTorch"
+ display: "SBERT w/ Average PRF: Faiss flat, PyTorch"
+ display-html: "SBERT w/ Average PRF: Faiss flat, PyTorch"
display-row: "[9]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.sbert --topics $topics --encoder sentence-transformers/msmarco-distilbert-base-v3 --output $output --prf-method avg --prf-depth 3
topics:
@@ -548,8 +640,8 @@ conditions:
nDCG@10: 0.6412
R@1K: 0.8169
- name: sbert-rocchio-prf-pytorch
- display: "SBERT w/ Rocchio PRF: PyTorch"
- display-html: "SBERT w/ Rocchio PRF: PyTorch"
+ display: "SBERT w/ Rocchio PRF: Faiss flat, PyTorch"
+ display-html: "SBERT w/ Rocchio PRF: Faiss flat, PyTorch"
display-row: "[9]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.sbert --topics $topics --encoder sentence-transformers/msmarco-distilbert-base-v3 --output $output --prf-method rocchio --prf-depth 5 --rocchio-alpha 0.4 --rocchio-beta 0.6 --rocchio-topk 5
topics:
@@ -571,8 +663,8 @@ conditions:
nDCG@10: 0.6559
R@1K: 0.8226
- name: bm25-tuned
- display: BM25 (k1=0.82, b=0.68)
- display-html: BM25 (k1=0.82, b=0.68)
+ display: "BM25 (k1=0.82, b=0.68): Lucene"
+ display-html: "BM25 (k1=0.82, b=0.68): Lucene"
command: python -m pyserini.search.lucene --topics $topics --index msmarco-v1-passage --output $output --bm25
topics:
- topic_key: msmarco-passage-dev-subset
@@ -593,8 +685,8 @@ conditions:
nDCG@10: 0.4876
R@1K: 0.8031
- name: bm25-rm3-tuned
- display: BM25+RM3 (k1=0.82, b=0.68)
- display-html: BM25+RM3 (k1=0.82, b=0.68)
+ display: "BM25+RM3 (k1=0.82, b=0.68): Lucene"
+ display-html: "BM25+RM3 (k1=0.82, b=0.68): Lucene"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage --topics $topics --output $output --bm25 --rm3
topics:
- topic_key: msmarco-passage-dev-subset
@@ -615,8 +707,8 @@ conditions:
nDCG@10: 0.4924
R@1K: 0.8292
- name: bm25-default
- display: BM25 (k1=0.9, b=0.4)
- display-html: BM25 (k1=0.9, b=0.4)
+ display: "BM25 (k1=0.9, b=0.4): Lucene"
+ display-html: "BM25 (k1=0.9, b=0.4): Lucene"
display-row: "[1] (1a)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage --topics $topics --output $output --bm25 --k1 0.9 --b 0.4
topics:
@@ -638,8 +730,8 @@ conditions:
nDCG@10: 0.4796
R@1K: 0.7863
- name: bm25-rm3-default
- display: BM25+RM3 (k1=0.9, b=0.4)
- display-html: BM25+RM3 (k1=0.9, b=0.4)
+ display: "BM25+RM3 (k1=0.9, b=0.4): Lucene"
+ display-html: "BM25+RM3 (k1=0.9, b=0.4): Lucene"
display-row: "[1] (1b)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage --topics $topics --output $output --bm25 --k1 0.9 --b 0.4 --rm3
topics:
@@ -661,8 +753,8 @@ conditions:
nDCG@10: 0.4896
R@1K: 0.8236
- name: bm25-d2q-t5-tuned
- display: BM25 w/ doc2query-T5 (k1=2.18, b=0.86)
- display-html: BM25 w/ doc2query-T5 (k1=2.18, b=0.86)
+ display: "BM25 w/ doc2query-T5 (k1=2.18, b=0.86): Lucene"
+ display-html: "BM25 w/ doc2query-T5 (k1=2.18, b=0.86): Lucene"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.d2q-t5 --topics $topics --output $output --bm25
topics:
- topic_key: msmarco-passage-dev-subset
@@ -683,8 +775,8 @@ conditions:
nDCG@10: 0.6265
R@1K: 0.8393
- name: bm25-d2q-t5-default
- display: BM25 w/ doc2query-T5 (k1=0.9, b=0.4)
- display-html: BM25 w/ doc2query-T5 (k1=0.9, b=0.4)
+ display: "BM25 w/ doc2query-T5 (k1=0.9, b=0.4): Lucene"
+ display-html: "BM25 w/ doc2query-T5 (k1=0.9, b=0.4): Lucene"
display-row: "[1] (2a)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.d2q-t5 --topics $topics --output $output --bm25 --k1 0.9 --b 0.4
topics:
@@ -706,8 +798,8 @@ conditions:
nDCG@10: 0.6187
R@1K: 0.8452
- name: bm25-rm3-d2q-t5-tuned
- display: BM25+RM3 w/ doc2query-T5 (k1=2.18, b=0.86)
- display-html: BM25+RM3 w/ doc2query-T5 (k1=2.18, b=0.86)
+ display: "BM25+RM3 w/ doc2query-T5 (k1=2.18, b=0.86): Lucene"
+ display-html: "BM25+RM3 w/ doc2query-T5 (k1=2.18, b=0.86): Lucene"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3
topics:
- topic_key: msmarco-passage-dev-subset
@@ -728,8 +820,8 @@ conditions:
nDCG@10: 0.6235
R@1K: 0.8605
- name: bm25-rm3-d2q-t5-default
- display: BM25+RM3 w/ doc2query-T5 (k1=0.9, b=0.4)
- display-html: BM25+RM3 w/ doc2query-T5 (k1=0.9, b=0.4)
+ display: "BM25+RM3 w/ doc2query-T5 (k1=0.9, b=0.4): Lucene"
+ display-html: "BM25+RM3 w/ doc2query-T5 (k1=0.9, b=0.4): Lucene"
display-row: "[1] (2b)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.d2q-t5-docvectors --topics $topics --output $output --bm25 --rm3 --k1 0.9 --b 0.4
topics:
@@ -751,8 +843,8 @@ conditions:
nDCG@10: 0.6131
R@1K: 0.8700
- name: unicoil-pytorch
- display: "uniCOIL (w/ doc2query-T5): PyTorch"
- display-html: "uniCOIL (w/ doc2query-T5): PyTorch"
+ display: "uniCOIL (w/ doc2query-T5): Lucene, PyTorch"
+ display-html: "uniCOIL (w/ doc2query-T5): Lucene, PyTorch"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.unicoil --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --hits 1000 --impact
topics:
- topic_key: msmarco-passage-dev-subset
@@ -773,8 +865,8 @@ conditions:
nDCG@10: 0.6745
R@1K: 0.8430
- name: unicoil-onnx
- display: "uniCOIL (w/ doc2query-T5): ONNX"
- display-html: "uniCOIL (w/ doc2query-T5): ONNX"
+ display: "uniCOIL (w/ doc2query-T5): Lucene, ONNX"
+ display-html: "uniCOIL (w/ doc2query-T5): Lucene, ONNX"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.unicoil --topics $topics --onnx-encoder UniCoil --output $output --hits 1000 --impact
topics:
- topic_key: msmarco-passage-dev-subset
@@ -795,8 +887,8 @@ conditions:
nDCG@10: 0.6745
R@1K: 0.8430
- name: unicoil
- display: "uniCOIL (w/ doc2query-T5): cached queries"
- display-html: "uniCOIL (w/ doc2query-T5): cached queries"
+ display: "uniCOIL (w/ doc2query-T5): Lucene, cached queries"
+ display-html: "uniCOIL (w/ doc2query-T5): Lucene, cached queries"
display-row: "[1] (3b)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.unicoil --topics $topics --output $output --hits 1000 --impact
topics:
@@ -818,8 +910,8 @@ conditions:
nDCG@10: 0.6745
R@1K: 0.8430
- name: unicoil-noexp-pytorch
- display: "uniCOIL (noexp): PyTorch"
- display-html: "uniCOIL (noexp): PyTorch"
+ display: "uniCOIL (noexp): Lucene, PyTorch"
+ display-html: "uniCOIL (noexp): Lucene, PyTorch"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.unicoil-noexp --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --hits 1000 --impact
topics:
- topic_key: msmarco-passage-dev-subset
@@ -840,8 +932,8 @@ conditions:
nDCG@10: 0.6523
R@1K: 0.7861
- name: unicoil-noexp-onnx
- display: "uniCOIL (noexp): ONNX"
- display-html: "uniCOIL (noexp): ONNX"
+ display: "uniCOIL (noexp): Lucene, ONNX"
+ display-html: "uniCOIL (noexp): Lucene, ONNX"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.unicoil-noexp --topics $topics --onnx-encoder UniCoil --output $output --hits 1000 --impact
topics:
- topic_key: msmarco-passage-dev-subset
@@ -862,8 +954,8 @@ conditions:
nDCG@10: 0.6400
R@1K: 0.7910
- name: unicoil-noexp
- display: "uniCOIL (noexp): cached queries"
- display-html: "uniCOIL (noexp): cached queries"
+ display: "uniCOIL (noexp): Lucene, cached queries"
+ display-html: "uniCOIL (noexp): Lucene, cached queries"
display-row: "[1] (3a)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.unicoil-noexp --topics $topics --output $output --hits 1000 --impact
topics:
@@ -885,8 +977,8 @@ conditions:
nDCG@10: 0.6523
R@1K: 0.7861
- name: splade-pp-ed-onnx
- display: "SPLADE++ EnsembleDistil: ONNX"
- display-html: "SPLADE++ EnsembleDistil: ONNX"
+ display: "SPLADE++ EnsembleDistil: Lucene, ONNX"
+ display-html: "SPLADE++ EnsembleDistil: Lucene, ONNX"
display-row: "[2]"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.splade-pp-ed --topics $topics --onnx-encoder SpladePlusPlusEnsembleDistil --output $output --hits 1000 --impact
topics:
@@ -908,8 +1000,8 @@ conditions:
nDCG@10: 0.7197
R@1K: 0.8998
- name: splade-pp-sd-onnx
- display: "SPLADE++ SelfDistil: ONNX"
- display-html: "SPLADE++ SelfDistil: ONNX"
+ display: "SPLADE++ SelfDistil: Lucene, ONNX"
+ display-html: "SPLADE++ SelfDistil: Lucene, ONNX"
display-row: "[2]"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.splade-pp-sd --topics $topics --onnx-encoder SpladePlusPlusSelfDistil --output $output --hits 1000 --impact
topics:
@@ -931,8 +1023,8 @@ conditions:
nDCG@10: 0.7282
R@1K: 0.9024
- name: tct_colbert-v2-hnp
- display: "TCT_ColBERT-V2-HN+: cached queries"
- display-html: "TCT_ColBERT-V2-HN+: cached queries"
+ display: "TCT_ColBERT-V2-HN+: Faiss flat, cached queries"
+ display-html: "TCT_ColBERT-V2-HN+: Faiss flat, cached queries"
display-row: "[6]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.tct_colbert-v2-hnp --topics $topics --encoded-queries tct_colbert-v2-hnp-$topics --output $output
topics:
@@ -954,8 +1046,8 @@ conditions:
nDCG@10: 0.6882
R@1K: 0.8429
- name: tct_colbert-v2-hnp-pytorch
- display: "TCT_ColBERT-V2-HN+: PyTorch"
- display-html: "TCT_ColBERT-V2-HN+: PyTorch"
+ display: "TCT_ColBERT-V2-HN+: Faiss flat, PyTorch"
+ display-html: "TCT_ColBERT-V2-HN+: Faiss flat, PyTorch"
display-row: "[6]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.tct_colbert-v2-hnp --topics $topics --encoder castorini/tct_colbert-v2-hnp-msmarco --output $output
topics:
@@ -977,8 +1069,8 @@ conditions:
nDCG@10: 0.6882
R@1K: 0.8429
- name: tct_colbert-v2-hnp-avg-prf-pytorch
- display: "TCT_ColBERT-V2-HN+ w/ Average PRF: PyTorch"
- display-html: "TCT_ColBERT-V2-HN+ w/ Average PRF: PyTorch"
+ display: "TCT_ColBERT-V2-HN+ w/ Average PRF: Faiss flat, PyTorch"
+ display-html: "TCT_ColBERT-V2-HN+ w/ Average PRF: Faiss flat, PyTorch"
display-row: "[9]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.tct_colbert-v2-hnp --topics $topics --encoder castorini/tct_colbert-v2-hnp-msmarco --output $output --prf-method avg --prf-depth 3
topics:
@@ -1000,8 +1092,8 @@ conditions:
nDCG@10: 0.6836
R@1K: 0.8579
- name: tct_colbert-v2-hnp-rocchio-prf-pytorch
- display: "TCT_ColBERT-V2-HN+ w/ Rocchio PRF: PyTorch"
- display-html: "TCT_ColBERT-V2-HN+ w/ Rocchio PRF: PyTorch"
+ display: "TCT_ColBERT-V2-HN+ w/ Rocchio PRF: Faiss flat, PyTorch"
+ display-html: "TCT_ColBERT-V2-HN+ w/ Rocchio PRF: Faiss flat, PyTorch"
display-row: "[9]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.tct_colbert-v2-hnp --topics $topics --encoder castorini/tct_colbert-v2-hnp-msmarco --output $output --prf-method rocchio --prf-depth 5 --rocchio-alpha 0.4 --rocchio-beta 0.6 --rocchio-topk 5
topics:
@@ -1069,8 +1161,8 @@ conditions:
nDCG@10: 0.7244
R@1K: 0.8847
- name: slimr
- display: "SLIM: PyTorch"
- display-html: "SLIM: PyTorch"
+ display: "SLIM: Lucene, PyTorch"
+ display-html: "SLIM: Lucene, PyTorch"
display-row: "[7]"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.slimr --topics $topics --encoder castorini/slimr-msmarco-passage --encoded-corpus scipy-sparse-vectors.msmarco-v1-passage-slimr --output $output --output-format msmarco --hits 1000 --impact --min-idf 3
topics:
@@ -1092,8 +1184,8 @@ conditions:
nDCG@10: 0.6403
R@1K: 0.8543
- name: slimr-pp
- display: "SLIM++: PyTorch"
- display-html: "SLIM++: PyTorch"
+ display: "SLIM++: Lucene, PyTorch"
+ display-html: "SLIM++: Lucene, PyTorch"
display-row: "[7]"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.slimr-pp --topics $topics --encoder castorini/slimr-pp-msmarco-passage --encoded-corpus scipy-sparse-vectors.msmarco-v1-passage-slimr-pp --output $output --output-format msmarco --hits 1000 --impact --min-idf 3
topics:
@@ -1115,8 +1207,8 @@ conditions:
nDCG@10: 0.7021
R@1K: 0.8551
- name: aggretriever-distilbert-pytorch
- display: "Aggretriever-DistilBERT: PyTorch"
- display-html: "Aggretriever-DistilBERT: PyTorch"
+ display: "Aggretriever-DistilBERT: Faiss flat, PyTorch"
+ display-html: "Aggretriever-DistilBERT: Faiss flat, PyTorch"
display-row: "[8]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.aggretriever-distilbert --topics $topics --encoder castorini/aggretriever-distilbert --output $output
topics:
@@ -1138,8 +1230,8 @@ conditions:
nDCG@10: 0.6726
R@1K: 0.8351
- name: aggretriever-cocondenser-pytorch
- display: "Aggretriever-coCondenser: PyTorch"
- display-html: "Aggretriever-coCondenser: PyTorch"
+ display: "Aggretriever-coCondenser: Faiss flat, PyTorch"
+ display-html: "Aggretriever-coCondenser: Faiss flat, PyTorch"
display-row: "[8]"
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.aggretriever-cocondenser --topics $topics --encoder castorini/aggretriever-cocondenser --output $output
topics:
@@ -1161,8 +1253,8 @@ conditions:
nDCG@10: 0.6972
R@1K: 0.8555
- name: openai-ada2
- display: "OpenAI ada2: cached queries"
- display-html: "OpenAI ada2: cached queries"
+ display: "OpenAI ada2: Faiss flat, cached queries"
+ display-html: "OpenAI ada2: Faiss flat, cached queries"
display-row: "[11]"
command: python -m pyserini.search.faiss --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.openai-ada2 --topics $topics --encoded-queries openai-ada2-$topics --output $output
topics:
@@ -1184,8 +1276,8 @@ conditions:
nDCG@10: 0.6759
R@1K: 0.8705
- name: openai-ada2-hyde
- display: "HyDE-OpenAI ada2: cached queries"
- display-html: "HyDE-OpenAI ada2: cached queries"
+ display: "HyDE-OpenAI ada2: Faiss flat, cached queries"
+ display-html: "HyDE-OpenAI ada2: Faiss flat, cached queries"
display-row: "[12]"
command: python -m pyserini.search.faiss --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v1-passage.openai-ada2 --topics $topics --encoded-queries openai-ada2-$topics-hyde --output $output
topics:
@@ -1202,8 +1294,8 @@ conditions:
nDCG@10: 0.6666
R@1K: 0.8919
- name: openai-text-embedding-3-large
- display: "OpenAI text-embedding-3-large: cached queries"
- display-html: "OpenAI text-embedding-3-large: cached queries"
+ display: "OpenAI text-embedding-3-large: Faiss flat, cached queries"
+ display-html: "OpenAI text-embedding-3-large: Faiss flat, cached queries"
display-row: ""
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.openai-text-embedding-3-large --topics $topics --encoded-queries openai-text-embedding-3-large-$topics --output $output
topics:
@@ -1225,8 +1317,8 @@ conditions:
nDCG@10: 0.7163
R@1K: 0.8884
- name: cohere-embed-english-v3.0
- display: "Cohere Embed English v3.0: cached queries"
- display-html: "Cohere Embed English v3.0: cached queries"
+ display: "Cohere Embed English v3.0: Faiss flat, cached queries"
+ display-html: "Cohere Embed English v3.0: Faiss flat, cached queries"
display-row: ""
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --index msmarco-v1-passage.cohere-embed-english-v3.0 --topics $topics --encoded-queries cohere-embed-english-v3.0-$topics --output $output
topics:
diff --git a/pyserini/2cr/msmarco-v2-doc.yaml b/pyserini/2cr/msmarco-v2-doc.yaml
index be6a140d1..d4c029580 100644
--- a/pyserini/2cr/msmarco-v2-doc.yaml
+++ b/pyserini/2cr/msmarco-v2-doc.yaml
@@ -272,8 +272,8 @@ conditions:
nDCG@10: 0.3454
R@1K: 0.6006
- name: unicoil-noexp
- display: "uniCOIL (noexp): pre-encoded"
- display-html: "uniCOIL (noexp): pre-encoded queries"
+ display: "uniCOIL (noexp): cached queries"
+ display-html: "uniCOIL (noexp): cached queries"
display-row: (3a)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.unicoil-noexp-0shot --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
@@ -306,8 +306,8 @@ conditions:
nDCG@10: 0.3898
R@1K: 0.5462
- name: unicoil
- display: "uniCOIL (w/ doc2query-T5): pre-encoded"
- display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries"
+ display: "uniCOIL (w/ doc2query-T5): cached queries"
+ display-html: "uniCOIL (w/ doc2query-T5): cached queries"
display-row: (3b)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.unicoil-0shot --topics $topics --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
@@ -340,8 +340,8 @@ conditions:
nDCG@10: 0.4149
R@1K: 0.5753
- name: unicoil-noexp-otf
- display: "uniCOIL (noexp): query inference with PyTorch"
- display-html: "uniCOIL (noexp): query inference with PyTorch"
+ display: "uniCOIL (noexp): PyTorch"
+ display-html: "uniCOIL (noexp): PyTorch"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.unicoil-noexp-0shot --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-v2-doc-dev
@@ -373,8 +373,8 @@ conditions:
nDCG@10: 0.3898
R@1K: 0.5462
- name: unicoil-otf
- display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch"
- display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch"
+ display: "uniCOIL (w/ doc2query-T5): PyTorch"
+ display-html: "uniCOIL (w/ doc2query-T5): PyTorch"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-doc-segmented.unicoil-0shot --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --impact --hits 10000 --max-passage-hits 1000 --max-passage
topics:
- topic_key: msmarco-v2-doc-dev
diff --git a/pyserini/2cr/msmarco-v2-passage.yaml b/pyserini/2cr/msmarco-v2-passage.yaml
index 38aea61a9..101eda74d 100644
--- a/pyserini/2cr/msmarco-v2-passage.yaml
+++ b/pyserini/2cr/msmarco-v2-passage.yaml
@@ -272,8 +272,8 @@ conditions:
nDCG@10: 0.2719
R@1K: 0.5623
- name: unicoil-noexp
- display: "uniCOIL (noexp): pre-encoded"
- display-html: "uniCOIL (noexp): pre-encoded queries"
+ display: "uniCOIL (noexp): cached queries"
+ display-html: "uniCOIL (noexp): cached queries"
display-row: (3a)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-passage.unicoil-noexp-0shot --topics $topics --output $output --hits 1000 --impact
topics:
@@ -306,8 +306,8 @@ conditions:
nDCG@10: 0.3262
R@1K: 0.5070
- name: unicoil
- display: "uniCOIL (w/ doc2query-T5): pre-encoded"
- display-html: "uniCOIL (w/ doc2query-T5): pre-encoded queries"
+ display: "uniCOIL (w/ doc2query-T5): cached queries"
+ display-html: "uniCOIL (w/ doc2query-T5): cached queries"
display-row: (3b)
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-passage.unicoil-0shot --topics $topics --output $output --hits 1000 --impact
topics:
@@ -340,8 +340,8 @@ conditions:
nDCG@10: 0.3855
R@1K: 0.5541
- name: unicoil-noexp-otf
- display: "uniCOIL (noexp): query inference with PyTorch"
- display-html: "uniCOIL (noexp): query inference with PyTorch"
+ display: "uniCOIL (noexp): PyTorch"
+ display-html: "uniCOIL (noexp): PyTorch"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-passage.unicoil-noexp-0shot --topics $topics --encoder castorini/unicoil-noexp-msmarco-passage --output $output --hits 1000 --impact
topics:
- topic_key: msmarco-v2-passage-dev
@@ -373,8 +373,8 @@ conditions:
nDCG@10: 0.3262
R@1K: 0.5070
- name: unicoil-otf
- display: "uniCOIL (w/ doc2query-T5): query inference with PyTorch"
- display-html: "uniCOIL (w/ doc2query-T5): query inference with PyTorch"
+ display: "uniCOIL (w/ doc2query-T5): PyTorch"
+ display-html: "uniCOIL (w/ doc2query-T5): PyTorch"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-passage.unicoil-0shot --topics $topics --encoder castorini/unicoil-msmarco-passage --output $output --hits 1000 --impact
topics:
- topic_key: msmarco-v2-passage-dev
@@ -407,7 +407,7 @@ conditions:
R@1K: 0.5541
- name: slimr-pp
display: "SLIM++ (norefine, tau=0.5, min_idf=1)"
- display-html: "SLIM++ (norefine, tau=0.5, min_idf=1)"
+ display-html: "SLIM++ (norefine, tau=0.5, min_idf=1)"
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index msmarco-v2-passage.slimr-pp --topics $topics --encoder castorini/slimr-pp-msmarco-passage --output $output --hits 1000 --impact --min-idf 1
topics:
- topic_key: msmarco-v2-passage-dev
diff --git a/pyserini/2cr/msmarco.py b/pyserini/2cr/msmarco.py
index fcde99c63..2bb464f85 100644
--- a/pyserini/2cr/msmarco.py
+++ b/pyserini/2cr/msmarco.py
@@ -105,9 +105,13 @@
'openai-ada2-hyde',
'openai-text-embedding-3-large',
'',
- 'cosdpr-distil-pytorch',
+ 'cosdpr-distil.faiss-flat.pytorch',
+ 'cosdpr-distil.lucene-hnsw.onnx',
+ 'cosdpr-distil.lucene-hnsw-int8.onnx',
'',
- 'bge-base-en-v1.5-pytorch',
+ 'bge-base-en-v1.5.faiss-flat.pytorch',
+ 'bge-base-en-v1.5.lucene-hnsw.onnx',
+ 'bge-base-en-v1.5.lucene-hnsw-int8.onnx',
'',
'cohere-embed-english-v3.0',
],
@@ -459,9 +463,9 @@ def generate_report(args):
all_rows = '\n'.join(html_rows)
if args.collection == 'msmarco-v1-passage':
- full_name = 'MS MARCO V1 Passage'
+ full_name = 'MS MARCO V1 Passage Regressions'
else:
- full_name = 'MS MARCO V1 Document'
+ full_name = 'MS MARCO V1 Document Regressions'
with open(args.output, 'w') as out:
out.write(Template(html_template).substitute(title=full_name, rows=all_rows))
@@ -511,9 +515,9 @@ def generate_report(args):
all_rows = '\n'.join(html_rows)
if args.collection == 'msmarco-v2-passage':
- full_name = 'MS MARCO V2 Passage'
+ full_name = 'MS MARCO V2 Passage Regressions'
else:
- full_name = 'MS MARCO V2 Document'
+ full_name = 'MS MARCO V2 Document Regressions'
with open(args.output, 'w') as out:
out.write(Template(html_template).substitute(title=full_name, rows=all_rows))
diff --git a/pyserini/2cr/msmarco_html_v1_doc.template b/pyserini/2cr/msmarco_html_v1_doc.template
index 50a413068..ca92770b6 100644
--- a/pyserini/2cr/msmarco_html_v1_doc.template
+++ b/pyserini/2cr/msmarco_html_v1_doc.template
@@ -131,7 +131,7 @@ pre[class*="prettyprint"] {
">
[1] Xueguang Ma, Ronak Pradeep, Rodrigo Nogueira, and Jimmy Lin. diff --git a/pyserini/2cr/msmarco_html_v1_passage.template b/pyserini/2cr/msmarco_html_v1_passage.template index e5414fb0c..5c0fb9053 100644 --- a/pyserini/2cr/msmarco_html_v1_passage.template +++ b/pyserini/2cr/msmarco_html_v1_passage.template @@ -131,7 +131,7 @@ pre[class*="prettyprint"] { ">
[1] Xueguang Ma, Ronak Pradeep, Rodrigo Nogueira, and Jimmy Lin. diff --git a/pyserini/2cr/msmarco_html_v2_doc.template b/pyserini/2cr/msmarco_html_v2_doc.template index 7d44926a5..62b831ab2 100644 --- a/pyserini/2cr/msmarco_html_v2_doc.template +++ b/pyserini/2cr/msmarco_html_v2_doc.template @@ -131,7 +131,7 @@ pre[class*="prettyprint"] { ">