Skip to content

Commit

Permalink
Refactor regressions (#1671)
Browse files: browse the repository at this point in the history
+ "threads" and "batch-size" parameters:
  + consistent formatting
  + settable externally in code (rather than hard-coded in yaml)
+ better logging information for running times
+ run_jobs_with_load.py no longer spawns too many concurrent jobs; the limit is now a configurable parameter.
  • Loading branch information
lintool authored Oct 13, 2023
1 parent ce98af4 commit f1d623c
Show file tree
Hide file tree
Showing 22 changed files with 1,700 additions and 1,214 deletions.
464 changes: 261 additions & 203 deletions docs/2cr/beir.html

Large diffs are not rendered by default.

672 changes: 354 additions & 318 deletions docs/2cr/miracl.html

Large diffs are not rendered by default.

330 changes: 220 additions & 110 deletions docs/2cr/mrtydi.html

Large diffs are not rendered by default.

576 changes: 384 additions & 192 deletions docs/2cr/msmarco-v1-doc.html

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions docs/2cr/msmarco-v1-passage.html
Original file line number Diff line number Diff line change
Expand Up @@ -2447,7 +2447,7 @@ <h1 class="mb-3">MS MARCO V1 Passage</h1>
<td>0.7280</td>
<td>0.9069</td>
<td></td>
<td>0.3300</td>
<td>0.3301</td>
<td>0.9811</td>
</tr>
<tr class="hide-table-padding">
Expand Down Expand Up @@ -5324,7 +5324,7 @@ <h1 class="mb-3">MS MARCO V1 Passage</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 --batch 128 \
--threads 16 --batch-size 128 \
--index msmarco-v1-passage-slimr \
--topics dl19-passage \
--encoder castorini/slimr-msmarco-passage \
Expand All @@ -5350,7 +5350,7 @@ <h1 class="mb-3">MS MARCO V1 Passage</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 --batch 128 \
--threads 16 --batch-size 128 \
--index msmarco-v1-passage-slimr \
--topics dl20 \
--encoder castorini/slimr-msmarco-passage \
Expand All @@ -5376,7 +5376,7 @@ <h1 class="mb-3">MS MARCO V1 Passage</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 --batch 128 \
--threads 16 --batch-size 128 \
--index msmarco-v1-passage-slimr \
--topics msmarco-passage-dev-subset \
--encoder castorini/slimr-msmarco-passage \
Expand Down Expand Up @@ -5442,7 +5442,7 @@ <h1 class="mb-3">MS MARCO V1 Passage</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 --batch 128 \
--threads 16 --batch-size 128 \
--index msmarco-v1-passage-slimr-pp \
--topics dl19-passage \
--encoder castorini/slimr-pp-msmarco-passage \
Expand All @@ -5468,7 +5468,7 @@ <h1 class="mb-3">MS MARCO V1 Passage</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 --batch 128 \
--threads 16 --batch-size 128 \
--index msmarco-v1-passage-slimr-pp \
--topics dl20 \
--encoder castorini/slimr-pp-msmarco-passage \
Expand All @@ -5494,7 +5494,7 @@ <h1 class="mb-3">MS MARCO V1 Passage</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 --batch 128 \
--threads 16 --batch-size 128 \
--index msmarco-v1-passage-slimr-pp \
--topics msmarco-passage-dev-subset \
--encoder castorini/slimr-pp-msmarco-passage \
Expand Down
14 changes: 7 additions & 7 deletions docs/2cr/msmarco-v2-passage.html
Original file line number Diff line number Diff line change
Expand Up @@ -1464,17 +1464,17 @@ <h1 class="mb-3">MS MARCO V2 Passage</h1>
<td class="expand-button"></td>
<td></td>
<td style="min-width: 400px">SLIM++ (norefine, tau=0.5, min_idf=1)</td>
<td>0.2819</td>
<td>0.6340</td>
<td>0.2820</td>
<td>0.6337</td>
<td>0.7554</td>
<td>0.5092</td>
<td>0.8392</td>
<td>0.5093</td>
<td>0.8389</td>
<td></td>
<td>0.1915</td>
<td>0.8707</td>
<td>0.8710</td>
<td></td>
<td>0.1904</td>
<td>0.8683</td>
<td>0.1901</td>
<td>0.8681</td>
</tr>
<tr class="hide-table-padding">
<td></td>
Expand Down
48 changes: 16 additions & 32 deletions docs/2cr/odqa.html
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics dpr-trivia-test \
--output run.odqa.BM25-k1_0.9_b_0.4.dpr-trivia-test.hits-100.txt \
Expand Down Expand Up @@ -221,8 +220,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics nq-test \
--output run.odqa.BM25-k1_0.9_b_0.4.nq-test.hits-100.txt \
Expand Down Expand Up @@ -288,8 +286,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics dpr-trivia-test \
--output run.odqa.BM25-k1_0.9_b_0.4_dpr-topics.dpr-trivia-test.hits-100.txt \
Expand Down Expand Up @@ -321,8 +318,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics dpr-nq-test \
--output run.odqa.BM25-k1_0.9_b_0.4_dpr-topics.dpr-nq-test.hits-100.txt \
Expand Down Expand Up @@ -388,8 +384,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics dpr-trivia-test-gar-t5-answers \
--output run.odqa.GarT5-RRF.dpr-trivia-test.answers.hits-1000.txt \
Expand All @@ -399,8 +394,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics dpr-trivia-test-gar-t5-titles \
--output run.odqa.GarT5-RRF.dpr-trivia-test.titles.hits-1000.txt \
Expand All @@ -410,8 +404,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics dpr-trivia-test-gar-t5-sentences \
--output run.odqa.GarT5-RRF.dpr-trivia-test.sentences.hits-1000.txt \
Expand Down Expand Up @@ -453,8 +446,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics nq-test-gar-t5-answers \
--output run.odqa.GarT5-RRF.nq-test.answers.hits-1000.txt \
Expand All @@ -464,8 +456,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics nq-test-gar-t5-titles \
--output run.odqa.GarT5-RRF.nq-test.titles.hits-1000.txt \
Expand All @@ -475,8 +466,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.lucene \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 128 \
--index wikipedia-dpr-100w \
--topics nq-test-gar-t5-sentences \
--output run.odqa.GarT5-RRF.nq-test.sentences.hits-1000.txt \
Expand Down Expand Up @@ -552,8 +542,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.faiss \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 512 \
--index wikipedia-dpr-100w.dpr-multi \
--encoder facebook/dpr-question_encoder-multiset-base \
--topics dpr-trivia-test \
Expand Down Expand Up @@ -585,8 +574,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.faiss \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 512 \
--index wikipedia-dpr-100w.dpr-single-nq \
--encoder facebook/dpr-question_encoder-single-nq-base \
--topics nq-test \
Expand Down Expand Up @@ -652,8 +640,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.faiss \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 512 \
--index wikipedia-dpr-100w.dkrr-tqa \
--encoder castorini/dkrr-dpr-tqa-retriever \
--topics dpr-trivia-test \
Expand Down Expand Up @@ -685,8 +672,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>

<blockquote class="mycode">
<pre><code>python -m pyserini.search.faiss \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 512 \
--index wikipedia-dpr-100w.dkrr-nq \
--encoder castorini/dkrr-dpr-nq-retriever \
--topics nq-test \
Expand Down Expand Up @@ -758,8 +744,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>
fusion --alpha 0.95 \
run --topics dpr-trivia-test \
--output run.odqa.DPR-Hybrid.dpr-trivia-test.hits-100.txt \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 512 \
--hits 1000
</code></pre></blockquote>

Expand Down Expand Up @@ -793,8 +778,7 @@ <h1 class="mb-3">Retrieval for Open-Domain QA Datasets</h1>
fusion --alpha 1.2 \
run --topics nq-test \
--output run.odqa.DPR-Hybrid.nq-test.hits-100.txt \
--threads 16 \
--batch-size 512 \
--threads 16 --batch-size 512 \
--hits 1000
</code></pre></blockquote>

Expand Down
29 changes: 23 additions & 6 deletions pyserini/2cr/beir.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,22 @@
import argparse
import math
import os
import time
import sys
import time
from collections import defaultdict
from datetime import datetime
from string import Template
import pkg_resources

import pkg_resources
import yaml

from ._base import run_eval_and_return_metric, ok_str, fail_str

dense_threads = 16
dense_batch_size = 512
sparse_threads = 16
sparse_batch_size = 128

trec_eval_metric_definitions = {
'nDCG@10': '-c -m ndcg_cut.10',
'R@100': '-c -m recall.100',
Expand Down Expand Up @@ -67,10 +73,11 @@

def format_run_command(raw):
return raw.replace('--topics', '\\\n --topics') \
.replace('--threads', '\\\n --threads') \
.replace('--index', '\\\n --index') \
.replace('--encoder-class', '\\\n --encoder-class') \
.replace('--encoder-class', '\\\n --encoder-class') \
.replace('--output ', '\\\n --output ') \
.replace('--output-format trec', '\\\n --output-format trec \\\n ') \
.replace('--output-format trec ', '\\\n --output-format trec ') \
.replace('--hits ', '\\\n --hits ')


Expand Down Expand Up @@ -117,7 +124,9 @@ def generate_report(args):
dataset = datasets['dataset']

runfile = os.path.join(args.directory, f'run.beir.{name}.{dataset}.txt')
cmd = Template(cmd_template).substitute(dataset=dataset, output=runfile)
cmd = Template(cmd_template).substitute(dataset=dataset, output=runfile,
sparse_threads=sparse_threads, sparse_batch_size=sparse_batch_size,
dense_threads=dense_threads, dense_batch_size=dense_batch_size)
commands[dataset][name] = format_run_command(cmd)

for expected in datasets['scores']:
Expand Down Expand Up @@ -192,7 +201,9 @@ def run_conditions(args):
print(f' - dataset: {dataset}')

runfile = os.path.join(args.directory, f'run.beir.{name}.{dataset}.txt')
cmd = Template(cmd_template).substitute(dataset=dataset, output=runfile)
cmd = Template(cmd_template).substitute(dataset=dataset, output=runfile,
sparse_threads=sparse_threads, sparse_batch_size=sparse_batch_size,
dense_threads=dense_threads, dense_batch_size=dense_batch_size)

if args.display_commands:
print(f'\n```bash\n{format_run_command(cmd)}\n```\n')
Expand All @@ -216,6 +227,7 @@ def run_conditions(args):
table[dataset][name][metric] = score
else:
table[dataset][name][metric] = expected[metric]
print('')

print('')

Expand Down Expand Up @@ -264,7 +276,12 @@ def run_conditions(args):

end = time.time()

start_str = datetime.utcfromtimestamp(start).strftime('%Y-%m-%d %H:%M:%S')
end_str = datetime.utcfromtimestamp(end).strftime('%Y-%m-%d %H:%M:%S')

print('\n')
print(f'Start time: {start_str}')
print(f'End time: {end_str}')
print(f'Total elapsed time: {end - start:.0f}s ~{(end - start)/3600:.1f}hr')


Expand Down
10 changes: 5 additions & 5 deletions pyserini/2cr/beir.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
conditions:
- name: bm25-flat
command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}.flat --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index beir-v1.0.0-${dataset}.flat --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --hits 1000 --bm25 --remove-query
datasets:
- dataset: trec-covid
scores:
Expand Down Expand Up @@ -148,7 +148,7 @@ conditions:
R@100: 0.9253
R@1000: 0.9767
- name: bm25-multifield
command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}.multifield --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query --fields contents=1.0 title=1.0
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index beir-v1.0.0-${dataset}.multifield --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --hits 1000 --bm25 --remove-query --fields contents=1.0 title=1.0
datasets:
- dataset: trec-covid
scores:
Expand Down Expand Up @@ -296,7 +296,7 @@ conditions:
R@100: 0.9076
R@1000: 0.9800
- name: splade-distil-cocodenser-medium
command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}-splade_distil_cocodenser_medium --topics beir-v1.0.0-${dataset}-test-splade_distil_cocodenser_medium --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --impact --remove-query
command: python -m pyserini.search.lucene --threads ${sparse_threads} --batch-size ${sparse_batch_size} --index beir-v1.0.0-${dataset}-splade_distil_cocodenser_medium --topics beir-v1.0.0-${dataset}-test-splade_distil_cocodenser_medium --output $output --output-format trec --hits 1000 --impact --remove-query
datasets:
- dataset: trec-covid
scores:
Expand Down Expand Up @@ -444,7 +444,7 @@ conditions:
R@100: 0.9270
R@1000: 0.9767
- name: contriever
command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/contriever --index beir-v1.0.0-${dataset}.contriever --topics beir-v1.0.0-${dataset}-test --output $output --batch 128 --threads 16 --hits 1000 --remove-query
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --encoder-class contriever --encoder facebook/contriever --index beir-v1.0.0-${dataset}.contriever --topics beir-v1.0.0-${dataset}-test --output $output --hits 1000 --remove-query
datasets:
- dataset: trec-covid
scores:
Expand Down Expand Up @@ -592,7 +592,7 @@ conditions:
R@100: 0.9260
R@1000: 0.9967
- name: contriever-msmarco
command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/contriever-msmarco --index beir-v1.0.0-${dataset}.contriever-msmarco --topics beir-v1.0.0-${dataset}-test --output $output --batch 128 --threads 16 --hits 1000 --remove-query
command: python -m pyserini.search.faiss --threads ${dense_threads} --batch-size ${dense_batch_size} --encoder-class contriever --encoder facebook/contriever-msmarco --index beir-v1.0.0-${dataset}.contriever-msmarco --topics beir-v1.0.0-${dataset}-test --output $output --hits 1000 --remove-query
datasets:
- dataset: trec-covid
scores:
Expand Down
Loading

0 comments on commit f1d623c

Please sign in to comment.