Skip to content

Commit

Permalink
Fix paper integration tests due to path changes in anserini and other minor issues (#1602)
Browse files: browse the repository at this point in the history
  • Loading branch information
lintool authored Aug 27, 2023
1 parent a4b0661 commit 0b3ec90
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 6 deletions.
2 changes: 1 addition & 1 deletion integrations/papers/test_ecir2023.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import os
import unittest

from integrations.utils import clean_files, run_command, parse_score, parse_score_qa
from integrations.utils import clean_files, run_command, parse_score_qa


class TestECIR2023(unittest.TestCase):
Expand Down
3 changes: 1 addition & 2 deletions integrations/papers/test_sigir2021.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,7 @@ def test_section3_3(self):
msmarco-passage-dev-subset {output_file}'
stdout, stderr = run_command(eval_cmd)
score = parse_score_msmarco(stdout, "MRR @10")
self.assertAlmostEqual(score, 0.1872, delta=0.0001)
# Temporary fix: this is Lucene 9 code running on Lucene 8 prebuilt index.
self.assertAlmostEqual(score, 0.1874, delta=0.0001)

def tearDown(self):
clean_files(self.temp_files)
Expand Down
5 changes: 2 additions & 3 deletions integrations/papers/test_sigir2022.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from integrations.utils import clean_files, run_command, parse_score, parse_score_msmarco


class TestSIGIR2021(unittest.TestCase):
class TestSIGIR2022(unittest.TestCase):
def setUp(self):
self.temp_files = []

Expand Down Expand Up @@ -66,8 +66,7 @@ def test_Ma_etal_section4_1b(self):
eval_cmd = f'python -m pyserini.eval.trec_eval -c -M 100 -m map -m recip_rank msmarco-v2-passage-dev {output_file}'
stdout, stderr = run_command(eval_cmd)
score = parse_score(stdout, "recip_rank")
self.assertAlmostEqual(score, 0.1501, delta=0.0001)
# This is the score with otf; with pre-encoded, the score is 0.1499.
self.assertAlmostEqual(score, 0.1499, delta=0.0001)

def test_Trotman_etal(self):
"""Sample code in Trotman et al. demo paper."""
Expand Down
7 changes: 7 additions & 0 deletions pyserini/search/faiss/_searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,13 @@ def from_prebuilt_index(cls, prebuilt_index_name: str, query_encoder: QueryEncod
Searcher built from the prebuilt faiss index.
"""
print(f'Attempting to initialize pre-built index {prebuilt_index_name}.')
# see integrations/papers/test_sigir2021.py - preserve working commands published in papers
if prebuilt_index_name == 'msmarco-passage-tct_colbert-hnsw':
prebuilt_index_name = 'msmarco-v1-passage.tct_colbert.hnsw'
# see integrations/papers/test_ecir2023.py - preserve working commands published in papers
elif prebuilt_index_name == 'wikipedia-dpr-dkrr-nq':
prebuilt_index_name = 'wikipedia-dpr-100w.dkrr-nq'

try:
index_dir = download_prebuilt_index(prebuilt_index_name)
except ValueError as e:
Expand Down
4 changes: 4 additions & 0 deletions pyserini/search/lucene/_impact_searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ def from_prebuilt_index(cls, prebuilt_index_name: str, query_encoder: Union[Quer
Searcher built from the prebuilt index.
"""
print(f'Attempting to initialize pre-built index {prebuilt_index_name}.')
# see integrations/papers/test_sigir2021.py - preserve working commands published in papers
if prebuilt_index_name == 'msmarco-passage-unicoil-d2q':
prebuilt_index_name = 'msmarco-v1-passage-unicoil'

try:
index_dir = download_prebuilt_index(prebuilt_index_name)
except ValueError as e:
Expand Down
7 changes: 7 additions & 0 deletions pyserini/search/lucene/_searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ def from_prebuilt_index(cls, prebuilt_index_name: str, verbose=False):
LuceneSearcher
Searcher built from the prebuilt index.
"""
# see integrations/papers/test_sigir2021.py - preserve working commands published in papers
if prebuilt_index_name == 'msmarco-passage':
prebuilt_index_name = 'msmarco-v1-passage'
# see integrations/papers/test_ecir2023.py - preserve working commands published in papers
elif prebuilt_index_name == 'wikipedia-dpr':
prebuilt_index_name = 'wikipedia-dpr-100w'

if verbose:
print(f'Attempting to initialize pre-built index {prebuilt_index_name}.')

Expand Down

0 comments on commit 0b3ec90

Please sign in to comment.