Skip to content

Commit

Permalink
Switch from using jtrec_eval to trec_eval (#1986)
Browse files Browse the repository at this point in the history
+ trec_eval is already bundled with Anserini.
+ clean up trec_eval output to not be so noisy.
  • Loading branch information
lintool authored Sep 17, 2024
1 parent f5a2e94 commit 83537a3
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 161 deletions.
132 changes: 33 additions & 99 deletions integrations/clprf/test_clprf.py

Large diffs are not rendered by default.

48 changes: 12 additions & 36 deletions integrations/sparse/test_lucenesearcher_check_irst.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,7 @@ def test_sum_aggregation_dl19_passage(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3281)
self.assertEqual(ndcg_score, 0.5260)

Expand All @@ -78,9 +76,7 @@ def test_sum_aggregation_dl20_passage(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3520)
self.assertEqual(ndcg_score, 0.5578)

Expand All @@ -103,9 +99,7 @@ def test_max_aggregation_dl19(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3286)
self.assertEqual(ndcg_score, 0.5371)

Expand All @@ -128,9 +122,7 @@ def test_max_aggregation_dl20_passage(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3357)
self.assertEqual(ndcg_score, 0.5469)

Expand Down Expand Up @@ -170,9 +162,7 @@ def test_sum_aggregation_dl19_doc(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2524)
self.assertEqual(ndcg_score, 0.5494)

Expand All @@ -194,9 +184,7 @@ def test_sum_aggregation_dl20_doc(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3825)
self.assertEqual(ndcg_score, 0.5559)

Expand All @@ -219,9 +207,7 @@ def test_max_aggregation_dl19_doc(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2205)
self.assertEqual(ndcg_score, 0.4917)

Expand All @@ -244,9 +230,7 @@ def test_max_aggregation_dl20_doc(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3373)
self.assertEqual(ndcg_score, 0.5015)

Expand Down Expand Up @@ -287,9 +271,7 @@ def test_sum_aggregation_dl19_doc_seg(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2711)
self.assertEqual(ndcg_score, 0.5596)

Expand All @@ -312,9 +294,7 @@ def test_sum_aggregation_dl20_doc_seg(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3759)
self.assertEqual(ndcg_score, 0.5343)

Expand All @@ -338,9 +318,7 @@ def test_max_aggregation_dl19_doc_seg(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.2425)
self.assertEqual(ndcg_score, 0.5193)

Expand All @@ -364,9 +342,7 @@ def test_max_aggregation_dl20_doc_seg(self):
ndcg_score = parse_score(stdout, "ndcg")

self.assertEqual(status, 0)
# Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
# so turn off check until this issue is resolved in a later JDK version.
# self.assertEqual(stderr, '')
self.assertEqual(stderr, '')
self.assertEqual(map_score, 0.3496)
self.assertEqual(ndcg_score, 0.5089)

Expand Down
3 changes: 0 additions & 3 deletions integrations/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ def run_command(cmd, echo=False):
def parse_score(output, metric, digits=4):
"""Function for parsing the output from `pyserini.eval.trec_eval`."""
lines = output.split('\n')
# The output begins with a bunch of debug information, get rid of lines until we get to 'Results'
while 'Results' not in lines[0]:
lines.pop(0)

for line in lines:
if metric in line:
Expand Down
51 changes: 34 additions & 17 deletions pyserini/eval/trec_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,44 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Example usage
# python -m pyserini.eval.trec_eval -m ndcg_cut.10,20 -m all_trec qrels.dev.small.tsv runs/run.Colbert.txt -remove-unjudged -cutoffs.20,50

# Example usage:
# python -m pyserini.eval.trec_eval -c \
# -m ndcg_cut.10 \
# -m judged.5,10 beir-v1.0.0-arguana-test run.beir.contriever-msmarco.arguana.txt -remove-unjudged

# From Jimmy, Sept 2024 -
#
# This file has a load sequence that is very different from all the other files.
# The JVM by default in Pyserini is loaded with the option '--add-modules=jdk.incubator.vector', which triggers the
# following warning: 'WARNING: Using incubator modules: jdk.incubator.vector'
#
# I have looked extensively online and was not able to find a way to suppress that warning.
# The solution here is to start the JVM without the vector module, which isn't needed here.
# This explains the code sequence below.

import glob
import importlib.resources
import jnius_config
import os
import re
import subprocess
import sys
import platform
import pandas as pd
import platform
import tempfile
import subprocess
import sys

from pyserini.search import get_qrels_file
from pyserini.util import download_evaluation_script
# Don't use the jdk.incubator.vector module.
jar_directory = str(importlib.resources.files("pyserini.resources.jars").joinpath(''))
jar_path = glob.glob(os.path.join(jar_directory, '*.jar'))[0]
jnius_config.add_classpath(jar_path)

script_path = download_evaluation_script('trec_eval')
# This triggers loading of the JVM.
from jnius import autoclass

if platform.platform().startswith('macOS'):
# Hack around the fact that jtrec_eval hasn't been compiled for Mac M processors.
# Explicitly set os to x86, and then force the use of Rosetta.
cmd_prefix = ['java', '-Dos.arch=x86_64', '-jar', script_path]
else:
cmd_prefix = ['java', '-jar', script_path]
# Now we can load qrels
from pyserini.search import get_qrels_file

cmd_prefix = ['java', '-cp', jar_path, 'trec_eval']

args = sys.argv

Expand Down Expand Up @@ -99,7 +114,10 @@
else:
cmd = cmd_prefix

print(f'Running command: {cmd}')
# We're going to shell out to call trec_eval.
# Obvious question here: why do we *not* just call the trec_eval main (Java) class directly, since it already
# wraps the native binaries? The answer is that the Java class explicitly calls System.exit, so we wouldn't
# be able to do cleanup here in Python.
shell = platform.system() == "Windows"
process = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
Expand All @@ -109,7 +127,6 @@
if stderr:
print(stderr.decode("utf-8"))

print('Results:')
print(stdout.decode("utf-8").rstrip())

for judged in judged_result:
Expand Down
10 changes: 7 additions & 3 deletions pyserini/pyclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,13 @@

from .setup import configure_classpath, os

# If the environment variable isn't defined, look in the current directory.
configure_classpath(os.environ['ANSERINI_CLASSPATH'] if 'ANSERINI_CLASSPATH' in os.environ else
os.path.join(os.path.split(__file__)[0], 'resources/jars/'))
try:
# If the environment variable isn't defined, fall back to the 'resources/jars/' directory next to this module.
configure_classpath(os.environ['ANSERINI_CLASSPATH'] if 'ANSERINI_CLASSPATH' in os.environ else
os.path.join(os.path.split(__file__)[0], 'resources/jars/'))
except:
# This might happen if the JVM's already been initialized. Just eat the error.
pass

from jnius import autoclass, cast

Expand Down
1 change: 0 additions & 1 deletion tests/resources/simple_trec_run_unjudged_keep.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
Results:
ndcg_cut_5 all 0.0848
ndcg_cut_10 all 0.0550
judged_5 all 0.5000
Expand Down
1 change: 0 additions & 1 deletion tests/resources/simple_trec_run_unjudged_remove.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
Results:
ndcg_cut_5 all 0.1131
ndcg_cut_10 all 0.0734
judged_5 all 1.0000
Expand Down
2 changes: 1 addition & 1 deletion tests/test_trectools.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def test_normalize_scores(self):
self.output_path))

# This and the next test case go together - to keep and to remove unjudged docs.
def test_undjudged_keep(self):
def test_unjudged_keep(self):
qrels_path = os.path.join(self.root, 'tools/topics-and-qrels/qrels.covid-round1.txt')
run_path = os.path.join(self.root, 'tests/resources/simple_trec_run_filter.txt')
results = subprocess.check_output(
Expand Down

0 comments on commit 83537a3

Please sign in to comment.