Switch from using jtrec_eval to trec_eval (#1986)

+ trec_eval is already bundled with Anserini. + clean up trec_eval output to not be so noisy.
castorini · Sep 17, 2024 · 83537a3 · 83537a3
1 parent f5a2e94
commit 83537a3
Show file tree

Hide file tree

Showing 8 changed files with 87 additions and 161 deletions.
diff --git a/integrations/clprf/test_clprf.py b/integrations/clprf/test_clprf.py
diff --git a/integrations/sparse/test_lucenesearcher_check_irst.py b/integrations/sparse/test_lucenesearcher_check_irst.py
@@ -54,9 +54,7 @@ def test_sum_aggregation_dl19_passage(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.3281)
         self.assertEqual(ndcg_score, 0.5260)
 
@@ -78,9 +76,7 @@ def test_sum_aggregation_dl20_passage(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.3520)
         self.assertEqual(ndcg_score, 0.5578)
 
@@ -103,9 +99,7 @@ def test_max_aggregation_dl19(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.3286)
         self.assertEqual(ndcg_score, 0.5371)
 
@@ -128,9 +122,7 @@ def test_max_aggregation_dl20_passage(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.3357)
         self.assertEqual(ndcg_score, 0.5469)
 
@@ -170,9 +162,7 @@ def test_sum_aggregation_dl19_doc(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.2524)
         self.assertEqual(ndcg_score, 0.5494)
 
@@ -194,9 +184,7 @@ def test_sum_aggregation_dl20_doc(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.3825)
         self.assertEqual(ndcg_score, 0.5559)
 
@@ -219,9 +207,7 @@ def test_max_aggregation_dl19_doc(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.2205)
         self.assertEqual(ndcg_score, 0.4917)
 
@@ -244,9 +230,7 @@ def test_max_aggregation_dl20_doc(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.3373)
         self.assertEqual(ndcg_score, 0.5015)
 
@@ -287,9 +271,7 @@ def test_sum_aggregation_dl19_doc_seg(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.2711)
         self.assertEqual(ndcg_score, 0.5596)
 
@@ -312,9 +294,7 @@ def test_sum_aggregation_dl20_doc_seg(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.3759)
         self.assertEqual(ndcg_score, 0.5343)
 
@@ -338,9 +318,7 @@ def test_max_aggregation_dl19_doc_seg(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.2425)
         self.assertEqual(ndcg_score, 0.5193)
 
@@ -364,9 +342,7 @@ def test_max_aggregation_dl20_doc_seg(self):
         ndcg_score = parse_score(stdout, "ndcg")
 
         self.assertEqual(status, 0)
-        # Currently, we get 'WARNING: Using incubator modules: jdk.incubator.vector\n' from stderr,
-        # so turn off check until this issue is resolved in a later JDK version.
-        # self.assertEqual(stderr, '')
+        self.assertEqual(stderr, '')
         self.assertEqual(map_score, 0.3496)
         self.assertEqual(ndcg_score, 0.5089)
 

diff --git a/integrations/utils.py b/integrations/utils.py
@@ -45,9 +45,6 @@ def run_command(cmd, echo=False):
 def parse_score(output, metric, digits=4):
     """Function for parsing the output from `pyserini.eval.trec_eval`."""
     lines = output.split('\n')
-    # The output begins with a bunch of debug information, get rid of lines until we get to 'Results'
-    while 'Results' not in lines[0]:
-        lines.pop(0)
 
     for line in lines:
         if metric in line:

diff --git a/pyserini/eval/trec_eval.py b/pyserini/eval/trec_eval.py
@@ -13,29 +13,44 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-# Example usage
-# python -m pyserini.eval.trec_eval -m ndcg_cut.10,20 -m all_trec qrels.dev.small.tsv runs/run.Colbert.txt -remove-unjudged -cutoffs.20,50
 
+# Example usage:
+# python -m pyserini.eval.trec_eval -c \
+#   -m ndcg_cut.10 \
+#   -m judged.5,10 beir-v1.0.0-arguana-test run.beir.contriever-msmarco.arguana.txt -remove-unjudged
 
+# From Jimmy, Sept 2024 -
+#
+# This file has a load sequence that is very different from all the other files.
+# The JVM by default in Pyserini is loaded with the option '--add-modules=jdk.incubator.vector', which triggers the
+# following warning: 'WARNING: Using incubator modules: jdk.incubator.vector'
+#
+# I have looked extensively online and was not able to find a way to suppress that warning.
+# The solution here is to start the JVM without the vector module, which isn't needed here.
+# This explains the code sequence below.
+
+import glob
+import importlib.resources
+import jnius_config
 import os
-import re
-import subprocess
-import sys
-import platform
 import pandas as pd
+import platform
 import tempfile
+import subprocess
+import sys
 
-from pyserini.search import get_qrels_file
-from pyserini.util import download_evaluation_script
+# Don't use the jdk.incubator.vector module.
+jar_directory = str(importlib.resources.files("pyserini.resources.jars").joinpath(''))
+jar_path = glob.glob(os.path.join(jar_directory, '*.jar'))[0]
+jnius_config.add_classpath(jar_path)
 
-script_path = download_evaluation_script('trec_eval')
+# This triggers loading of the JVM.
+from jnius import autoclass
 
-if platform.platform().startswith('macOS'):
-    # Hack around the fact that jtrec_eval hasn't been compiled for Mac M processors.
-    # Explicitly set os to x86, and then force the use of Rosetta.
-    cmd_prefix = ['java', '-Dos.arch=x86_64', '-jar', script_path]
-else:
-    cmd_prefix = ['java', '-jar', script_path]
+# Now we can load qrels
+from pyserini.search import get_qrels_file
+
+cmd_prefix = ['java', '-cp', jar_path, 'trec_eval']
 
 args = sys.argv
 
@@ -99,7 +114,10 @@
 else:
     cmd = cmd_prefix
 
-print(f'Running command: {cmd}')
+# We're going to shell out to call trec_eval.
+# Obvious question here: why do we *not* just call the trec_eval main (Java) class, which already wraps the
+# binaries? The answer is that the Java class explicitly calls System.exit, so we wouldn't
+# be able to do cleanup here in Python.
 shell = platform.system() == "Windows"
 process = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
@@ -109,7 +127,6 @@
 if stderr:
     print(stderr.decode("utf-8"))
 
-print('Results:')
 print(stdout.decode("utf-8").rstrip())
 
 for judged in judged_result:

diff --git a/pyserini/pyclass.py b/pyserini/pyclass.py
@@ -20,9 +20,13 @@
 
 from .setup import configure_classpath, os
 
-# If the environment variable isn't defined, look in the current directory.
-configure_classpath(os.environ['ANSERINI_CLASSPATH'] if 'ANSERINI_CLASSPATH' in os.environ else
-                    os.path.join(os.path.split(__file__)[0], 'resources/jars/'))
+try:
+    # If the environment variable isn't defined, look in the resources/jars/ directory next to this file.
+    configure_classpath(os.environ['ANSERINI_CLASSPATH'] if 'ANSERINI_CLASSPATH' in os.environ else
+                        os.path.join(os.path.split(__file__)[0], 'resources/jars/'))
+except:
+    # This might happen if the JVM's already been initialized. Just eat the error.
+    pass
 
 from jnius import autoclass, cast
 

diff --git a/tests/resources/simple_trec_run_unjudged_keep.txt b/tests/resources/simple_trec_run_unjudged_keep.txt
@@ -1,4 +1,3 @@
-Results:
 ndcg_cut_5            	all	0.0848
 ndcg_cut_10           	all	0.0550
 judged_5              	all	0.5000

diff --git a/tests/resources/simple_trec_run_unjudged_remove.txt b/tests/resources/simple_trec_run_unjudged_remove.txt
@@ -1,4 +1,3 @@
-Results:
 ndcg_cut_5            	all	0.1131
 ndcg_cut_10           	all	0.0734
 judged_5              	all	1.0000

diff --git a/tests/test_trectools.py b/tests/test_trectools.py
@@ -79,7 +79,7 @@ def test_normalize_scores(self):
                                     self.output_path))
 
     # This and the next test case go together - to keep and to remove unjudged docs.
-    def test_undjudged_keep(self):
+    def test_unjudged_keep(self):
         qrels_path = os.path.join(self.root, 'tools/topics-and-qrels/qrels.covid-round1.txt')
         run_path = os.path.join(self.root, 'tests/resources/simple_trec_run_filter.txt')
         results = subprocess.check_output(