Merge branch 'master' into regression-refactoring

castorini · Oct 7, 2023 · a613e4d · a613e4d
2 parents dbcd54f + a935a36
commit a613e4d
Show file tree

Hide file tree

Showing 11 changed files with 180 additions and 49 deletions.
diff --git a/docs/2cr/msmarco-v2-passage.html b/docs/2cr/msmarco-v2-passage.html
@@ -1457,6 +1457,115 @@ <h1 class="mb-3">MS MARCO V2 Passage</h1>
 </div>
 <!-- Tabs content -->
 
+</div></td>
+</tr>
+<!-- Condition: SLIM++ (norefine, tau=0.5, min_idf=1) -->
+<tr class="accordion-toggle collapsed" id="row13" data-toggle="collapse" data-parent="#row13" href="#collapse13">
+<td class="expand-button"></td>
+<td></td>
+<td style="min-width: 400px">SLIM++ (norefine, tau=0.5, min_idf=1)</td>
+<td>0.2819</td>
+<td>0.6340</td>
+<td>0.7554</td>
+<td>0.5092</td>
+<td>0.8392</td>
+<td></td>
+<td>0.1915</td>
+<td>0.8707</td>
+<td></td>
+<td>0.1904</td>
+<td>0.8683</td>
+</tr>
+<tr class="hide-table-padding">
+<td></td>
+<td colspan="12">
+<div id="collapse13" class="collapse in p-3">
+
+<!-- Tabs navs -->
+<ul class="nav nav-tabs mb-3" id="row13-tabs" role="tablist">
+  <li class="nav-item" role="presentation">
+    <a class="nav-link active" id="row13-tab1-header" data-mdb-toggle="tab" href="#row13-tab1" role="tab" aria-controls="row13-tab1" aria-selected="true" style="text-transform:none">TREC 2021</a>
+  </li>
+  <li class="nav-item" role="presentation">
+    <a class="nav-link" id="row13-tab2-header" data-mdb-toggle="tab" href="#row13-tab2" role="tab" aria-controls="row13-tab2" aria-selected="false" style="text-transform:none">dev</a>
+  </li>
+  <li class="nav-item" role="presentation">
+    <a class="nav-link" id="row13-tab3-header" data-mdb-toggle="tab" href="#row13-tab3" role="tab" aria-controls="row13-tab3" aria-selected="false" style="text-transform:none">dev2</a>
+  </li>
+</ul>
+<!-- Tabs navs -->
+
+<!-- Tabs content -->
+<div class="tab-content" id="row13-content">
+  <div class="tab-pane fade show active" id="row13-tab1" role="tabpanel" aria-labelledby="row13-tab1-header">
+Command to generate run on TREC 2021 queries:
+
+  <blockquote class="mycode">
+<pre><code>python -m pyserini.search.lucene \
+  --threads 16 --batch-size 128 \
+  --index msmarco-v2-passage-slimr-pp-norefine-0shot \
+  --topics dl21 \
+  --encoder castorini/slimr-pp-msmarco-passage \
+  --output run.msmarco-v2-passage.slimr-pp.dl21.txt \
+  --hits 1000 --impact --min-idf 1
+</code></pre></blockquote>
+Evaluation commands:
+
+  <blockquote class="mycode">
+<pre><code>python -m pyserini.eval.trec_eval -c -l 2 -M 100 -m map dl21-passage run.msmarco-v2-passage.slimr-pp.dl21.txt
+python -m pyserini.eval.trec_eval -c -m ndcg_cut.10 dl21-passage run.msmarco-v2-passage.slimr-pp.dl21.txt
+python -m pyserini.eval.trec_eval -c -l 2 -M 100 -m recip_rank dl21-passage run.msmarco-v2-passage.slimr-pp.dl21.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.100 dl21-passage run.msmarco-v2-passage.slimr-pp.dl21.txt
+python -m pyserini.eval.trec_eval -c -l 2 -m recall.1000 dl21-passage run.msmarco-v2-passage.slimr-pp.dl21.txt
+</code></pre>
+  </blockquote>
+
+  </div>
+  <div class="tab-pane fade" id="row13-tab2" role="tabpanel" aria-labelledby="row13-tab2-header">
+    Command to generate run on dev queries:
+
+  <blockquote class="mycode">
+<pre><code>python -m pyserini.search.lucene \
+  --threads 16 --batch-size 128 \
+  --index msmarco-v2-passage-slimr-pp-norefine-0shot \
+  --topics msmarco-v2-passage-dev \
+  --encoder castorini/slimr-pp-msmarco-passage \
+  --output run.msmarco-v2-passage.slimr-pp.dev.txt \
+  --hits 1000 --impact --min-idf 1
+</code></pre></blockquote>
+Evaluation commands:
+
+  <blockquote class="mycode">
+<pre><code>python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank msmarco-v2-passage-dev run.msmarco-v2-passage.slimr-pp.dev.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-v2-passage-dev run.msmarco-v2-passage.slimr-pp.dev.txt
+</code></pre>
+  </blockquote>
+
+  </div>
+  <div class="tab-pane fade" id="row13-tab3" role="tabpanel" aria-labelledby="row13-tab3-header">
+    Command to generate run on dev2 queries:
+
+  <blockquote class="mycode">
+<pre><code>python -m pyserini.search.lucene \
+  --threads 16 --batch-size 128 \
+  --index msmarco-v2-passage-slimr-pp-norefine-0shot \
+  --topics msmarco-v2-passage-dev2 \
+  --encoder castorini/slimr-pp-msmarco-passage \
+  --output run.msmarco-v2-passage.slimr-pp.dev2.txt \
+  --hits 1000 --impact --min-idf 1
+</code></pre></blockquote>
+Evaluation commands:
+
+  <blockquote class="mycode">
+<pre><code>python -m pyserini.eval.trec_eval -c -M 100 -m recip_rank msmarco-v2-passage-dev2 run.msmarco-v2-passage.slimr-pp.dev2.txt
+python -m pyserini.eval.trec_eval -c -m recall.1000 msmarco-v2-passage-dev2 run.msmarco-v2-passage.slimr-pp.dev2.txt
+</code></pre>
+  </blockquote>
+
+  </div>
+</div>
+<!-- Tabs content -->
+
 </div></td>
 </tr>
 

diff --git a/pyserini/2cr/miracl.py b/pyserini/2cr/miracl.py
@@ -390,23 +390,18 @@ def run_conditions(args):
                             if math.isclose(score, float(expected[metric])):
                                 result_str = ok_str
                             # Flaky tests
-                            elif (name == 'mdpr-tied-pft-msmarco.hi' and split == 'train'
-                                  and math.isclose(score, float(expected[metric]), abs_tol=2e-4)) or \
-                                 (name == 'bm25-mdpr-tied-pft-msmarco-hybrid.zh'
-                                  and split == 'dev' and metric == 'nDCG@10'
-                                  and math.isclose(score, float(expected[metric]), abs_tol=2e-4)) or \
-                                 (name == 'mdpr-tied-pft-msmarco-ft-all.ru'
-                                  # Flaky on Jimmy's Mac Studio (Apple M1 Ultra), nDCG@10: 0.3932 -> expected 0.3933
+                            elif (name == 'bm25-mdpr-tied-pft-msmarco-hybrid.zh'
+                                  # Flaky on Jimmy's Mac Studio (Apple M1 Ultra), nDCG@10: 0.5255 -> expected 0.5254
                                   and split == 'dev' and metric == 'nDCG@10'
                                   and math.isclose(score, float(expected[metric]), abs_tol=2e-4)) or \
                                  (name == 'bm25-mdpr-tied-pft-msmarco-hybrid.te'
                                   # Flaky on Jimmy's Mac Studio (Apple M1 Ultra), nDCG@10: 0.6000 -> expected 0.5999
                                   and split == 'train' and metric == 'nDCG@10'
                                   and math.isclose(score, float(expected[metric]), abs_tol=2e-4)) or \
                                  (name == 'mcontriever-tied-pft-msmarco.id'
-                                  # Flaky on Jimmy's Mac Studio (Apple M1 Ultra), nDCG@10: 0.3748 -> expected 0.3749
+                                  # Flaky on Jimmy's Mac Studio (Apple M1 Ultra), nDCG@10: 0.3749 -> expected 0.3748
                                   and split == 'train' and metric == 'nDCG@10'
-                                  and math.isclose(score, float(expected[metric]), abs_tol=2e-4)):
+                                  and math.isclose(score, float(expected[metric]), abs_tol=1e-4)):
                                 result_str = okish_str
                             else:
                                 result_str = fail_str + f' expected {expected[metric]:.4f}'

diff --git a/pyserini/2cr/miracl.yaml b/pyserini/2cr/miracl.yaml
@@ -562,7 +562,7 @@ conditions:
             R@100: 0.8188
       - split: dev
         scores:
-          - nDCG@10: 0.3933
+          - nDCG@10: 0.3932
             R@100: 0.6707
   - name: mdpr-tied-pft-msmarco-ft-all.sw
     eval_key: miracl-v1.0-sw
@@ -791,7 +791,7 @@ conditions:
     splits:
       - split: train
         scores:
-          - nDCG@10: 0.6000
+          - nDCG@10: 0.5998
             R@100: 0.8717
       - split: dev
         scores:
@@ -1071,7 +1071,7 @@ conditions:
     splits:
       - split: train
         scores:
-          - nDCG@10: 0.3749
+          - nDCG@10: 0.3748
             R@100: 0.7955
       - split: dev
         scores:

diff --git a/pyserini/2cr/mrtydi.py b/pyserini/2cr/mrtydi.py
@@ -274,18 +274,6 @@ def run_conditions(args):
                                                                      trec_eval_metric_definitions[metric], runfile))
                             if math.isclose(score, float(expected[metric])):
                                 result_str = ok_str
-                            # Flaky test: small difference on orca
-                            elif name == 'mdpr-tied-pft-nq.te' and split == 'dev' \
-                                    and math.isclose(score, float(expected[metric]), abs_tol=2e-4):
-                                result_str = okish_str
-                            # Flaky test: small difference on orca
-                            elif name == 'mdpr-tied-pft-msmarco-ft-all.ko' and split == 'train' \
-                                    and math.isclose(score, float(expected[metric]), abs_tol=4e-4):
-                                result_str = okish_str
-                            # Flaky test: small difference on Mac Studio (M1)
-                            elif name == 'mdpr-tied-pft-msmarco.th' and split == 'train' \
-                                    and math.isclose(score, float(expected[metric]), abs_tol=3e-4):
-                                result_str = okish_str
                             else:
                                 result_str = fail_str + f' expected {expected[metric]:.4f}'
                             print(f'      {metric:7}: {score:.4f} {result_str}')

diff --git a/pyserini/2cr/msmarco-v1-passage.yaml b/pyserini/2cr/msmarco-v1-passage.yaml
@@ -8,7 +8,7 @@ conditions:
       - topic_key: msmarco-passage-dev-subset
         eval_key: msmarco-passage-dev-subset
         scores:
-          - MRR@10: 0.3300
+          - MRR@10: 0.3301
             R@1K: 0.9811
       - topic_key: dl19-passage
         eval_key: dl19-passage

diff --git a/pyserini/2cr/msmarco-v2-passage.yaml b/pyserini/2cr/msmarco-v2-passage.yaml
@@ -283,5 +283,29 @@ conditions:
           - MAP@100: 0.2193
             nDCG@10: 0.5756
             MRR@100: 0.6991
-            R@100: 0.4246
-            R@1K: 0.6897
+            R@100: 0.4247
+            R@1K: 0.6893
+  - name: slimr-pp
+    display: "SLIM++ (norefine, tau=0.5, min_idf=1)"
+    display-html: "SLIM++ (norefine, tau=0.5, min_idf=1)"
+    command: python -m pyserini.search.lucene --threads 16 --batch-size 128 --index msmarco-v2-passage-slimr-pp-norefine-0shot --topics $topics --encoder castorini/slimr-pp-msmarco-passage --output $output --hits 1000 --impact --min-idf 1
+    topics:
+      - topic_key: msmarco-v2-passage-dev
+        eval_key: msmarco-v2-passage-dev
+        scores:
+          - MRR@100: 0.1915
+            R@1K: 0.8707
+      - topic_key: msmarco-v2-passage-dev2
+        eval_key: msmarco-v2-passage-dev2
+        scores:
+          - MRR@100: 0.1904
+            R@1K: 0.8683
+      - topic_key: dl21
+        eval_key: dl21-passage
+        scores:
+          - MAP@100: 0.2819
+            nDCG@10: 0.6340
+            MRR@100: 0.7554
+            R@100: 0.5092
+            R@1K: 0.8392
+
diff --git a/pyserini/2cr/msmarco.py b/pyserini/2cr/msmarco.py
@@ -152,9 +152,8 @@
      'unicoil',
      '',
      'unicoil-noexp-otf',
-     'unicoil-otf'],
-
-    # MS MARCO v2 doc
+     'unicoil-otf',
+     'slimr-pp'],
     'msmarco-v2-doc':
     ['bm25-doc-default',
      'bm25-doc-segmented-default',
@@ -526,11 +525,6 @@ def run_conditions(args):
                                     runfile))
                             if math.isclose(score, float(expected[metric])):
                                 result_str = ok_str
-                            # Flaky test on Jimmy's iMac Pro and Jimmy's Mac Studio
-                            elif args.collection == 'msmarco-v1-passage' and name == 'splade-pp-ed-rocchio-pytorch' \
-                                    and topic_key == 'msmarco-passage-dev-subset' \
-                                    and metric == 'MRR@10' and abs(score-float(expected[metric])) <= 0.0001:
-                                result_str = okish_str
                             # Flaky test on Jimmy's Mac Studio
                             elif args.collection == 'msmarco-v1-passage' and name == 'distilbert-kd-tasb-avg-prf-pytorch' \
                                     and topic_key == 'msmarco-passage-dev-subset' \
@@ -668,4 +662,4 @@ def run_conditions(args):
         print(f'Must specify a specific condition using --condition or use --all to run all conditions.')
         sys.exit()
 
-    run_conditions(args)
+    run_conditions(args)
diff --git a/pyserini/prebuilt_index_info.py b/pyserini/prebuilt_index_info.py
@@ -2433,27 +2433,27 @@
 IMPACT_INDEX_INFO_MSMARCO = {
     "msmarco-v1-passage-slimr": {
         "description": "Lucene impact index of the MS MARCO V1 passage corpus enoded by SLIM trained with BM25 negatives. (Lucene 9)",
-        "filename": "lucene-index.msmarco-v1-passage-slimr.20230220.tar.gz",
-        "readme": "lucene-index.msmarco-v1-passage-slimr.20230220.md",
+        "filename": "lucene-index.msmarco-v1-passage-slimr.20230925.tar.gz",
+        "readme": "lucene-index.msmarco-v1-passage-slimr.20230925.md",
         "urls": [
-            "https://vault.cs.uwaterloo.ca/s/EptAojzmCxz7mYM/download",
+            "https://vault.cs.uwaterloo.ca/s/SjnaFWA7C9NLmqW/download",
         ],
-        "md5": "79e566fee4f376096e12a33cf67c8012",
-        "size compressed (bytes)": 1942207690,
+        "md5": "3532a09a4a47f862d63b8df81b39ecc9",
+        "size compressed (bytes)": 1902711967,
         "total_terms": 100694232684,
         "documents": 8841823,
         "unique_terms": 28121,
         "downloaded": False
     },
     "msmarco-v1-passage-slimr-pp": {
         "description": "Lucene impact index of the MS MARCO V1 passage corpus enoded by SLIM trained with cross-encoder distillation and hardnegative mining. (Lucene 9)",
-        "filename": "lucene-index.msmarco-v1-passage-slimr-pp.20230220.tar.gz",
-        "readme": "lucene-index.msmarco-v1-passage-slimr-pp.20230220.md",
+        "filename": "lucene-index.msmarco-v1-passage-slimr-pp.20230925.tar.gz",
+        "readme": "lucene-index.msmarco-v1-passage-slimr-pp.20230925.md",
         "urls": [
-            "https://vault.cs.uwaterloo.ca/s/22Gjmnp5EP2HpqR/download",
+            "https://vault.cs.uwaterloo.ca/s/mFTgJJENBZseXXX/download",
         ],
-        "md5": "17b2edd909bcda4980a93fb0ab87e72b",
-        "size compressed (bytes)": 2164253966,
+        "md5": "5badbe47b6a50cf252cafb8a648743f1",
+        "size compressed (bytes)": 2135049683,
         "total_terms": 104421954301,
         "documents": 8841823,
         "unique_terms": 27766,
@@ -2672,7 +2672,19 @@
         "unique_terms": 29148,
         "downloaded": False
     },
-
+    "msmarco-v2-passage-slimr-pp-norefine-0shot": {
+        "description": "Lucene impact index of the MS MARCO V2 passage corpus encoded by SLIM (norefine) trained with cross-encoder distillation and hard-negative mining. (Lucene 9)",
+        "filename": "lucene-index.msmarco-v2-passage-slimr-pp.20230614.tar.gz",
+        "readme": "lucene-index.msmarco-v2-passage-slimr-pp.20230614.md",
+        "urls": [
+            "https://vault.cs.uwaterloo.ca/s/q89FZmcYSagP7Rr/download",
+        ],
+        "md5": "0251a882369dd9c27f6a629198123a40",
+        "size compressed (bytes)": 35297323293,
+        "total_terms": 1668035574958,
+        "documents": 138364197,
+        "downloaded": False
+    },
     "msmarco-v2-doc-segmented-unicoil-0shot": {
         "description": "Lucene impact index of the MS MARCO V2 segmented document corpus for uniCOIL, with title prepended. (Lucene 9)",
         "filename": "lucene-index.msmarco-v2-doc-segmented-unicoil-0shot.20220808.4d6d2a.tar.gz",

diff --git a/...x.msmarco-v1-passage-slimr-pp.20230220.md → ...x.msmarco-v1-passage-slimr-pp.20230925.md b/...x.msmarco-v1-passage-slimr-pp.20230220.md → ...x.msmarco-v1-passage-slimr-pp.20230925.md
@@ -8,4 +8,4 @@ python -m pyserini.index.lucene \
   --threads 48 \
   --impact --pretokenized
 
-lucene-index.msmarco-v1-passage-slimr-pp.20230220.tar.gz MD5 checksum = 17b2edd909bcda4980a93fb0ab87e72b
+lucene-index.msmarco-v1-passage-slimr-pp.20230925.tar.gz MD5 checksum = 5badbe47b6a50cf252cafb8a648743f1
diff --git a/...ndex.msmarco-v1-passage-slimr.20230220.md → ...ndex.msmarco-v1-passage-slimr.20230925.md b/...ndex.msmarco-v1-passage-slimr.20230220.md → ...ndex.msmarco-v1-passage-slimr.20230925.md
@@ -8,4 +8,4 @@ python -m pyserini.index.lucene \
   --threads 48 \
   --impact --pretokenized
 
-lucene-index.msmarco-v1-passage-slimr.20230220.tar.gz MD5 checksum = 79e566fee4f376096e12a33cf67c8012
+lucene-index.msmarco-v1-passage-slimr.20230925.tar.gz MD5 checksum = 3532a09a4a47f862d63b8df81b39ecc9
diff --git a/scripts/jobs.regressions-all.txt b/scripts/jobs.regressions-all.txt
@@ -0,0 +1,9 @@
+python -m pyserini.2cr.msmarco --collection v1-passage --all --directory runs/ --display-commands > logs/log.msmarco-v1-passage 2>&1
+python -m pyserini.2cr.msmarco --collection v1-doc     --all --directory runs/ --display-commands > logs/log.msmarco-v1-doc 2>&1
+python -m pyserini.2cr.msmarco --collection v2-passage --all --directory runs/ --display-commands > logs/log.msmarco-v2-passage 2>&1
+python -m pyserini.2cr.msmarco --collection v2-doc     --all --directory runs/ --display-commands > logs/log.msmarco-v2-doc 2>&1
+python -m pyserini.2cr.miracl --all --directory runs/ --display-commands > logs/log.miracl 2>&1
+python -m pyserini.2cr.mrtydi --all --directory runs/ --display-commands > logs/log.mrtydi 2>&1
+python -m pyserini.2cr.beir   --all --directory runs/ --display-commands > logs/log.beir 2>&1
+python -m pyserini.2cr.odqa   --all --directory runs/ --topic tqa --display-commands > logs/log.odqa.tqa 2>&1
+python -m pyserini.2cr.odqa   --all --directory runs/ --topic nq  --display-commands > logs/log.odqa.nq 2>&1