Skip to content

Commit

Permalink
Add bindings to Lucene HNSW indexes for MS MARCO v1 passage (#1993)
Browse files Browse the repository at this point in the history
  • Loading branch information
lintool authored Sep 25, 2024
1 parent 5900347 commit bc13901
Show file tree
Hide file tree
Showing 18 changed files with 1,042 additions and 370 deletions.
61 changes: 32 additions & 29 deletions docs/2cr/msmarco-v1-doc.html
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
">
<div class="container d-flex align-items-center justify-content-center text-center h-100" style="max-height: 150px">
<div class="text-white">
<h1 class="mb-3">MS MARCO V1 Document</h1>
<!-- Page heading: project logo followed by the page title. -->
<h1 class="mb-3"><img width="80" src="../pyserini-logo.png" alt="Pyserini logo"/> &nbsp; MS MARCO V1 Document Regressions</h1>
</div>
</div>
</div>
Expand Down Expand Up @@ -284,11 +284,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<!-- Condition: BM25 doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<tr class="accordion-toggle collapsed" id="row2" data-toggle="collapse" data-parent="#row2" href="#collapse2">
<td class="expand-button"></td>
<td style="min-width: 75px">[<a href="#" data-mdb-toggle="tooltip" title="Ma et al. (SIGIR 2022) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.">1</a>]&nbsp;(1b)</td>
<td style="min-width: 410px">BM25 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td style="min-width: 410px">BM25 doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td>0.2449</td>
<td>0.5302</td>
<td>0.6871</td>
Expand Down Expand Up @@ -508,11 +508,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25+RM3 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<!-- Condition: BM25+RM3 doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<tr class="accordion-toggle collapsed" id="row4" data-toggle="collapse" data-parent="#row4" href="#collapse4">
<td class="expand-button"></td>
<td style="min-width: 75px">[<a href="#" data-mdb-toggle="tooltip" title="Ma et al. (SIGIR 2022) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.">1</a>]&nbsp;(1d)</td>
<td style="min-width: 410px">BM25+RM3 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td style="min-width: 410px">BM25+RM3 doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td>0.2892</td>
<td>0.5684</td>
<td>0.7368</td>
Expand Down Expand Up @@ -732,11 +732,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25+Rocchio doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<!-- Condition: BM25+Rocchio doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<tr class="accordion-toggle collapsed" id="row6" data-toggle="collapse" data-parent="#row6" href="#collapse6">
<td class="expand-button"></td>
<td style="min-width: 75px"></td>
<td style="min-width: 410px">BM25+Rocchio doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td style="min-width: 410px">BM25+Rocchio doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td>0.2889</td>
<td>0.5570</td>
<td>0.7423</td>
Expand Down Expand Up @@ -957,11 +957,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25 doc segmented (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61) -->
<!-- Condition: BM25 doc seg (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61) -->
<tr class="accordion-toggle collapsed" id="row8" data-toggle="collapse" data-parent="#row8" href="#collapse8">
<td class="expand-button"></td>
<td style="min-width: 75px"></td>
<td style="min-width: 410px">BM25 doc segmented (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61)</td>
<td style="min-width: 410px">BM25 doc seg (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61)</td>
<td>0.2398</td>
<td>0.5389</td>
<td>0.6565</td>
Expand Down Expand Up @@ -1181,11 +1181,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25+RM3 doc segmented (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61) -->
<!-- Condition: BM25+RM3 doc seg (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61) -->
<tr class="accordion-toggle collapsed" id="row10" data-toggle="collapse" data-parent="#row10" href="#collapse10">
<td class="expand-button"></td>
<td style="min-width: 75px"></td>
<td style="min-width: 410px">BM25+RM3 doc segmented (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61)</td>
<td style="min-width: 410px">BM25+RM3 doc seg (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61)</td>
<td>0.2655</td>
<td>0.5392</td>
<td>0.7037</td>
Expand Down Expand Up @@ -1405,11 +1405,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25+Rocchio doc segmented (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61) -->
<!-- Condition: BM25+Rocchio doc seg (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61) -->
<tr class="accordion-toggle collapsed" id="row12" data-toggle="collapse" data-parent="#row12" href="#collapse12">
<td class="expand-button"></td>
<td style="min-width: 75px"></td>
<td style="min-width: 410px">BM25+Rocchio doc segmented (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61)</td>
<td style="min-width: 410px">BM25+Rocchio doc seg (<i>k<sub><small>1</small></sub></i>=2.16, <i>b</i>=0.61)</td>
<td>0.2672</td>
<td>0.5421</td>
<td>0.7115</td>
Expand Down Expand Up @@ -1630,11 +1630,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<!-- Condition: BM25 w/ doc2query-T5 doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<tr class="accordion-toggle collapsed" id="row14" data-toggle="collapse" data-parent="#row14" href="#collapse14">
<td class="expand-button"></td>
<td style="min-width: 75px">[<a href="#" data-mdb-toggle="tooltip" title="Ma et al. (SIGIR 2022) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.">1</a>]&nbsp;(2b)</td>
<td style="min-width: 410px">BM25 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td style="min-width: 410px">BM25 w/ doc2query-T5 doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td>0.2798</td>
<td>0.6119</td>
<td>0.7165</td>
Expand Down Expand Up @@ -1854,11 +1854,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25+RM3 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<!-- Condition: BM25+RM3 w/ doc2query-T5 doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4) -->
<tr class="accordion-toggle collapsed" id="row16" data-toggle="collapse" data-parent="#row16" href="#collapse16">
<td class="expand-button"></td>
<td style="min-width: 75px">[<a href="#" data-mdb-toggle="tooltip" title="Ma et al. (SIGIR 2022) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.">1</a>]&nbsp;(2d)</td>
<td style="min-width: 410px">BM25+RM3 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td style="min-width: 410px">BM25+RM3 w/ doc2query-T5 doc seg (<i>k<sub><small>1</small></sub></i>=0.9, <i>b</i>=0.4)</td>
<td>0.3030</td>
<td>0.6290</td>
<td>0.7483</td>
Expand Down Expand Up @@ -2079,11 +2079,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=2.56, <i>b</i>=0.59) -->
<!-- Condition: BM25 w/ doc2query-T5 doc seg (<i>k<sub><small>1</small></sub></i>=2.56, <i>b</i>=0.59) -->
<tr class="accordion-toggle collapsed" id="row18" data-toggle="collapse" data-parent="#row18" href="#collapse18">
<td class="expand-button"></td>
<td style="min-width: 75px"></td>
<td style="min-width: 410px">BM25 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=2.56, <i>b</i>=0.59)</td>
<td style="min-width: 410px">BM25 w/ doc2query-T5 doc seg (<i>k<sub><small>1</small></sub></i>=2.56, <i>b</i>=0.59)</td>
<td>0.2658</td>
<td>0.6273</td>
<td>0.6707</td>
Expand Down Expand Up @@ -2303,11 +2303,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: BM25+RM3 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=2.56, <i>b</i>=0.59) -->
<!-- Condition: BM25+RM3 w/ doc2query-T5 doc seg (<i>k<sub><small>1</small></sub></i>=2.56, <i>b</i>=0.59) -->
<tr class="accordion-toggle collapsed" id="row20" data-toggle="collapse" data-parent="#row20" href="#collapse20">
<td class="expand-button"></td>
<td style="min-width: 75px"></td>
<td style="min-width: 410px">BM25+RM3 w/ doc2query-T5 doc segmented (<i>k<sub><small>1</small></sub></i>=2.56, <i>b</i>=0.59)</td>
<td style="min-width: 410px">BM25+RM3 w/ doc2query-T5 doc seg (<i>k<sub><small>1</small></sub></i>=2.56, <i>b</i>=0.59)</td>
<td>0.2892</td>
<td>0.6247</td>
<td>0.7069</td>
Expand Down Expand Up @@ -2416,11 +2416,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>
</div></td>
</tr>
<tr><td style="border-bottom: 0"></td></tr>
<!-- Condition: uniCOIL (noexp): pre-encoded queries -->
<!-- Condition: uniCOIL (noexp): cached queries -->
<tr class="accordion-toggle collapsed" id="row21" data-toggle="collapse" data-parent="#row21" href="#collapse21">
<td class="expand-button"></td>
<td style="min-width: 75px">[<a href="#" data-mdb-toggle="tooltip" title="Ma et al. (SIGIR 2022) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.">1</a>]&nbsp;(3a)</td>
<td style="min-width: 410px">uniCOIL (noexp): pre-encoded queries</td>
<td style="min-width: 410px">uniCOIL (noexp): cached queries</td>
<td>0.2665</td>
<td>0.6349</td>
<td>0.6391</td>
Expand Down Expand Up @@ -2528,11 +2528,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: uniCOIL (noexp): query inference with PyTorch -->
<!-- Condition: uniCOIL (noexp): PyTorch -->
<tr class="accordion-toggle collapsed" id="row22" data-toggle="collapse" data-parent="#row22" href="#collapse22">
<td class="expand-button"></td>
<td style="min-width: 75px"></td>
<td style="min-width: 410px">uniCOIL (noexp): query inference with PyTorch</td>
<td style="min-width: 410px">uniCOIL (noexp): PyTorch</td>
<td>0.2665</td>
<td>0.6349</td>
<td>0.6391</td>
Expand Down Expand Up @@ -2644,11 +2644,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>
</div></td>
</tr>
<tr><td style="border-bottom: 0"></td></tr>
<!-- Condition: uniCOIL (w/ doc2query-T5): pre-encoded queries -->
<!-- Condition: uniCOIL (w/ doc2query-T5): cached queries -->
<tr class="accordion-toggle collapsed" id="row23" data-toggle="collapse" data-parent="#row23" href="#collapse23">
<td class="expand-button"></td>
<td style="min-width: 75px">[<a href="#" data-mdb-toggle="tooltip" title="Ma et al. (SIGIR 2022) Document Expansions and Learned Sparse Lexical Representations for MS MARCO V1 and V2.">1</a>]&nbsp;(3b)</td>
<td style="min-width: 410px">uniCOIL (w/ doc2query-T5): pre-encoded queries</td>
<td style="min-width: 410px">uniCOIL (w/ doc2query-T5): cached queries</td>
<td>0.2789</td>
<td>0.6396</td>
<td>0.6652</td>
Expand Down Expand Up @@ -2756,11 +2756,11 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>

</div></td>
</tr>
<!-- Condition: uniCOIL (w/ doc2query-T5): query inference with PyTorch -->
<!-- Condition: uniCOIL (w/ doc2query-T5): PyTorch -->
<tr class="accordion-toggle collapsed" id="row24" data-toggle="collapse" data-parent="#row24" href="#collapse24">
<td class="expand-button"></td>
<td style="min-width: 75px"></td>
<td style="min-width: 410px">uniCOIL (w/ doc2query-T5): query inference with PyTorch</td>
<td style="min-width: 410px">uniCOIL (w/ doc2query-T5): PyTorch</td>
<td>0.2789</td>
<td>0.6396</td>
<td>0.6652</td>
Expand Down Expand Up @@ -2876,6 +2876,9 @@ <h1 class="mb-3">MS MARCO V1 Document</h1>
</table>
</div>

<!-- Empty spacer above the References heading. A div is not a void element, so it needs an explicit end tag; the parser ignores a trailing "/". -->
<div style="padding-top: 20px"></div>
<h4>References</h4>

<ul style="list-style-type:none; padding-top: 25px">

<li><p>[1] Xueguang Ma, Ronak Pradeep, Rodrigo Nogueira, and Jimmy Lin.
Expand Down
Loading

0 comments on commit bc13901

Please sign in to comment.