From 53514b1ab29398a4bb6ff4a315b7394e509e6be5 Mon Sep 17 00:00:00 2001 From: "Matt J. H. Yang" Date: Sat, 13 Jan 2024 13:12:00 -0500 Subject: [PATCH] update splade-pp-ed beir topics (#2337) add pre-encoded topic bindings for pyserini 2CR remove -optimize flag to reduce runtime --- ...ssions-beir-v1.0.0-arguana-splade-pp-ed.md | 2 +- ...essions-beir-v1.0.0-bioasq-splade-pp-ed.md | 2 +- ...-beir-v1.0.0-climate-fever-splade-pp-ed.md | 2 +- ...v1.0.0-cqadupstack-android-splade-pp-ed.md | 2 +- ...v1.0.0-cqadupstack-english-splade-pp-ed.md | 2 +- ...-v1.0.0-cqadupstack-gaming-splade-pp-ed.md | 2 +- ...eir-v1.0.0-cqadupstack-gis-splade-pp-ed.md | 2 +- ....0-cqadupstack-mathematica-splade-pp-ed.md | 2 +- ...v1.0.0-cqadupstack-physics-splade-pp-ed.md | 2 +- ....0-cqadupstack-programmers-splade-pp-ed.md | 2 +- ...r-v1.0.0-cqadupstack-stats-splade-pp-ed.md | 2 +- ...eir-v1.0.0-cqadupstack-tex-splade-pp-ed.md | 2 +- ...ir-v1.0.0-cqadupstack-unix-splade-pp-ed.md | 2 +- ...0.0-cqadupstack-webmasters-splade-pp-ed.md | 2 +- ....0.0-cqadupstack-wordpress-splade-pp-ed.md | 2 +- ...beir-v1.0.0-dbpedia-entity-splade-pp-ed.md | 2 +- ...ressions-beir-v1.0.0-fever-splade-pp-ed.md | 2 +- ...gressions-beir-v1.0.0-fiqa-splade-pp-ed.md | 2 +- ...sions-beir-v1.0.0-hotpotqa-splade-pp-ed.md | 2 +- ...sions-beir-v1.0.0-nfcorpus-splade-pp-ed.md | 2 +- ...regressions-beir-v1.0.0-nq-splade-pp-ed.md | 2 +- ...ressions-beir-v1.0.0-quora-splade-pp-ed.md | 2 +- ...sions-beir-v1.0.0-robust04-splade-pp-ed.md | 2 +- ...ssions-beir-v1.0.0-scidocs-splade-pp-ed.md | 2 +- ...ssions-beir-v1.0.0-scifact-splade-pp-ed.md | 2 +- ...sions-beir-v1.0.0-signal1m-splade-pp-ed.md | 2 +- ...ons-beir-v1.0.0-trec-covid-splade-pp-ed.md | 2 +- ...ions-beir-v1.0.0-trec-news-splade-pp-ed.md | 2 +- ...ir-v1.0.0-webis-touche2020-splade-pp-ed.md | 2 +- .../anserini/search/topicreader/Topics.java | 31 +++++++++++++++++++ .../beir-v1.0.0-arguana-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-bioasq-splade-pp-ed.yaml | 2 +- ...eir-v1.0.0-climate-fever-splade-pp-ed.yaml | 2 +- ....0.0-cqadupstack-android-splade-pp-ed.yaml | 2 +- ....0.0-cqadupstack-english-splade-pp-ed.yaml | 2 +- ...1.0.0-cqadupstack-gaming-splade-pp-ed.yaml | 2 +- ...r-v1.0.0-cqadupstack-gis-splade-pp-ed.yaml | 2 +- ...-cqadupstack-mathematica-splade-pp-ed.yaml | 2 +- ....0.0-cqadupstack-physics-splade-pp-ed.yaml | 2 +- ...-cqadupstack-programmers-splade-pp-ed.yaml | 2 +- ...v1.0.0-cqadupstack-stats-splade-pp-ed.yaml | 2 +- ...r-v1.0.0-cqadupstack-tex-splade-pp-ed.yaml | 2 +- ...-v1.0.0-cqadupstack-unix-splade-pp-ed.yaml | 2 +- ...0-cqadupstack-webmasters-splade-pp-ed.yaml | 2 +- ....0-cqadupstack-wordpress-splade-pp-ed.yaml | 2 +- ...ir-v1.0.0-dbpedia-entity-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-fever-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-fiqa-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-hotpotqa-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-nfcorpus-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-nq-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-quora-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-robust04-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-scidocs-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-scifact-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-signal1m-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-trec-covid-splade-pp-ed.yaml | 2 +- .../beir-v1.0.0-trec-news-splade-pp-ed.yaml | 2 +- ...-v1.0.0-webis-touche2020-splade-pp-ed.yaml | 2 +- .../search/topicreader/TopicReaderTest.java | 2 +- 60 files changed, 90 insertions(+), 59 deletions(-) diff --git a/docs/regressions/regressions-beir-v1.0.0-arguana-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-arguana-splade-pp-ed.md index 5c0c69668c..82a55ae17a 100644 --- a/docs/regressions/regressions-beir-v1.0.0-arguana-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-arguana-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-arguana-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-arguana-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-arguana-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-bioasq-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-bioasq-splade-pp-ed.md index 7b1fc41b4f..ff7f9fda9c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-bioasq-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-bioasq-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-bioasq-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-bioasq-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-bioasq-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-climate-fever-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-climate-fever-splade-pp-ed.md index b66a06170b..61d0d283ec 100644 --- a/docs/regressions/regressions-beir-v1.0.0-climate-fever-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-climate-fever-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-climate-fever-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-climate-fever-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-climate-fever-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android-splade-pp-ed.md index 25f1cdf51f..767f0257d8 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-android-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-android-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-android-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-android-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english-splade-pp-ed.md index 5f8df3f2d8..301e8dfb96 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-english-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-english-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-english-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-english-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming-splade-pp-ed.md index 1a1264f6bb..72dacd9f05 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gaming-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-gaming-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-gaming-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis-splade-pp-ed.md index 951f17b0b8..c69ddf6cc7 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-gis-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-gis-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-gis-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed.md index 94115e9c82..358ae79188 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics-splade-pp-ed.md index dfa997a4dd..c59291a5cb 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-physics-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-physics-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-physics-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers-splade-pp-ed.md index b8d0586982..3720a4c1de 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-programmers-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-programmers-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-programmers-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats-splade-pp-ed.md index 23425751c7..f215c94a7c 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-stats-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-stats-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-stats-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex-splade-pp-ed.md index 10f6f9af15..2ffe2439d9 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-tex-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-tex-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-tex-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix-splade-pp-ed.md index f77636d383..56cf1e1436 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-unix-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-unix-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-unix-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed.md index a361c654fe..0c77f851cc 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed.md index da33273c5b..c81dfa6e10 100644 --- a/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity-splade-pp-ed.md index 3966df3f1d..6506a631dd 100644 --- a/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-dbpedia-entity-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-dbpedia-entity-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-dbpedia-entity-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-dbpedia-entity-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-fever-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-fever-splade-pp-ed.md index b4691eea58..c270e9b95f 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fever-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-fever-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-fever-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-fever-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-fever-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-fiqa-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-fiqa-splade-pp-ed.md index d55ac60dc0..816d26dc2d 100644 --- a/docs/regressions/regressions-beir-v1.0.0-fiqa-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-fiqa-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-fiqa-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-fiqa-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-fiqa-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-hotpotqa-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-hotpotqa-splade-pp-ed.md index 53c9b48122..3dc9999350 100644 --- a/docs/regressions/regressions-beir-v1.0.0-hotpotqa-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-hotpotqa-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-hotpotqa-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-hotpotqa-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-hotpotqa-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-nfcorpus-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-nfcorpus-splade-pp-ed.md index 6dff53adb5..20df849190 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nfcorpus-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-nfcorpus-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-nfcorpus-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-nfcorpus-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-nfcorpus-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-nq-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-nq-splade-pp-ed.md index 7ebf13fb16..2818302d27 100644 --- a/docs/regressions/regressions-beir-v1.0.0-nq-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-nq-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-nq-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-nq-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-nq-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-quora-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-quora-splade-pp-ed.md index 1ccf58ee17..cef0cb2665 100644 --- a/docs/regressions/regressions-beir-v1.0.0-quora-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-quora-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-quora-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-quora-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-quora-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-robust04-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-robust04-splade-pp-ed.md index d0eb6fe20a..5b2959de82 100644 --- a/docs/regressions/regressions-beir-v1.0.0-robust04-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-robust04-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-robust04-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-robust04-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-robust04-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-scidocs-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-scidocs-splade-pp-ed.md index 35e539a8cc..cbd79412b4 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scidocs-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-scidocs-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-scidocs-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-scidocs-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-scidocs-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-scifact-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-scifact-splade-pp-ed.md index dc964af46a..165aae2674 100644 --- a/docs/regressions/regressions-beir-v1.0.0-scifact-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-scifact-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-scifact-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-scifact-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-scifact-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-signal1m-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-signal1m-splade-pp-ed.md index 1b9ddb5fec..9e438040f4 100644 --- a/docs/regressions/regressions-beir-v1.0.0-signal1m-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-signal1m-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-signal1m-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-signal1m-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-signal1m-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-covid-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-trec-covid-splade-pp-ed.md index 6b34cac9d4..df59c1a48e 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-covid-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-covid-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-trec-covid-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-trec-covid-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-trec-covid-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-trec-news-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-trec-news-splade-pp-ed.md index 85e5326f27..57667eea01 100644 --- a/docs/regressions/regressions-beir-v1.0.0-trec-news-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-trec-news-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-trec-news-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-trec-news-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-trec-news-splade-pp-ed & ``` diff --git a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020-splade-pp-ed.md b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020-splade-pp-ed.md index 5cdd6cfab2..5d76e9c4ea 100644 --- a/docs/regressions/regressions-beir-v1.0.0-webis-touche2020-splade-pp-ed.md +++ b/docs/regressions/regressions-beir-v1.0.0-webis-touche2020-splade-pp-ed.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/beir-v1.0.0-webis-touche2020-splade-pp-ed \ -generator DefaultLuceneDocumentGenerator \ -index indexes/lucene-index.beir-v1.0.0-webis-touche2020-splade-pp-ed/ \ - -threads 16 -impact -pretokenized -optimize \ + -threads 16 -impact -pretokenized \ >& logs/log.beir-v1.0.0-webis-touche2020-splade-pp-ed & ``` diff --git a/src/main/java/io/anserini/search/topicreader/Topics.java b/src/main/java/io/anserini/search/topicreader/Topics.java index cad0768f61..8ff1251251 100755 --- a/src/main/java/io/anserini/search/topicreader/Topics.java +++ b/src/main/java/io/anserini/search/topicreader/Topics.java @@ -277,6 +277,37 @@ public enum Topics { BEIR_V1_0_0_CLIMATE_FEVER_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM(TsvStringTopicReader.class, "topics.beir-v1.0.0-climate-fever.test.splade_distil_cocodenser_medium.tsv.gz"), BEIR_V1_0_0_SCIFACT_TEST_SPLADE_DISTILL_COCODENSER_MEDIUM(TsvStringTopicReader.class, "topics.beir-v1.0.0-scifact.test.splade_distil_cocodenser_medium.tsv.gz"), + // BEIR (v1.0.0): pre-encoded queries for SPLADE++ (CoCondenser-EnsembleDistil) + BEIR_V1_0_0_TREC_COVID_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-trec-covid.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_BIOASQ_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-bioasq.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_NFCORPUS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-nfcorpus.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_NQ_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-nq.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_HOTPOTQA_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-hotpotqa.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_FIQA_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-fiqa.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_SIGNAL1M_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-signal1m.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_TREC_NEWS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-trec-news.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_ROBUST04_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-robust04.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_ARGUANA_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-arguana.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_WEBIS_TOUCHE2020_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-webis-touche2020.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_ANDROID_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-android.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_ENGLISH_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-english.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_GAMING_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-gaming.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_GIS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-gis.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_MATHEMATICA_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-mathematica.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_PHYSICS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-physics.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_PROGRAMMERS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-programmers.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_STATS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-stats.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_TEX_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-tex.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_UNIX_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-unix.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_WEBMASTERS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-webmasters.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CQADUPSTACK_WORDPRESS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-cqadupstack-wordpress.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_QUORA_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-quora.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_DBPEDIA_ENTITY_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-dbpedia-entity.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_SCIDOCS_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-scidocs.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_FEVER_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-fever.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_CLIMATE_FEVER_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-climate-fever.test.splade-pp-ed.tsv.gz"), + BEIR_V1_0_0_SCIFACT_TEST_SPLADE_PP_ED(TsvStringTopicReader.class, "topics.beir-v1.0.0-scifact.test.splade-pp-ed.tsv.gz"), + // BEIR (v1.0.0): pre-encoded queries for uniCOIL-noexp BEIR_V1_0_0_TREC_COVID_TEST_UNCOIL_NOEXP(TsvStringTopicReader.class, "topics.beir-v1.0.0-trec-covid.test.unicoil-noexp.tsv.gz"), BEIR_V1_0_0_BIOASQ_TEST_UNCOIL_NOEXP(TsvStringTopicReader.class, "topics.beir-v1.0.0-bioasq.test.unicoil-noexp.tsv.gz"), diff --git a/src/main/resources/regression/beir-v1.0.0-arguana-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-arguana-splade-pp-ed.yaml index 507022fe95..132be2c83f 100644 --- a/src/main/resources/regression/beir-v1.0.0-arguana-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-arguana-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-arguana-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 8674 documents (non-empty): 8674 diff --git a/src/main/resources/regression/beir-v1.0.0-bioasq-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-bioasq-splade-pp-ed.yaml index 0ea99b8e9f..84a2c21838 100644 --- a/src/main/resources/regression/beir-v1.0.0-bioasq-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-bioasq-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-bioasq-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 14914603 documents (non-empty): 14914603 diff --git a/src/main/resources/regression/beir-v1.0.0-climate-fever-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-climate-fever-splade-pp-ed.yaml index e083a6eb28..b1ec1ad372 100644 --- a/src/main/resources/regression/beir-v1.0.0-climate-fever-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-climate-fever-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-climate-fever-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 5416593 documents (non-empty): 5416593 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android-splade-pp-ed.yaml index e4c12fdac1..7888239e78 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-android-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-android-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-android-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 22998 documents (non-empty): 22998 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english-splade-pp-ed.yaml index 2785df3339..90b86796d2 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-english-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-english-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-english-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 40221 documents (non-empty): 40221 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming-splade-pp-ed.yaml index 15e6381688..7b478eaf52 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gaming-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-gaming-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 45301 documents (non-empty): 45301 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis-splade-pp-ed.yaml index 89be36f77f..8d81985838 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-gis-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-gis-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 37637 documents (non-empty): 37637 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed.yaml index e65df57eb5..0a9ddc0d0d 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-mathematica-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-mathematica-splade-pp-e collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 16705 documents (non-empty): 16705 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics-splade-pp-ed.yaml index bab20d3010..b5ba6b4e12 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-physics-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-physics-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 38316 documents (non-empty): 38316 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers-splade-pp-ed.yaml index 10ab209f6a..c363d4c6cf 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-programmers-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-programmers-splade-pp-e collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 32176 documents (non-empty): 32176 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats-splade-pp-ed.yaml index dad5caa8bb..5b84921e9b 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-stats-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-stats-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 42269 documents (non-empty): 42269 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex-splade-pp-ed.yaml index 0501e75d09..92c21795ea 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-tex-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-tex-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 68184 documents (non-empty): 68184 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix-splade-pp-ed.yaml index 05f28d5eb7..df6c46625f 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-unix-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-unix-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 47382 documents (non-empty): 47382 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed.yaml index 3a6566e3f1..1d628d22af 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-webmasters-splade-pp-ed collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 17405 documents (non-empty): 17405 diff --git a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed.yaml index 31b013e890..6c058792df 100644 --- a/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-cqadupstack-wordpress-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 48605 documents (non-empty): 48605 diff --git a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity-splade-pp-ed.yaml index 8cdbb09bf1..4fe6616975 100644 --- a/src/main/resources/regression/beir-v1.0.0-dbpedia-entity-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-dbpedia-entity-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-dbpedia-entity-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 4635922 documents (non-empty): 4635922 diff --git a/src/main/resources/regression/beir-v1.0.0-fever-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-fever-splade-pp-ed.yaml index d89b62b5b5..9773d349e0 100644 --- a/src/main/resources/regression/beir-v1.0.0-fever-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fever-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-fever-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 5416593 documents (non-empty): 5416593 diff --git a/src/main/resources/regression/beir-v1.0.0-fiqa-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-fiqa-splade-pp-ed.yaml index 9a68260fe9..fbd7201fa5 100644 --- a/src/main/resources/regression/beir-v1.0.0-fiqa-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-fiqa-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-fiqa-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 57638 documents (non-empty): 57638 diff --git a/src/main/resources/regression/beir-v1.0.0-hotpotqa-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-hotpotqa-splade-pp-ed.yaml index 51936f50f8..0a32b64d41 100644 --- a/src/main/resources/regression/beir-v1.0.0-hotpotqa-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-hotpotqa-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-hotpotqa-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 5233329 documents (non-empty): 5233329 diff --git a/src/main/resources/regression/beir-v1.0.0-nfcorpus-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-nfcorpus-splade-pp-ed.yaml index c9af0ee876..95dd6a684b 100644 --- a/src/main/resources/regression/beir-v1.0.0-nfcorpus-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nfcorpus-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-nfcorpus-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 3633 documents (non-empty): 3633 diff --git a/src/main/resources/regression/beir-v1.0.0-nq-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-nq-splade-pp-ed.yaml index 38203fd56e..88b35e0c00 100644 --- a/src/main/resources/regression/beir-v1.0.0-nq-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-nq-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-nq-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 2681468 documents (non-empty): 2681468 diff --git a/src/main/resources/regression/beir-v1.0.0-quora-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-quora-splade-pp-ed.yaml index 889f8cc1b8..7f9111ce54 100644 --- a/src/main/resources/regression/beir-v1.0.0-quora-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-quora-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-quora-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 522931 documents (non-empty): 522931 diff --git a/src/main/resources/regression/beir-v1.0.0-robust04-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-robust04-splade-pp-ed.yaml index bb1884a7f9..8812316f09 100644 --- a/src/main/resources/regression/beir-v1.0.0-robust04-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-robust04-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-robust04-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 528155 documents (non-empty): 528155 diff --git a/src/main/resources/regression/beir-v1.0.0-scidocs-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-scidocs-splade-pp-ed.yaml index be83aeb3e8..d9db28eedc 100644 --- a/src/main/resources/regression/beir-v1.0.0-scidocs-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scidocs-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-scidocs-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 25657 documents (non-empty): 25657 diff --git a/src/main/resources/regression/beir-v1.0.0-scifact-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-scifact-splade-pp-ed.yaml index 2ad1f3f4e5..c2069e2964 100644 --- a/src/main/resources/regression/beir-v1.0.0-scifact-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-scifact-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-scifact-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 5183 documents (non-empty): 5183 diff --git a/src/main/resources/regression/beir-v1.0.0-signal1m-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-signal1m-splade-pp-ed.yaml index c7f4b7e620..bf9cf569f9 100644 --- a/src/main/resources/regression/beir-v1.0.0-signal1m-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-signal1m-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-signal1m-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 2866316 documents (non-empty): 2866316 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-covid-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-trec-covid-splade-pp-ed.yaml index 4b90e23fd8..cab26733b2 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-covid-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-covid-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-trec-covid-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 171332 documents (non-empty): 171332 diff --git a/src/main/resources/regression/beir-v1.0.0-trec-news-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-trec-news-splade-pp-ed.yaml index c64a2464c8..2d95fc5920 100644 --- a/src/main/resources/regression/beir-v1.0.0-trec-news-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-trec-news-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-trec-news-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 594977 documents (non-empty): 594977 diff --git a/src/main/resources/regression/beir-v1.0.0-webis-touche2020-splade-pp-ed.yaml b/src/main/resources/regression/beir-v1.0.0-webis-touche2020-splade-pp-ed.yaml index 4eb0b9e634..735d4eb5b1 100644 --- a/src/main/resources/regression/beir-v1.0.0-webis-touche2020-splade-pp-ed.yaml +++ b/src/main/resources/regression/beir-v1.0.0-webis-touche2020-splade-pp-ed.yaml @@ -6,7 +6,7 @@ index_path: indexes/lucene-index.beir-v1.0.0-webis-touche2020-splade-pp-ed/ collection_class: JsonVectorCollection generator_class: DefaultLuceneDocumentGenerator index_threads: 16 -index_options: -impact -pretokenized -optimize +index_options: -impact -pretokenized index_stats: documents: 382545 documents (non-empty): 382545 diff --git a/src/test/java/io/anserini/search/topicreader/TopicReaderTest.java b/src/test/java/io/anserini/search/topicreader/TopicReaderTest.java index dadec3311c..f86dc59026 100755 --- a/src/test/java/io/anserini/search/topicreader/TopicReaderTest.java +++ b/src/test/java/io/anserini/search/topicreader/TopicReaderTest.java @@ -38,7 +38,7 @@ public void testIterateThroughAllEnums() { String path = topic.path; assertEquals(topic.readerClass, TopicReader.getTopicReaderClassByFile(path)); } - assertEquals(383, cnt); + assertEquals(412, cnt); } @Test