diff --git a/README.md b/README.md
index bba9112e5..cd8f1dadc 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,7 @@ Documentation is organized into reproduction matrices for different corpora that
 + [Mr.TyDi](https://castorini.github.io/pyserini/2cr/mrtydi.html)
 + [MIRACL](https://castorini.github.io/pyserini/2cr/miracl.html)
 + [Open-Domain Question Answering](https://castorini.github.io/pyserini/2cr/odqa.html)
++ [CIRAL](https://castorini.github.io/pyserini/2cr/ciral.html)
 
 For more details, see our paper on [Building a Culture of Reproducibility in Academic Research](https://arxiv.org/abs/2212.13534).

diff --git a/docs/2cr/ciral.html b/docs/2cr/ciral.html
index 47b370602..b1b709521 100644
--- a/docs/2cr/ciral.html
+++ b/docs/2cr/ciral.html
@@ -145,7 +145,7 @@
 Command to generate run:
 python -m pyserini.search.lucene \
   --language ha \
-  --topics ciral-v1.0-ha-dev-native \
+  --topics ciral-v1.0-ha-test-b-native \
   --index ciral-v1.0-ha \
-  --output run.ciral.bm25-mono.ha.dev.txt \
+  --output run.ciral.bm25-qt.ha.test-b.txt \
   --batch 128 --threads 16 --bm25 --hits 1000

@@ -218,17 +218,17 @@
 Evaluation commands:
 python -m pyserini.eval.trec_eval \
-  -c -m ndcg_cut.20 ciral-v1.0-ha-dev \
-  run.ciral.bm25-mono.ha.dev.txt
+  -c -m ndcg_cut.20 ciral-v1.0-ha-test-b \
+  run.ciral.bm25-qt.ha.test-b.txt

 Command to generate run:
 python -m pyserini.search.lucene \
   --language so \
-  --topics ciral-v1.0-so-dev-native \
+  --topics ciral-v1.0-so-test-b-native \
   --index ciral-v1.0-so \
-  --output run.ciral.bm25-mono.so.dev.txt \
+  --output run.ciral.bm25-qt.so.test-b.txt \
   --batch 128 --threads 16 --bm25 --hits 1000

@@ -238,17 +238,17 @@
 Evaluation commands:
 python -m pyserini.eval.trec_eval \
-  -c -m ndcg_cut.20 ciral-v1.0-so-dev \
-  run.ciral.bm25-mono.so.dev.txt
+  -c -m ndcg_cut.20 ciral-v1.0-so-test-b \
+  run.ciral.bm25-qt.so.test-b.txt

 Command to generate run:
 python -m pyserini.search.lucene \
   --language sw \
-  --topics ciral-v1.0-sw-dev-native \
+  --topics ciral-v1.0-sw-test-b-native \
   --index ciral-v1.0-sw \
-  --output run.ciral.bm25-mono.sw.dev.txt \
+  --output run.ciral.bm25-qt.sw.test-b.txt \
   --batch 128 --threads 16 --bm25 --hits 1000

@@ -258,17 +258,17 @@
 Evaluation commands:
 python -m pyserini.eval.trec_eval \
-  -c -m ndcg_cut.20 ciral-v1.0-sw-dev \
-  run.ciral.bm25-mono.sw.dev.txt
+  -c -m ndcg_cut.20 ciral-v1.0-sw-test-b \
+  run.ciral.bm25-qt.sw.test-b.txt

 Command to generate run:
 python -m pyserini.search.lucene \
   --language yo \
-  --topics ciral-v1.0-yo-dev-native \
+  --topics ciral-v1.0-yo-test-b-native \
   --index ciral-v1.0-yo \
-  --output run.ciral.bm25-mono.yo.dev.txt \
+  --output run.ciral.bm25-qt.yo.test-b.txt \
   --batch 128 --threads 16 --bm25 --hits 1000

@@ -278,54 +278,34 @@
 Evaluation commands:
 python -m pyserini.eval.trec_eval \
-  -c -m ndcg_cut.20 ciral-v1.0-yo-dev \
-  run.ciral.bm25-mono.yo.dev.txt
+  -c -m ndcg_cut.20 ciral-v1.0-yo-test-b \
+  run.ciral.bm25-qt.yo.test-b.txt
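These CLI invocations are what the 2CR scripts emit; for readers who script against the Python API instead, a minimal sketch of the Hausa BM25 run follows (illustrative only, assuming the `ciral-v1.0-ha` prebuilt index and the `ciral-v1.0-ha-test-b-native` topic key registered later in this diff, with query text under the usual `title` field — the supported path remains `python -m pyserini.2cr.ciral`):

```python
# Sketch of the bm25-qt Hausa run via the Python API.
from pyserini.search import get_topics
from pyserini.search.lucene import LuceneSearcher

searcher = LuceneSearcher.from_prebuilt_index('ciral-v1.0-ha')
searcher.set_language('ha')  # analyzer matching --language ha

topics = get_topics('ciral-v1.0-ha-test-b-native')
with open('run.ciral.bm25-qt.ha.test-b.txt', 'w') as out:
    for qid, topic in topics.items():
        # TREC run format: qid Q0 docid rank score tag
        for rank, hit in enumerate(searcher.search(topic['title'], k=1000), start=1):
            out.write(f'{qid} Q0 {hit.docid} {rank} {hit.score:.6f} bm25-qt\n')
```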
-| MRR@10, dev queries | Hausa | Somali | Swahili | Yoruba | Avg |
-| BM25 Monolingual (Human QT) | 0.3153 | 0.4000 | 0.1681 | 0.3833 | 0.3167 |
+| BM25 Machine DT | 0.2124 | 0.2186 | 0.2582 | 0.3700 | 0.2648 |
+Command to generate run (per language, ${lang} in {ha, so, sw, yo}):
+python -m pyserini.search.lucene \
+  --topics ciral-v1.0-${lang}-test-b \
+  --index ciral-v1.0-${lang}-en \
+  --output run.ciral.bm25-dt.${lang}.test-b.txt \
+  --batch 128 --threads 16 --bm25 --hits 1000
+
+Evaluation commands:
+python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.20 ciral-v1.0-${lang}-test-b \
+  run.ciral.bm25-dt.${lang}.test-b.txt
+| mDPR (tied encoders), pre-FT w/ MS MARCO | 0.0397 | 0.0635 | 0.1227 | 0.1458 | 0.0929 |
+
+Command to generate run (per language):
+python -m pyserini.search.faiss \
+  --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco \
+  --topics ciral-v1.0-${lang}-test-b \
+  --index ciral-v1.0-${lang}-mdpr-tied-pft-msmarco \
+  --output run.ciral.mdpr-tied-pft-msmarco.${lang}.test-b.txt \
+  --batch 128 --threads 16 --hits 1000
+
+Evaluation commands:
+python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.20 ciral-v1.0-${lang}-test-b \
+  run.ciral.mdpr-tied-pft-msmarco.${lang}.test-b.txt
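The mDPR rows are dense runs over prebuilt FAISS indexes; a rough programmatic equivalent of the CLI above (a sketch, assuming `AutoQueryEncoder` mirrors `--encoder-class auto` with its default pooling):

```python
# Sketch of a dense mDPR query against one CIRAL language (Hausa).
from pyserini.search.faiss import AutoQueryEncoder, FaissSearcher

encoder = AutoQueryEncoder('castorini/mdpr-tied-pft-msmarco')
searcher = FaissSearcher.from_prebuilt_index(
    'ciral-v1.0-ha-mdpr-tied-pft-msmarco', encoder)

hits = searcher.search('misalin tambaya', k=1000)  # hypothetical Hausa query
for hit in hits[:3]:
    print(hit.docid, hit.score)
```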
+| Afriberta, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi | 0.2028 | 0.1682 | 0.2166 | 0.1157 | 0.1758 |
+
+Command to generate run (per language):
+python -m pyserini.search.faiss \
+  --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --topics ciral-v1.0-${lang}-test-b \
+  --index ciral-v1.0-${lang}-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.${lang}.test-b.txt \
+  --batch 128 --threads 16 --hits 1000
+
+Evaluation commands:
+python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.20 ciral-v1.0-${lang}-test-b \
+  run.ciral.afriberta-pft-msmarco-ft-mrtydi.${lang}.test-b.txt
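All nDCG@20 figures in this table come from trec_eval's `ndcg_cut.20`; for reference, the metric in its standard linear-gain form (which is what `ndcg_cut` computes) reduces to a few lines:

```python
import math

def ndcg_at_k(ranked_docids, qrels, k=20):
    """nDCG@k for one query; qrels maps docid -> graded relevance."""
    # DCG: gain discounted by log2(rank + 1), ranks being 1-based.
    dcg = sum(qrels.get(docid, 0) / math.log2(i + 2)
              for i, docid in enumerate(ranked_docids[:k]))
    # Ideal DCG: the same sum over the best possible ordering.
    ideal = sorted(qrels.values(), reverse=True)[:k]
    idcg = sum(rel / math.log2(i + 2) for i, rel in enumerate(ideal))
    return dcg / idcg if idcg > 0 else 0.0
```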
+| RRF Fusion of BM25 Machine DT and Afriberta-DPR | 0.2935 | 0.2878 | 0.3187 | 0.3435 | 0.3109 |
+
+Command to generate run (per language):
+python -m pyserini.fusion \
+  --runs run.ciral.bm25-dt.${lang}.test-b.txt run.ciral.afriberta-pft-msmarco-ft-mrtydi.${lang}.test-b.txt \
+  --runtag rrf-afridpr-bmdt \
+  --method rrf --rrf.k 60 \
+  --output run.ciral.bm25-dt-afriberta-dpr-fusion.${lang}.test-b.txt
+
+Evaluation commands:
+python -m pyserini.eval.trec_eval \
+  -c -m ndcg_cut.20 ciral-v1.0-${lang}-test-b \
+  run.ciral.bm25-dt-afriberta-dpr-fusion.${lang}.test-b.txt

@@ -429,7 +771,7 @@ CIRAL
-| Recall@100, dev queries | Hausa | Somali | Swahili | Yoruba | Avg |
+| Recall@100, Test Set B | Hausa | Somali | Swahili | Yoruba | Avg |

@@ -440,16 +782,16 @@ CIRAL
-| BM25 Monolingual (Human QT) | 0.2760 | 0.1850 | 0.4742 | 0.5114 | 0.3617 |
+| BM25 Human QT | 0.3800 | 0.3479 | 0.4166 | 0.6434 | 0.4470 |

@@ -482,17 +824,17 @@
 Evaluation commands:
 python -m pyserini.eval.trec_eval \
-  -c -m recall.100 ciral-v1.0-ha-dev \
-  run.ciral.bm25-mono.ha.dev.txt
+  -c -m recall.100 ciral-v1.0-ha-test-b \
+  run.ciral.bm25-qt.ha.test-b.txt

@@ -502,17 +844,17 @@
 Evaluation commands:
 python -m pyserini.eval.trec_eval \
-  -c -m recall.100 ciral-v1.0-so-dev \
-  run.ciral.bm25-mono.so.dev.txt
+  -c -m recall.100 ciral-v1.0-so-test-b \
+  run.ciral.bm25-qt.so.test-b.txt

@@ -522,17 +864,17 @@
 Evaluation commands:
 python -m pyserini.eval.trec_eval \
-  -c -m recall.100 ciral-v1.0-sw-dev \
-  run.ciral.bm25-mono.sw.dev.txt
+  -c -m recall.100 ciral-v1.0-sw-test-b \
+  run.ciral.bm25-qt.sw.test-b.txt

@@ -542,17 +884,501 @@
 Evaluation commands:
 python -m pyserini.eval.trec_eval \
-  -c -m recall.100 ciral-v1.0-yo-dev \
-  run.ciral.bm25-mono.yo.dev.txt
+  -c -m recall.100 ciral-v1.0-yo-test-b \
+  run.ciral.bm25-qt.yo.test-b.txt
+| BM25 Machine DT | 0.4394 | 0.4637 | 0.4918 | 0.7348 | 0.5324 |
+
+Command to generate run (per language):
+python -m pyserini.search.lucene \
+  --topics ciral-v1.0-${lang}-test-b \
+  --index ciral-v1.0-${lang}-en \
+  --output run.ciral.bm25-dt.${lang}.test-b.txt \
+  --batch 128 --threads 16 --bm25 --hits 1000
+
+Evaluation commands:
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 ciral-v1.0-${lang}-test-b \
+  run.ciral.bm25-dt.${lang}.test-b.txt
+| mDPR (tied encoders), pre-FT w/ MS MARCO | 0.1027 | 0.1345 | 0.3019 | 0.3249 | 0.2160 |
+
+Command to generate run (per language):
+python -m pyserini.search.faiss \
+  --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco \
+  --topics ciral-v1.0-${lang}-test-b \
+  --index ciral-v1.0-${lang}-mdpr-tied-pft-msmarco \
+  --output run.ciral.mdpr-tied-pft-msmarco.${lang}.test-b.txt \
+  --batch 128 --threads 16 --hits 1000
+
+Evaluation commands:
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 ciral-v1.0-${lang}-test-b \
+  run.ciral.mdpr-tied-pft-msmarco.${lang}.test-b.txt
+| Afriberta, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi | 0.3900 | 0.3558 | 0.4608 | 0.2907 | 0.3743 |
+
+Command to generate run (per language):
+python -m pyserini.search.faiss \
+  --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi \
+  --topics ciral-v1.0-${lang}-test-b \
+  --index ciral-v1.0-${lang}-afriberta-dpr-ptf-msmarco-ft-mrtydi \
+  --output run.ciral.afriberta-pft-msmarco-ft-mrtydi.${lang}.test-b.txt \
+  --batch 128 --threads 16 --hits 1000
+
+Evaluation commands:
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 ciral-v1.0-${lang}-test-b \
+  run.ciral.afriberta-pft-msmarco-ft-mrtydi.${lang}.test-b.txt
+| RRF Fusion of BM25 Machine DT and Afriberta-DPR | 0.6007 | 0.5618 | 0.7007 | 0.7525 | 0.6539 |
+
+Command to generate run (per language):
+python -m pyserini.fusion \
+  --runs run.ciral.bm25-dt.${lang}.test-b.txt run.ciral.afriberta-pft-msmarco-ft-mrtydi.${lang}.test-b.txt \
+  --runtag rrf-afridpr-bmdt \
+  --method rrf --rrf.k 60 \
+  --output run.ciral.bm25-dt-afriberta-dpr-fusion.${lang}.test-b.txt
+
+Evaluation commands:
+python -m pyserini.eval.trec_eval \
+  -c -m recall.100 ciral-v1.0-${lang}-test-b \
+  run.ciral.bm25-dt-afriberta-dpr-fusion.${lang}.test-b.txt
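The fusion rows are produced by `pyserini.fusion` with `--method rrf --rrf.k 60`; reciprocal rank fusion itself is just score(d) = sum over runs of 1/(k + rank(d)), sketched below:

```python
from collections import defaultdict

def rrf(runs, k=60):
    """runs: list of ranked docid lists for one query (here, the BM25-DT
    and AfriBERTa-DPR rankings); returns docids ordered by fused score."""
    scores = defaultdict(float)
    for run in runs:
        for rank, docid in enumerate(run, start=1):
            scores[docid] += 1.0 / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)
```

The constant k=60 damps the contribution of lower-ranked documents, so a document ranked well by either retriever still surfaces near the top of the fused list.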
@@ -584,13 +1410,13 @@ Programmatic Execution

 Run all languages for a specific condition and show commands:

-python -m pyserini.2cr.ciral --condition bm25-mono --display-commands
+python -m pyserini.2cr.ciral --condition bm25-qt --display-commands

 Run a particular language for a specific condition and show commands:

-python -m pyserini.2cr.ciral --condition bm25-mono --language somali --display-commands
+python -m pyserini.2cr.ciral --condition bm25-qt --language somali --display-commands

 Run all languages for all conditions and show commands:

@@ -604,7 +1430,7 @@ Programmatic Execution

 For a specific condition, just show the commands and do not run:

-python -m pyserini.2cr.ciral --condition bm25-mono --display-commands --dry-run
+python -m pyserini.2cr.ciral --condition bm25-qt --display-commands --dry-run

 This will generate exactly the commands for a specific condition above (corresponding to a row in the table).

@@ -612,7 +1438,7 @@ Programmatic Execution

 For a specific condition and language, just show the commands and do not run:

-python -m pyserini.2cr.ciral --condition bm25-mono --language somali --display-commands --dry-run
+python -m pyserini.2cr.ciral --condition bm25-qt --language somali --display-commands --dry-run

 For all conditions, just show the commands and do not run and skip evaluation:

@@ -624,7 +1450,7 @@ Programmatic Execution

 Finally, to generate this page:

-python -m pyserini.2cr.ciral --generate-report --output docs/2cr/ciral.html
+python -m pyserini.2cr.ciral --generate-report --output docs/2cr/ciral.html --display-split test-b

 The output file ciral.html should be identical to this page.

diff --git a/pyserini/2cr/ciral.py b/pyserini/2cr/ciral.py
index 763ccfbda..bc07a421d 100644
--- a/pyserini/2cr/ciral.py
+++ b/pyserini/2cr/ciral.py
@@ -32,6 +32,7 @@
 dense_batch_size = 512
 sparse_threads = 16
 sparse_batch_size = 128
+fusion_tag = 'rrf-afridpr-bmdt'

 languages = [
     ['ha', 'hausa'],
@@ -40,22 +41,24 @@
     ['yo', 'yoruba']
 ]

-html_display = OrderedDict()
-html_display['bm25-mono'] = 'BM25 Monolingual (Human QT)'
-
-## Other models to add
-
-# html_display['bm25-qt'] = 'BM25 Machine QT'
-# html_display['bm25-dt'] = 'BM25 Machine DT'
-# html_display['mdpr-tied-pft-msmarco'] = 'mDPR (tied encoders), pre-FT w/ MS MARCO'
-# html_display['mdpr-tied-pft-msmarco-ft-all'] = 'mDPR (tied encoders), pre-FT w/ MS MARCO FT w/ all Mr. TyDi'
-# html_display['afriberta-pft-msmarco-ft-mrtydi'] = 'Afriberta, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi'
+all_splits = {
+    'test-a': 'Test Set A',
+    'test-a-pools': 'Test Set A (Pools)',
+    'test-b': 'Test Set B'
+}

+html_display = OrderedDict()
+html_display['bm25-qt'] = 'BM25 Human QT'
+html_display['bm25-dt'] = 'BM25 Machine DT'
+html_display['mdpr-tied-pft-msmarco'] = 'mDPR (tied encoders), pre-FT w/ MS MARCO'
+html_display['afriberta-pft-msmarco-ft-mrtydi'] = 'Afriberta, pre-FT w/ MS MARCO FT w/ latin Mr. TyDi'
+html_display['bm25-dt-afriberta-dpr-fusion'] = 'RRF Fusion of BM25 Machine DT and Afriberta-DPR'

 models = list(html_display)

 trec_eval_metric_definitions = {
     'nDCG@20': '-c -m ndcg_cut.20',
-    'MRR@10': '-c -M 10 -m recip_rank',
     'R@100': '-c -m recall.100',
 }

@@ -67,6 +70,7 @@ def format_run_command(raw):
         .replace('--index', '\\\n  --index') \
         .replace('--output ', '\\\n  --output ') \
         .replace('--runs', '\\\n  --runs ') \
+        .replace('--runtag', '\\\n  --runtag ') \
         .replace('--batch ', '\\\n  --batch ') \
         .replace('--threads 12', '--threads 12 \\\n  ')

@@ -168,41 +172,56 @@ def generate_report(args):
         yaml_data = yaml.safe_load(f)

     for condition in yaml_data['conditions']:
         name = condition['name']
+        lang = name.split('.')[-1]
         eval_key = condition['eval_key']
         cmd_template = condition['command']
+        is_fusion = 'fusion' in name

-        split = 'dev'
+        display_split = args.display_split

-        runfile = os.path.join(args.directory, f'run.ciral.{name}.{split}.txt')
-        cmd = Template(cmd_template).substitute(split=split, output=runfile,
-                                                sparse_threads=sparse_threads, sparse_batch_size=sparse_batch_size,
-                                                dense_threads=dense_threads, dense_batch_size=dense_batch_size)
+        runfile = os.path.join(args.directory, f'run.ciral.{name}.{display_split}.txt')
+        if is_fusion:
+            bm25_dt_output = os.path.join(args.directory,
+                                          f'run.ciral.bm25-dt.{lang}.{display_split}.txt')
+            afriberta_dpr_output = os.path.join(args.directory,
+                                                f'run.ciral.afriberta-pft-msmarco-ft-mrtydi.{lang}.{display_split}.txt')
+            expected_args = dict(output=runfile, bm25_dt_output=bm25_dt_output,
+                                 afriberta_dpr_output=afriberta_dpr_output, fusion_tag=fusion_tag)
+        else:
+            expected_args = dict(split=display_split, output=runfile,
+                                 sparse_threads=sparse_threads, sparse_batch_size=sparse_batch_size,
+                                 dense_threads=dense_threads, dense_batch_size=dense_batch_size)
+
+        cmd = Template(cmd_template).substitute(**expected_args)
         commands[name] = format_run_command(cmd)

-        for expected in condition['splits'][0]['scores']:
-            for metric in expected:
-                table[name][split][metric] = expected[metric]
-
-                eval_cmd = f'python -m pyserini.eval.trec_eval ' + \
-                           f'{trec_eval_metric_definitions[metric]} {eval_key}-{split} {runfile}'
-                eval_commands[name][metric] = format_eval_command(eval_cmd)
+        for split in condition['splits']:
+            if split['split'] == display_split:
+                for scores in split['scores']:
+                    for metric in scores:
+                        table[name][display_split][metric] = scores[metric]
+
+                        eval_cmd = f'python -m pyserini.eval.trec_eval ' + \
+                                   f'{trec_eval_metric_definitions[metric]} {eval_key}-{display_split} {runfile}'
+                        eval_commands[name][metric] = format_eval_command(eval_cmd)

     tables_html = []

-    # Build the table for nDCG@20, dev queries
-    html_rows = generate_table_rows(table, row_template, commands, eval_commands, 1, split, 'nDCG@20')
+    # Build the table for nDCG@20
+    html_rows = generate_table_rows(table, row_template, commands, eval_commands, 1, display_split, 'nDCG@20')
     all_rows = '\n'.join(html_rows)
-    tables_html.append(Template(table_template).substitute(desc=f'nDCG@20, {split} queries', rows=all_rows))
-
-    # Build the table for MRR@10, dev queries
-    html_rows = generate_table_rows(table, row_template, commands, eval_commands, 2, split, 'MRR@10')
-    all_rows = '\n'.join(html_rows)
-    tables_html.append(Template(table_template).substitute(desc=f'MRR@10, {split} queries', rows=all_rows))
+    tables_html.append(Template(table_template).substitute(desc=f'nDCG@20, {all_splits[display_split]}',
+                                                           rows=all_rows))

     # Build the table for R@100
-    html_rows = generate_table_rows(table, row_template, commands, eval_commands, 3, split, 'R@100')
+    html_rows = generate_table_rows(table, row_template, commands, eval_commands, 3, display_split, 'R@100')
     all_rows = '\n'.join(html_rows)
-    tables_html.append(Template(table_template).substitute(desc=f'Recall@100, {split} queries', rows=all_rows))
+    tables_html.append(Template(table_template).substitute(desc=f'Recall@100, {all_splits[display_split]}',
+                                                           rows=all_rows))

     with open(args.output, 'w') as out:
         out.write(Template(html_template).substitute(title='CIRAL', tables=' '.join(tables_html)))

@@ -232,49 +251,63 @@ def run_conditions(args):
         eval_key = condition['eval_key']
         cmd_template = condition['command']

-        split = "dev"
-
-        print(f'  - split: {split}')
-
-        runfile = os.path.join(args.directory, f'run.ciral.{name}.{split}.txt')
-        cmd = Template(cmd_template).substitute(split=split, output=runfile,
-                                                sparse_threads=sparse_threads, sparse_batch_size=sparse_batch_size,
-                                                dense_threads=dense_threads, dense_batch_size=dense_batch_size)
-
-        if args.display_commands:
-            print(f'\n```bash\n{format_run_command(cmd)}\n```\n')
-
-        if not os.path.exists(runfile):
-            if not args.dry_run:
-                os.system(cmd)
-
+        print(f'condition {name}:')
+        is_fusion = 'fusion' in name
+
+        for splits in condition['splits']:
+            split = splits['split']
+            print(f'  - split: {split}')
+
+            if split.endswith('pools'):
+                test_split = 'test-a'
+            else:
+                test_split = split
+
+            runfile = os.path.join(args.directory, f'run.ciral.{name}.{split}.txt')
+            if is_fusion:
+                bm25_dt_output = os.path.join(args.directory,
+                                              f'run.ciral.bm25-dt.{lang}.{split}.txt')
+                afriberta_dpr_output = os.path.join(args.directory,
+                                                    f'run.ciral.afriberta-pft-msmarco-ft-mrtydi.{lang}.{split}.txt')
+                cmd = Template(cmd_template).substitute(split=test_split, output=runfile,
+                                                        bm25_dt_output=bm25_dt_output,
+                                                        afriberta_dpr_output=afriberta_dpr_output,
+                                                        fusion_tag=fusion_tag)
+            else:
+                cmd = Template(cmd_template).substitute(split=test_split, output=runfile,
+                                                        sparse_threads=sparse_threads, sparse_batch_size=sparse_batch_size,
+                                                        dense_threads=dense_threads, dense_batch_size=dense_batch_size)
+
+            if args.display_commands:
+                print(f'\n```bash\n{format_run_command(cmd)}\n```\n')
+
+            if not os.path.exists(runfile):
+                if not args.dry_run:
+                    os.system(cmd)
-        for expected in condition['splits'][0]['scores']:
-            for metric in expected:
-                if not args.skip_eval:
-                    if not os.path.exists(runfile):
-                        continue
-                    score = float(run_eval_and_return_metric(metric, f'{eval_key}-{split}',
-                                                             trec_eval_metric_definitions[metric], runfile))
-                    if math.isclose(score, float(expected[metric])):
-                        result_str = ok_str
-                    else:
-                        result_str = fail_str + f' expected {expected[metric]:.4f}'
-                    print(f'    {metric:7}: {score:.4f} {result_str}')
-                    table[name][split][metric] = score
-                else:
-                    table[name][split][metric] = expected[metric]
+            for expected in splits['scores']:
+                for metric in expected:
+                    if not args.skip_eval:
+                        if not os.path.exists(runfile):
+                            continue
+                        score = float(run_eval_and_return_metric(metric, f'{eval_key}-{split}',
+                                                                 trec_eval_metric_definitions[metric], runfile))
+                        if math.isclose(score, float(expected[metric])):
+                            result_str = ok_str
+                        else:
+                            result_str = fail_str + f' expected {expected[metric]:.4f}'
+                        print(f'    {metric:7}: {score:.4f} {result_str}')
+                        table[name][split][metric] = score
+                    else:
+                        table[name][split][metric] = expected[metric]

-        print('')
+            print('')

-    for metric in ['nDCG@20', 'MRR@10', 'R@100']:
-        for split in ['dev']:  # To add test later
+    for metric in ['nDCG@20', 'R@100']:
+        for split in ['test-a', 'test-b']:
             print_results(table, metric, split)

     end = time.time()
     print(f'Total elapsed time: {end - start:.0f}s')

-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Generate regression matrix for CIRAL.')
     parser.add_argument('--condition', type=str,
@@ -283,6 +316,8 @@
     parser.add_argument('--list-conditions', action='store_true', default=False, help='List available conditions.')
     # For generating reports
     parser.add_argument('--generate-report', action='store_true', default=False, help='Generate report.')
+    parser.add_argument('--display-split', type=str, help='Split to generate report on.',
+                        default='test-b', required=False)
     parser.add_argument('--output', type=str, help='File to store report.', required=False)
     # For actually running the experimental conditions
     parser.add_argument('--all', action='store_true', default=False, help='Run using all languages.')
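The `command:` strings in ciral.yaml below are `string.Template` patterns that `generate_report` and `run_conditions` fill via `substitute(**expected_args)`; an illustrative resolution of a fusion condition (file names here are examples only):

```python
from string import Template

# Template copied from the bm25-dt-afriberta-dpr-fusion conditions below.
cmd_template = ('python -m pyserini.fusion --runs ${bm25_dt_output} '
                '${afriberta_dpr_output} --runtag ${fusion_tag} '
                '--method rrf --rrf.k 60 --output $output')

print(Template(cmd_template).substitute(
    bm25_dt_output='runs/run.ciral.bm25-dt.ha.test-b.txt',
    afriberta_dpr_output='runs/run.ciral.afriberta-pft-msmarco-ft-mrtydi.ha.test-b.txt',
    fusion_tag='rrf-afridpr-bmdt',
    output='runs/run.ciral.bm25-dt-afriberta-dpr-fusion.ha.test-b.txt'))
```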
diff --git a/pyserini/2cr/ciral.yaml b/pyserini/2cr/ciral.yaml
index 05e882274..1cdaa72a6 100644
--- a/pyserini/2cr/ciral.yaml
+++ b/pyserini/2cr/ciral.yaml
@@ -1,58 +1,334 @@
 conditions:
-  # BM25 Monolingual
-  - name: bm25-mono.ha
+  # BM25 QT
+  - name: bm25-qt.ha
     eval_key: ciral-v1.0-ha
     command: python -m pyserini.search.lucene --language ha --topics ciral-v1.0-ha-${split}-native --index ciral-v1.0-ha --output $output --batch ${sparse_batch_size} --threads ${sparse_threads} --bm25 --hits 1000
     splits:
-      - split: dev
-        scores:
-          - nDCG@20: 0.2039
-            MRR@10: 0.3153
-            R@100: 0.2760
-      - split: test
-        scores:
-          - nDCG@20:
-            MRR@10:
-            R@100:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1656
+            R@100: 0.2874
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.1161
+            R@100: 0.1916
+      - split: test-b
+        scores:
+          - nDCG@20: 0.2121
+            R@100: 0.3800
-  - name: bm25-mono.so
+  - name: bm25-qt.so
     eval_key: ciral-v1.0-so
     command: python -m pyserini.search.lucene --language so --topics ciral-v1.0-so-${split}-native --index ciral-v1.0-so --output $output --batch ${sparse_batch_size} --threads ${sparse_threads} --bm25 --hits 1000
     splits:
-      - split: dev
-        scores:
-          - nDCG@20: 0.1500
-            MRR@10: 0.4000
-            R@100: 0.1850
-      - split: test
-        scores:
-          - nDCG@20:
-            MRR@10:
-            R@100:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1214
+            R@100: 0.2615
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.1232
+            R@100: 0.1923
+      - split: test-b
+        scores:
+          - nDCG@20: 0.1725
+            R@100: 0.3479
-  - name: bm25-mono.sw
+  - name: bm25-qt.sw
     eval_key: ciral-v1.0-sw
     command: python -m pyserini.search.lucene --language sw --topics ciral-v1.0-sw-${split}-native --index ciral-v1.0-sw --output $output --batch ${sparse_batch_size} --threads ${sparse_threads} --bm25 --hits 1000
     splits:
-      - split: dev
-        scores:
-          - nDCG@20: 0.1812
-            MRR@10: 0.1681
-            R@100: 0.4742
-      - split: test
-        scores:
-          - nDCG@20:
-            MRR@10:
-            R@100:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1720
+            R@100: 0.4161
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.1500
+            R@100: 0.2430
+      - split: test-b
+        scores:
+          - nDCG@20: 0.1727
+            R@100: 0.4166
-  - name: bm25-mono.yo
+  - name: bm25-qt.yo
     eval_key: ciral-v1.0-yo
     command: python -m pyserini.search.lucene --language yo --topics ciral-v1.0-yo-${split}-native --index ciral-v1.0-yo --output $output --batch ${sparse_batch_size} --threads ${sparse_threads} --bm25 --hits 1000
     splits:
-      - split: dev
-        scores:
-          - nDCG@20: 0.2797
-            MRR@10: 0.3833
-            R@100: 0.5114
-      - split: test
-        scores:
-          - nDCG@20:
-            MRR@10:
-            R@100:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.4023
+            R@100: 0.6659
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.3118
+            R@100: 0.4899
+      - split: test-b
+        scores:
+          - nDCG@20: 0.3459
+            R@100: 0.6434
+
+  # BM25 DT
+  - name: bm25-dt.ha
+    eval_key: ciral-v1.0-ha
+    command: python -m pyserini.search.lucene --topics ciral-v1.0-ha-${split} --index ciral-v1.0-ha-en --output $output --batch ${sparse_batch_size} --threads ${sparse_threads} --bm25 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1619
+            R@100: 0.4099
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.2142
+            R@100: 0.4039
+      - split: test-b
+        scores:
+          - nDCG@20: 0.2124
+            R@100: 0.4394
+  - name: bm25-dt.so
+    eval_key: ciral-v1.0-so
+    command: python -m pyserini.search.lucene --topics ciral-v1.0-so-${split} --index ciral-v1.0-so-en --output $output --batch ${sparse_batch_size} --threads ${sparse_threads} --bm25 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1590
+            R@100: 0.3904
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.2461
+            R@100: 0.4379
+      - split: test-b
+        scores:
+          - nDCG@20: 0.2186
+            R@100: 0.4637
+  - name: bm25-dt.sw
+    eval_key: ciral-v1.0-sw
+    command: python -m pyserini.search.lucene --topics ciral-v1.0-sw-${split} --index ciral-v1.0-sw-en --output $output --batch ${sparse_batch_size} --threads ${sparse_threads} --bm25 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.2033
+            R@100: 0.4786
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.2327
+            R@100: 0.3636
+      - split: test-b
+        scores:
+          - nDCG@20: 0.2582
+            R@100: 0.4918
+  - name: bm25-dt.yo
+    eval_key: ciral-v1.0-yo
+    command: python -m pyserini.search.lucene --topics ciral-v1.0-yo-${split} --index ciral-v1.0-yo-en --output $output --batch ${sparse_batch_size} --threads ${sparse_threads} --bm25 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.4265
+            R@100: 0.7832
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.4451
+            R@100: 0.7199
+      - split: test-b
+        scores:
+          - nDCG@20: 0.3700
+            R@100: 0.7348
+
+  # mdpr-tied-pft-msmarco
+  - name: mdpr-tied-pft-msmarco.ha
+    eval_key: ciral-v1.0-ha
+    command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics ciral-v1.0-ha-${split} --index ciral-v1.0-ha-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.0150
+            R@100: 0.0845
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.0472
+            R@100: 0.0947
+      - split: test-b
+        scores:
+          - nDCG@20: 0.0397
+            R@100: 0.1027
+  - name: mdpr-tied-pft-msmarco.so
+    eval_key: ciral-v1.0-so
+    command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics ciral-v1.0-so-${split} --index ciral-v1.0-so-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.0563
+            R@100: 0.1253
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.0621
+            R@100: 0.0988
+      - split: test-b
+        scores:
+          - nDCG@20: 0.0635
+            R@100: 0.1345
+  - name: mdpr-tied-pft-msmarco.sw
+    eval_key: ciral-v1.0-sw
+    command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics ciral-v1.0-sw-${split} --index ciral-v1.0-sw-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.0942
+            R@100: 0.2655
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.1556
+            R@100: 0.2117
+      - split: test-b
+        scores:
+          - nDCG@20: 0.1227
+            R@100: 0.3019
+  - name: mdpr-tied-pft-msmarco.yo
+    eval_key: ciral-v1.0-yo
+    command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/mdpr-tied-pft-msmarco --topics ciral-v1.0-yo-${split} --index ciral-v1.0-yo-mdpr-tied-pft-msmarco --output $output --batch 128 --threads 16 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1776
+            R@100: 0.3877
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.1819
+            R@100: 0.3132
+      - split: test-b
+        scores:
+          - nDCG@20: 0.1458
+            R@100: 0.3249
+
+  # afriberta-pft-msmarco-ft-mrtydi-latin
+  - name: afriberta-pft-msmarco-ft-mrtydi.ha
+    eval_key: ciral-v1.0-ha
+    command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --topics ciral-v1.0-ha-${split} --index ciral-v1.0-ha-afriberta-dpr-ptf-msmarco-ft-mrtydi --output $output --batch 128 --threads 16 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1864
+            R@100: 0.4379
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.1726
+            R@100: 0.2692
+      - split: test-b
+        scores:
+          - nDCG@20: 0.2028
+            R@100: 0.3900
+  - name: afriberta-pft-msmarco-ft-mrtydi.so
+    eval_key: ciral-v1.0-so
+    command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --topics ciral-v1.0-so-${split} --index ciral-v1.0-so-afriberta-dpr-ptf-msmarco-ft-mrtydi --output $output --batch 128 --threads 16 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1878
+            R@100: 0.4029
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.1345
+            R@100: 0.2017
+      - split: test-b
+        scores:
+          - nDCG@20: 0.1682
+            R@100: 0.3558
+  - name: afriberta-pft-msmarco-ft-mrtydi.sw
+    eval_key: ciral-v1.0-sw
+    command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --topics ciral-v1.0-sw-${split} --index ciral-v1.0-sw-afriberta-dpr-ptf-msmarco-ft-mrtydi --output $output --batch 128 --threads 16 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.2311
+            R@100: 0.4977
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.1602
+            R@100: 0.2093
+      - split: test-b
+        scores:
+          - nDCG@20: 0.2166
+            R@100: 0.4608
+  - name: afriberta-pft-msmarco-ft-mrtydi.yo
+    eval_key: ciral-v1.0-yo
+    command: python -m pyserini.search.faiss --encoder-class auto --encoder castorini/afriberta-dpr-ptf-msmarco-ft-latin-mrtydi --topics ciral-v1.0-yo-${split} --index ciral-v1.0-yo-afriberta-dpr-ptf-msmarco-ft-mrtydi --output $output --batch 128 --threads 16 --hits 1000
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.1288
+            R@100: 0.3421
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.0916
+            R@100: 0.2262
+      - split: test-b
+        scores:
+          - nDCG@20: 0.1157
+            R@100: 0.2907
+
+  # fusion
+  - name: bm25-dt-afriberta-dpr-fusion.ha
+    eval_key: ciral-v1.0-ha
+    command: python -m pyserini.fusion --runs ${bm25_dt_output} ${afriberta_dpr_output} --runtag ${fusion_tag} --method rrf --rrf.k 60 --output $output
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.2842
+            R@100: 0.6107
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.3108
+            R@100: 0.4638
+      - split: test-b
+        scores:
+          - nDCG@20: 0.2935
+            R@100: 0.6007
+
+  - name: bm25-dt-afriberta-dpr-fusion.so
+    eval_key: ciral-v1.0-so
+    command: python -m pyserini.fusion --runs ${bm25_dt_output} ${afriberta_dpr_output} --runtag ${fusion_tag} --method rrf --rrf.k 60 --output $output
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.2608
+            R@100: 0.5512
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.2860
+            R@100: 0.4565
+      - split: test-b
+        scores:
+          - nDCG@20: 0.2878
+            R@100: 0.5618
+
+  - name: bm25-dt-afriberta-dpr-fusion.sw
+    eval_key: ciral-v1.0-sw
+    command: python -m pyserini.fusion --runs ${bm25_dt_output} ${afriberta_dpr_output} --runtag ${fusion_tag} --method rrf --rrf.k 60 --output $output
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.2716
+            R@100: 0.7456
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.2821
+            R@100: 0.4290
+      - split: test-b
+        scores:
+          - nDCG@20: 0.3187
+            R@100: 0.7007
+
+  - name: bm25-dt-afriberta-dpr-fusion.yo
+    eval_key: ciral-v1.0-yo
+    command: python -m pyserini.fusion --runs ${bm25_dt_output} ${afriberta_dpr_output} --runtag ${fusion_tag} --method rrf --rrf.k 60 --output $output
+    splits:
+      - split: test-a
+        scores:
+          - nDCG@20: 0.3843
+            R@100: 0.8195
+      - split: test-a-pools
+        scores:
+          - nDCG@20: 0.3832
+            R@100: 0.6960
+      - split: test-b
+        scores:
+          - nDCG@20: 0.3435
+            R@100: 0.7525
\ No newline at end of file

diff --git a/pyserini/2cr/ciral_html.template b/pyserini/2cr/ciral_html.template
index 50cba14aa..f399b8d64 100644
--- a/pyserini/2cr/ciral_html.template
+++ b/pyserini/2cr/ciral_html.template
@@ -158,13 +158,13 @@ python -m pyserini.2cr.ciral --list-conditions
 Run all languages for a specific condition and show commands:
 
-python -m pyserini.2cr.ciral --condition bm25-mono --display-commands
+python -m pyserini.2cr.ciral --condition bm25-qt --display-commands
 
 Run a particular language for a specific condition and show commands:
 
-python -m pyserini.2cr.ciral --condition bm25-mono --language somali --display-commands
+python -m pyserini.2cr.ciral --condition bm25-qt --language somali --display-commands
 
 Run all languages for all conditions and show commands:
 
@@ -178,7 +178,7 @@ python -m pyserini.2cr.ciral --all --display-commands
 For a specific condition, just show the commands and do not run:
 
-python -m pyserini.2cr.ciral --condition bm25-mono --display-commands --dry-run
+python -m pyserini.2cr.ciral --condition bm25-qt --display-commands --dry-run
 
 This will generate exactly the commands for a specific condition above (corresponding to a row in the table).
 
@@ -186,7 +186,7 @@ python -m pyserini.2cr.ciral --condition bm25-mono --display-commands --dry-run
 For a specific condition and language, just show the commands and do not run:
 
-python -m pyserini.2cr.ciral --condition bm25-mono --language somali --display-commands --dry-run
+python -m pyserini.2cr.ciral --condition bm25-qt --language somali --display-commands --dry-run
 
 For all conditions, just show the commands and do not run and skip evaluation:
 
@@ -198,7 +198,7 @@ python -m pyserini.2cr.ciral --all --display-commands --dry-run --skip-eval
 Finally, to generate this page:
 
-python -m pyserini.2cr.ciral --generate-report --output docs/2cr/ciral.html
+python -m pyserini.2cr.ciral --generate-report --output docs/2cr/ciral.html --display-split test-b
 
 The output file ciral.html should be identical to this page.
diff --git a/pyserini/prebuilt_index_info.py b/pyserini/prebuilt_index_info.py
index d320d2d56..17cf48d16 100644
--- a/pyserini/prebuilt_index_info.py
+++ b/pyserini/prebuilt_index_info.py
@@ -6461,7 +6461,7 @@
         "size compressed (bytes)": 2023010322,
         "documents": 715355,
         "downloaded": False,
-        "texts": "ciral-v1.0-ha",
+        "texts": "ciral-v1.0-ha"
     },

     "ciral-v1.0-so-mdpr-tied-pft-msmarco": {
@@ -6475,7 +6475,7 @@
         "size compressed (bytes)": 2356035617,
         "documents": 827552,
         "downloaded": False,
-        "texts": "ciral-v1.0-so",
+        "texts": "ciral-v1.0-so"
     },

     "ciral-v1.0-sw-mdpr-tied-pft-msmarco": {
@@ -6489,7 +6489,7 @@
         "size compressed (bytes)": 2689039681,
         "documents": 949013,
         "downloaded": False,
-        "texts": "ciral-v1.0-sw",
+        "texts": "ciral-v1.0-sw"
     },

     "ciral-v1.0-yo-mdpr-tied-pft-msmarco": {
@@ -6503,7 +6503,7 @@
         "size compressed (bytes)": 233478865,
         "documents": 82095,
         "downloaded": False,
-        "texts": "ciral-v1.0-yo",
+        "texts": "ciral-v1.0-yo"
     },

     "ciral-v1.0-ha-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi": {
@@ -6517,7 +6517,7 @@
         "size compressed (bytes)": 2023992537,
         "documents": 715355,
         "downloaded": False,
-        "texts": "ciral-v1.0-ha",
+        "texts": "ciral-v1.0-ha"
     },

     "ciral-v1.0-so-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi": {
@@ -6531,7 +6531,7 @@
         "size compressed (bytes)": 2356542056,
         "documents": 827552,
         "downloaded": False,
-        "texts": "ciral-v1.0-so",
+        "texts": "ciral-v1.0-so"
     },

     "ciral-v1.0-sw-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi": {
@@ -6545,7 +6545,7 @@
         "size compressed (bytes)": 2688836963,
         "documents": 949013,
         "downloaded": False,
-        "texts": "ciral-v1.0-sw",
+        "texts": "ciral-v1.0-sw"
     },

     "ciral-v1.0-yo-afriberta-dpr-ptf-msmarco-ft-latin-mrtydi": {
@@ -6559,9 +6559,9 @@
         "size compressed (bytes)": 233490972,
         "documents": 82095,
         "downloaded": False,
-        "texts": "ciral-v1.0-yo",
+        "texts": "ciral-v1.0-yo"
     },
-},
+}

 FAISS_INDEX_INFO_WIKIPEDIA = {
     "wikipedia-dpr-100w.dpr-multi": {
@@ -6985,4 +6985,5 @@
     **FAISS_INDEX_INFO_MRTYDI,
     **FAISS_INDEX_INFO_MIRACL,
     **FAISS_INDEX_INFO_WIKIPEDIA,
+    **FAISS_INDEX_INFO_CIRAL,
     **FAISS_INDEX_INFO_OTHER}
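The `_base.py` registrations below make the new CIRAL splits addressable by name; a quick way to sanity-check them once the PR lands (a sketch, assuming the backing JTopics/JQrels resources download cleanly):

```python
from pyserini.search import get_qrels, get_topics

topics = get_topics('ciral-v1.0-ha-test-b-native')  # Hausa-language queries
qrels = get_qrels('ciral-v1.0-ha-test-b')           # graded relevance judgments
print(f'{len(topics)} topics, {len(qrels)} judged queries')
```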
diff --git a/pyserini/search/_base.py b/pyserini/search/_base.py
index b306c20b4..2dd77a389 100644
--- a/pyserini/search/_base.py
+++ b/pyserini/search/_base.py
@@ -357,6 +357,22 @@
     'atomic-v0.2-facebook.flava-full-image-validation': JTopics.ATOMIC_V021_FACEBOOK_FLAVA_FULL_IMAGE_VAL,

     # CIRAL topics
+    'ciral-v1.0-ha-test-a': JTopics.CIRAL_V10_HA_TEST_A,
+    'ciral-v1.0-so-test-a': JTopics.CIRAL_V10_SO_TEST_A,
+    'ciral-v1.0-sw-test-a': JTopics.CIRAL_V10_SW_TEST_A,
+    'ciral-v1.0-yo-test-a': JTopics.CIRAL_V10_YO_TEST_A,
+    'ciral-v1.0-ha-test-a-native': JTopics.CIRAL_V10_HA_TEST_A_NATIVE,
+    'ciral-v1.0-so-test-a-native': JTopics.CIRAL_V10_SO_TEST_A_NATIVE,
+    'ciral-v1.0-sw-test-a-native': JTopics.CIRAL_V10_SW_TEST_A_NATIVE,
+    'ciral-v1.0-yo-test-a-native': JTopics.CIRAL_V10_YO_TEST_A_NATIVE,
+    'ciral-v1.0-ha-test-b': JTopics.CIRAL_V10_HA_TEST_B,
+    'ciral-v1.0-so-test-b': JTopics.CIRAL_V10_SO_TEST_B,
+    'ciral-v1.0-sw-test-b': JTopics.CIRAL_V10_SW_TEST_B,
+    'ciral-v1.0-yo-test-b': JTopics.CIRAL_V10_YO_TEST_B,
+    'ciral-v1.0-ha-test-b-native': JTopics.CIRAL_V10_HA_TEST_B_NATIVE,
+    'ciral-v1.0-so-test-b-native': JTopics.CIRAL_V10_SO_TEST_B_NATIVE,
+    'ciral-v1.0-sw-test-b-native': JTopics.CIRAL_V10_SW_TEST_B_NATIVE,
+    'ciral-v1.0-yo-test-b-native': JTopics.CIRAL_V10_YO_TEST_B_NATIVE,
     'ciral-v1.0-ha-dev-native': JTopics.CIRAL_V10_HA_DEV_MONO,
     'ciral-v1.0-so-dev-native': JTopics.CIRAL_V10_SO_DEV_MONO,
     'ciral-v1.0-sw-dev-native': JTopics.CIRAL_V10_SW_DEV_MONO,
@@ -510,6 +526,18 @@
     'ciral-v1.0-so-dev': JQrels.CIRAL_V10_SO_DEV,
     'ciral-v1.0-sw-dev': JQrels.CIRAL_V10_SW_DEV,
     'ciral-v1.0-yo-dev': JQrels.CIRAL_V10_YO_DEV,
+    'ciral-v1.0-ha-test-a': JQrels.CIRAL_V10_HA_TEST_A,
+    'ciral-v1.0-so-test-a': JQrels.CIRAL_V10_SO_TEST_A,
+    'ciral-v1.0-sw-test-a': JQrels.CIRAL_V10_SW_TEST_A,
+    'ciral-v1.0-yo-test-a': JQrels.CIRAL_V10_YO_TEST_A,
+    'ciral-v1.0-ha-test-a-pools': JQrels.CIRAL_V10_HA_TEST_A_POOLS,
+    'ciral-v1.0-so-test-a-pools': JQrels.CIRAL_V10_SO_TEST_A_POOLS,
+    'ciral-v1.0-sw-test-a-pools': JQrels.CIRAL_V10_SW_TEST_A_POOLS,
+    'ciral-v1.0-yo-test-a-pools': JQrels.CIRAL_V10_YO_TEST_A_POOLS,
+    'ciral-v1.0-ha-test-b': JQrels.CIRAL_V10_HA_TEST_B,
+    'ciral-v1.0-so-test-b': JQrels.CIRAL_V10_SO_TEST_B,
+    'ciral-v1.0-sw-test-b': JQrels.CIRAL_V10_SW_TEST_B,
+    'ciral-v1.0-yo-test-b': JQrels.CIRAL_V10_YO_TEST_B,
 }

diff --git a/tools b/tools
index 03ebff1c8..d4f2be22d 160000
--- a/tools
+++ b/tools
@@ -1 +1 @@
-Subproject commit 03ebff1c854dae8a59e53e58fd54ff0b035ded24
+Subproject commit d4f2be22d4a9d19ef375a700ea1068a0dc877051