Skip to content

Commit

Permalink
Refactor and prune dependencies (#1997)
Browse files Browse the repository at this point in the history
Starts to untangle "core" dependencies (tests/, integrations/) from "optional" dependencies (tests-optional/, integrations-optional/). faiss is considered optional.
  • Loading branch information
lintool authored Oct 8, 2024
1 parent 67d07a0 commit 7ed8369
Show file tree
Hide file tree
Showing 116 changed files with 907 additions and 624 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
import os
import unittest

from integrations.utils import clean_files, run_command, parse_score, parse_score_qa, parse_score_msmarco
from pyserini.search import QueryEncoder
from integrations.utils import clean_files, run_command, parse_score_qa, parse_score_msmarco
from pyserini.search import get_topics
from pyserini.search.faiss._searcher import QueryEncoder


class TestAnce(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
import unittest

from integrations.utils import clean_files, run_command, parse_score_qa
from pyserini.search import QueryEncoder
from pyserini.search import get_topics
from pyserini.search.faiss._searcher import QueryEncoder


class TestDpr(unittest.TestCase):
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
import unittest

from integrations.utils import clean_files, run_command, parse_score
from pyserini.search import QueryEncoder
from pyserini.search import get_topics
from pyserini.search.faiss._searcher import QueryEncoder


class TestTctColBert(unittest.TestCase):
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from integrations.utils import clean_files, run_command, parse_score_msmarco
from pyserini.dsearch import SimpleDenseSearcher, TctColBertQueryEncoder
from pyserini.hsearch import HybridSearcher
from pyserini.index import IndexReader
from pyserini.search import SimpleSearcher
from pyserini.index.lucene import LuceneIndexReader
from pyserini.search import get_topics, get_qrels
from pyserini.search._deprecated import SimpleSearcher


class TestSIGIR2021(unittest.TestCase):
Expand Down Expand Up @@ -100,7 +100,7 @@ def test_figure5(self):
"""Sample code in Figure 5."""

# Initialize from a pre-built index:
reader = IndexReader.from_prebuilt_index('robust04')
reader = LuceneIndexReader.from_prebuilt_index('robust04')

terms = reader.terms()
term = next(terms)
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion integrations/clprf/test_clprf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from integrations.lucenesearcher_score_checker import LuceneSearcherScoreChecker
from integrations.utils import run_command, parse_score
from pyserini.search import LuceneSearcher
from pyserini.search.lucene import LuceneSearcher


class TestSearchIntegration(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion integrations/sparse/test_lucenesearcher_check_core17.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import unittest

from integrations.lucenesearcher_anserini_checker import LuceneSearcherAnseriniMatchChecker
from pyserini.search import LuceneSearcher
from pyserini.search.lucene import LuceneSearcher


class CheckSearchResultsAgainstAnseriniForCore17(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion integrations/sparse/test_lucenesearcher_check_core18.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import unittest

from integrations.lucenesearcher_anserini_checker import LuceneSearcherAnseriniMatchChecker
from pyserini.search import LuceneSearcher
from pyserini.search.lucene import LuceneSearcher


class CheckSearchResultsAgainstAnseriniForCore18(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion integrations/sparse/test_lucenesearcher_check_robust04.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import unittest

from integrations.lucenesearcher_anserini_checker import LuceneSearcherAnseriniMatchChecker
from pyserini.search import LuceneSearcher
from pyserini.search.lucene import LuceneSearcher


class CheckSearchResultsAgainstAnseriniForRobust04(unittest.TestCase):
Expand Down
2 changes: 1 addition & 1 deletion integrations/sparse/test_lucenesearcher_check_robust05.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import unittest

from integrations.lucenesearcher_anserini_checker import LuceneSearcherAnseriniMatchChecker
from pyserini.search import LuceneSearcher
from pyserini.search.lucene import LuceneSearcher


class CheckSearchResultsAgainstAnseriniForRobust05(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

from pyserini.fusion import FusionMethod
from pyserini.search import get_topics
from pyserini.search import LuceneFusionSearcher
from pyserini.search.lucene import LuceneFusionSearcher
from pyserini.trectools import TrecRun
from pyserini.util import download_url, download_and_unpack_index

Expand Down
25 changes: 21 additions & 4 deletions pyserini/2cr/atomic.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,31 @@
#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import importlib.resources
import math
import os
import sys
import time
from collections import defaultdict
from string import Template
import importlib.resources
import time

import yaml
import math
from ._base import run_eval_and_return_metric, ok_str, fail_str

from ._base import run_eval_and_return_metric, ok_str, fail_str

atomic_models = [
'ViT-L-14.laion2b_s32b_b82k',
Expand Down
2 changes: 1 addition & 1 deletion pyserini/2cr/beir.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#

import argparse
import importlib.resources
import math
import os
import sys
Expand All @@ -23,7 +24,6 @@
from datetime import datetime
from string import Template

import importlib.resources
import yaml

from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str
Expand Down
4 changes: 2 additions & 2 deletions pyserini/2cr/ciral.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@
#

import argparse
import importlib.resources
import math
import os
import sys
import time
import importlib.resources
from collections import defaultdict, OrderedDict
from datetime import datetime
from string import Template

import yaml

from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str
from ._base import run_eval_and_return_metric, ok_str, fail_str

dense_threads = 16
dense_batch_size = 512
Expand Down
2 changes: 1 addition & 1 deletion pyserini/2cr/miracl.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#

import argparse
import importlib.resources
import math
import os
import subprocess
Expand All @@ -24,7 +25,6 @@
from datetime import datetime
from string import Template

import importlib.resources
import yaml

from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str
Expand Down
2 changes: 1 addition & 1 deletion pyserini/2cr/mrtydi.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#

import argparse
import importlib.resources
import math
import os
import sys
Expand All @@ -23,7 +24,6 @@
from datetime import datetime
from string import Template

import importlib.resources
import yaml

from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str
Expand Down
2 changes: 1 addition & 1 deletion pyserini/2cr/msmarco.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#

import argparse
import importlib.resources
import math
import os
import re
Expand All @@ -24,7 +25,6 @@
from datetime import datetime
from string import Template

import importlib.resources
import yaml

from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str
Expand Down
2 changes: 1 addition & 1 deletion pyserini/2cr/odqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#

import argparse
import importlib.resources
import math
import os
import sys
Expand All @@ -23,7 +24,6 @@
from datetime import datetime
from string import Template

import importlib.resources
import yaml

from ._base import run_dpr_retrieval_eval_and_return_metric, convert_trec_run_to_dpr_retrieval_json, run_fusion, ok_str, \
Expand Down
2 changes: 0 additions & 2 deletions pyserini/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,3 @@
#

from ._base import get_lucene_analyzer, Analyzer, JAnalyzer, JAnalyzerUtils, JDefaultEnglishAnalyzer, JWhiteSpaceAnalyzer

__all__ = ['get_lucene_analyzer', 'Analyzer', 'JAnalyzer', 'JAnalyzerUtils', 'JDefaultEnglishAnalyzer', 'JWhiteSpaceAnalyzer']
2 changes: 1 addition & 1 deletion pyserini/analysis/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from typing import List

from ..pyclass import autoclass
from pyserini.pyclass import autoclass

# Wrappers around Lucene classes
JAnalyzer = autoclass('org.apache.lucene.analysis.Analyzer')
Expand Down
2 changes: 0 additions & 2 deletions pyserini/collection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,3 @@

from ._base import Collection, FileSegment, SourceDocument
from ._collection_support import Cord19Article

__all__ = ['Collection', 'FileSegment', 'SourceDocument', 'Cord19Article']
8 changes: 5 additions & 3 deletions pyserini/demo/acl.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@
--port [PORT] --hits [Number of hits]
--k1 [BM25 k1] --b [BM25 b] --device [cpu, cuda]
"""
import json

import logging
from argparse import ArgumentParser
from functools import partial
from typing import Callable, Optional, Tuple, Union

from flask import Flask, render_template, request, flash, jsonify
from pyserini.search import LuceneSearcher, FaissSearcher, AutoQueryEncoder
from flask import Flask, render_template, request, flash

from pyserini.search.faiss import FaissSearcher
from pyserini.search.lucene import LuceneSearcher

logging.basicConfig(
format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
Expand Down
4 changes: 3 additions & 1 deletion pyserini/demo/atomic.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@
--port [PORT] --hits [Number of hits] --index [BM25 or {dense retrieval flag}]
--k1 [BM25 k1] --b [BM25 b] --device [cpu, cuda]
"""

import json
from argparse import ArgumentParser
from functools import partial
from typing import Callable, Optional, Tuple, Union

from flask import Flask, render_template, request, flash, jsonify
from pyserini.search import LuceneSearcher, FaissSearcher

from pyserini.search.faiss import FaissSearcher
from pyserini.search.lucene import LuceneSearcher

INDEX_NAMES = (
'atomic_image_v0.2_small_validation',
Expand Down
8 changes: 4 additions & 4 deletions pyserini/demo/dpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@
import json
import random

from pyserini.search.lucene import LuceneSearcher
from pyserini.search import get_topics
from pyserini.search.faiss import FaissSearcher, DprQueryEncoder
from pyserini.search.hybrid import HybridSearcher
from pyserini import search
from pyserini.search.lucene import LuceneSearcher


class DPRDemo(cmd.Cmd):
nq_dev_topics = list(search.get_topics('dpr-nq-dev').values())
trivia_dev_topics = list(search.get_topics('dpr-trivia-dev').values())
nq_dev_topics = list(get_topics('dpr-nq-dev').values())
trivia_dev_topics = list(get_topics('dpr-trivia-dev').values())

ssearcher = LuceneSearcher.from_prebuilt_index('wikipedia-dpr')
searcher = ssearcher
Expand Down
6 changes: 5 additions & 1 deletion pyserini/demo/miracl.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,18 @@
--port [PORT] --hits [Number of hits] --index [BM25 or mdpr-tied-pft-msmarco]
--k1 [BM25 k1] --b [BM25 b] --device [cpu, cuda]
"""

import json
import logging
from argparse import ArgumentParser
from functools import partial
from typing import Callable, Optional, Tuple, Union

from flask import Flask, render_template, request, flash, jsonify
from pyserini.search import LuceneSearcher, FaissSearcher, AutoQueryEncoder

from pyserini.encode import AutoQueryEncoder
from pyserini.search.faiss import FaissSearcher
from pyserini.search.lucene import LuceneSearcher

logging.basicConfig(
format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
Expand Down
7 changes: 3 additions & 4 deletions pyserini/demo/msmarco.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,16 @@

import cmd
import json
import os
import random

from pyserini.search.lucene import LuceneSearcher
from pyserini.search import get_topics
from pyserini.search.faiss import FaissSearcher, TctColBertQueryEncoder, AnceQueryEncoder
from pyserini.search.hybrid import HybridSearcher
from pyserini import search
from pyserini.search.lucene import LuceneSearcher


class MsMarcoDemo(cmd.Cmd):
dev_topics = list(search.get_topics('msmarco-passage-dev-subset').values())
dev_topics = list(get_topics('msmarco-passage-dev-subset').values())

ssearcher = LuceneSearcher.from_prebuilt_index('msmarco-passage')
dsearcher = None
Expand Down
8 changes: 4 additions & 4 deletions pyserini/dsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,20 @@
import os
import sys

import pyserini.search.faiss
from pyserini.search.faiss import TctColBertQueryEncoder
from pyserini.search.faiss import FaissSearcher
from pyserini.search.faiss._searcher import TctColBertQueryEncoder, BinaryDenseSearcher

__all__ = ['SimpleDenseSearcher', 'BinaryDenseSearcher', 'TctColBertQueryEncoder']


class SimpleDenseSearcher(pyserini.search.faiss.FaissSearcher):
class SimpleDenseSearcher(FaissSearcher):
def __new__(cls, *args, **kwargs):
print('pyserini.dsearch.SimpleDenseSearcher class has been deprecated, '
'please use FaissSearcher from pyserini.search.faiss instead')
return super().__new__(cls)


class BinaryDenseSearcher(pyserini.search.faiss.BinaryDenseSearcher):
class BinaryDenseSearcher(BinaryDenseSearcher):
def __new__(cls, *args, **kwargs):
print('pyserini.dsearch.BinaryDenseSearcher class has been deprecated, '
'please use BinaryDenseSearcher from pyserini.search.faiss instead')
Expand Down
Loading

0 comments on commit 7ed8369

Please sign in to comment.