
changes required for repo name change
StephAO committed Sep 23, 2020
1 parent 6cc3852 commit 50d42fc
Showing 18 changed files with 37 additions and 37 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -42,19 +42,19 @@ For tf_idf prediction, you need to first calculate the idf score for your dataset

## Pre-training
To run pretraining:
-`bash sentence_encoders/scripts/pretrain_bert.sh --model-type [model type]`
+`bash olfmlm/scripts/pretrain_bert.sh --model-type [model type]`
Where model type is the name of the model you want to train. If the model type is one of the specified modes, it will train using mlm and that mode (if the model type is mlm, it will train using just mlm).
The --modes argument will override this default behaviour. If the model type is not a specified mode, the --modes argument is required.
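For example, to pretrain with just the masked language modelling objective (using the mlm model type mentioned above):
`bash olfmlm/scripts/pretrain_bert.sh --model-type mlm`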

## Distributed Pretraining
Use pretrain_bert_distributed.sh instead.
-`bash sentence_encoders/scripts/pretrain_bert_distributed.sh --model-type [model type]`
+`bash olfmlm/scripts/pretrain_bert_distributed.sh --model-type [model type]`
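For example, the same mlm run launched in distributed mode:
`bash olfmlm/scripts/pretrain_bert_distributed.sh --model-type mlm`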

## Evaluation
To run evaluation:
First, convert the saved state dict of the required model using convert_state_dict.py.
Then run:
-`python3 -m sentence_encoders.evaluate.main --exp_name [experiment name]`
+`python3 -m olfmlm.evaluate.main --exp_name [experiment name]`
Where the experiment name is the same as the model type above. If using a saved checkpoint instead of the best model, use the --checkpoint argument.
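For example, to evaluate the mlm model pretrained above:
`python3 -m olfmlm.evaluate.main --exp_name mlm`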


2 changes: 1 addition & 1 deletion arguments.py
@@ -19,7 +19,7 @@
import os
import re
import torch
-from sentence_encoders.paths import bert_config_file, pretrained_path
+from olfmlm.paths import bert_config_file, pretrained_path

def str2bool(v):
    if isinstance(v, bool):
2 changes: 1 addition & 1 deletion configure_data.py
@@ -17,7 +17,7 @@

import copy
import torch
-from sentence_encoders import data_utils
+from olfmlm import data_utils

class DataConfig:

10 changes: 5 additions & 5 deletions data_utils/__init__.py
@@ -16,12 +16,12 @@
import os
import math

-from sentence_encoders.data_utils.samplers import DistributedBatchSampler
-from sentence_encoders.data_utils.datasets import json_dataset, csv_dataset, split_ds, ConcatDataset, SplitDataset, \
+from olfmlm.data_utils.samplers import DistributedBatchSampler
+from olfmlm.data_utils.datasets import json_dataset, csv_dataset, split_ds, ConcatDataset, SplitDataset, \
     bert_dataset
-from sentence_encoders.data_utils.lazy_loader import exists_lazy, make_lazy, lazy_array_loader
-from sentence_encoders.data_utils.tokenization import Tokenization, CommandToken, Tokenizer, CharacterLevelTokenizer, BertWordPieceTokenizer, make_tokenizer
-import sentence_encoders.data_utils.corpora
+from olfmlm.data_utils.lazy_loader import exists_lazy, make_lazy, lazy_array_loader
+from olfmlm.data_utils.tokenization import Tokenization, CommandToken, Tokenizer, CharacterLevelTokenizer, BertWordPieceTokenizer, make_tokenizer
+import olfmlm.data_utils.corpora

TRAIN_DATA = 0
VAL_DATA = 1
4 changes: 2 additions & 2 deletions data_utils/corpora.py
@@ -13,9 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""several datasets with preset arguments"""
-from sentence_encoders.data_utils.datasets import json_dataset, csv_dataset
+from olfmlm.data_utils.datasets import json_dataset, csv_dataset

-from sentence_encoders.paths import train_data_path
+from olfmlm.paths import train_data_path
import os

class wikipedia(json_dataset):
4 changes: 2 additions & 2 deletions data_utils/datasets.py
@@ -38,8 +38,8 @@
nltk.download('punkt')
from nltk import tokenize

-from sentence_encoders.data_utils.lazy_loader import lazy_array_loader, exists_lazy, make_lazy
-from sentence_encoders.data_utils.tokenization import Tokenization
+from olfmlm.data_utils.lazy_loader import lazy_array_loader, exists_lazy, make_lazy
+from olfmlm.data_utils.tokenization import Tokenization

def clean_tokens(text):
    """Runs basic whitespace cleaning and splitting on a piece of text."""
2 changes: 1 addition & 1 deletion data_utils/make_dataset.py
@@ -12,7 +12,7 @@
"""


-from sentence_encoders import data_utils
+from olfmlm import data_utils
from multiprocessing import Pool
from blingfire import text_to_sentences
from torch.utils import data
2 changes: 1 addition & 1 deletion data_utils/tokenization.py
@@ -23,7 +23,7 @@
from nltk import tokenize as nltk_tokenize
import sentencepiece as spm

-from sentence_encoders.data_utils.wordpiece import BertTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP
+from olfmlm.data_utils.wordpiece import BertTokenizer, PRETRAINED_VOCAB_ARCHIVE_MAP

def make_tokenizer(tokenizer_type, corpus, model_path=None, vocab_size=None, model_type='bpe', pad_token=0, character_coverage=1.0, command_tokens=None, type_tokens=None, **kwargs):
    """
2 changes: 1 addition & 1 deletion data_utils/wordpiece.py
@@ -22,7 +22,7 @@
import unicodedata
from io import open

-from sentence_encoders.data_utils.file_utils import cached_path
+from olfmlm.data_utils.file_utils import cached_path

logger = logging.getLogger(__name__)

2 changes: 1 addition & 1 deletion evaluate/utils/config.py
@@ -121,7 +121,7 @@ def params_from_file(config_files: Union[str, Iterable[str]], overrides: str = N
    if isinstance(config_files, str):
        config_files = [config_files]
    for config_file in config_files:
-        config_file = os.path.join("sentence_encoders/evaluate/config", config_file)
+        config_file = os.path.join("olfmlm/evaluate/config", config_file)
        with open(config_file) as fd:
            log.info("Loading config from %s", config_file)
            config_string += fd.read()
2 changes: 1 addition & 1 deletion idf.py
@@ -2,7 +2,7 @@
Script to calculate the inverse document frequency (idf) used in tf-idf labels of a dataset.
"""

-from sentence_encoders import data_utils
+from olfmlm import data_utils
import numpy as np
from math import ceil, log
from multiprocessing import Pool
6 changes: 3 additions & 3 deletions model/model.py
@@ -17,10 +17,10 @@

import torch

-from sentence_encoders.model.modeling import BertConfig
-from sentence_encoders.model.modeling import BertLayerNorm
+from olfmlm.model.modeling import BertConfig
+from olfmlm.model.modeling import BertLayerNorm

-from sentence_encoders.model.new_models import Bert
+from olfmlm.model.new_models import Bert

def get_params_for_weight_decay_optimization(module):

2 changes: 1 addition & 1 deletion model/modeling.py
@@ -34,7 +34,7 @@

from torch.utils.checkpoint import checkpoint

-from sentence_encoders.data_utils.file_utils import cached_path
+from olfmlm.data_utils.file_utils import cached_path

logger = logging.getLogger(__name__)

2 changes: 1 addition & 1 deletion model/new_models.py
@@ -1,7 +1,7 @@
import numpy as np
import torch
from torch.nn import CrossEntropyLoss
-from sentence_encoders.model.modeling import *
+from olfmlm.model.modeling import *

class BertSentHead(nn.Module):
    def __init__(self, config, num_classes=2):
2 changes: 1 addition & 1 deletion optim/__init__.py
@@ -23,4 +23,4 @@
from torch.optim import RMSprop
from torch.optim import Optimizer
from torch.optim import LBFGS
-from sentence_encoders.optim.adam import Adam
+from olfmlm.optim.adam import Adam
20 changes: 10 additions & 10 deletions pretrain_bert.py
@@ -24,16 +24,16 @@
import psutil
import torch

-from sentence_encoders.arguments import get_args
-from sentence_encoders.configure_data import configure_data
-from sentence_encoders.learning_rates import AnnealingLR
-from sentence_encoders.model import BertModel
-from sentence_encoders.model import get_params_for_weight_decay_optimization
-from sentence_encoders.model import DistributedDataParallel as DDP
-from sentence_encoders.optim import Adam
-from sentence_encoders.utils import Timers
-from sentence_encoders.utils import save_checkpoint
-from sentence_encoders.utils import load_checkpoint
+from olfmlm.arguments import get_args
+from olfmlm.configure_data import configure_data
+from olfmlm.learning_rates import AnnealingLR
+from olfmlm.model import BertModel
+from olfmlm.model import get_params_for_weight_decay_optimization
+from olfmlm.model import DistributedDataParallel as DDP
+from olfmlm.optim import Adam
+from olfmlm.utils import Timers
+from olfmlm.utils import save_checkpoint
+from olfmlm.utils import load_checkpoint


def get_model(tokenizer, args):
2 changes: 1 addition & 1 deletion scripts/pretrain_bert.sh
@@ -3,4 +3,4 @@
RANK=0
WORLD_SIZE=1

-python3 -m sentence_encoders.pretrain_bert "$@"
+python3 -m olfmlm.pretrain_bert "$@"
2 changes: 1 addition & 1 deletion scripts/pretrain_bert_distributed.sh
@@ -9,7 +9,7 @@ NODE_RANK=0

DISTRIBUTED_ARGS="--nproc_per_node $WORLD_SIZE --nnodes $NNODES --node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT"

-python -m torch.distributed.launch $DISTRIBUTED_ARGS sentence_encoders/pretrain_bert.py "$@"
+python -m torch.distributed.launch $DISTRIBUTED_ARGS olfmlm/pretrain_bert.py "$@"



