Skip to content

Commit

Permalink
Fix quality due to ruff release
Browse files: browse the repository at this point in the history
  • Loading branch information
sgugger committed Mar 23, 2023
1 parent 73fdc8c commit ef28df0
Show file tree
Hide file tree
Showing 28 changed files with 40 additions and 58 deletions.
8 changes: 3 additions & 5 deletions examples/flax/language-modeling/run_bart_dlm_flax.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,15 +319,13 @@ def permute_sentences(self, input_ids):
sentence_ends = np.argwhere(end_sentence_mask)
sentence_ends[:, 1] += 1
example_has_multiple_sentences, num_sentences = np.unique(sentence_ends[:, 0], return_counts=True)
num_sentences_map = {sent_idx: count for sent_idx, count in zip(example_has_multiple_sentences, num_sentences)}
num_sentences_map = dict(zip(example_has_multiple_sentences, num_sentences))

num_to_permute = np.ceil(num_sentences * self.permute_sentence_ratio).astype(int)
num_to_permute_map = {
sent_idx: count for sent_idx, count in zip(example_has_multiple_sentences, num_to_permute)
}
num_to_permute_map = dict(zip(example_has_multiple_sentences, num_to_permute))

sentence_ends = np.split(sentence_ends[:, 1], np.unique(sentence_ends[:, 0], return_index=True)[1][1:])
sentence_ends_map = {sent_idx: count for sent_idx, count in zip(example_has_multiple_sentences, sentence_ends)}
sentence_ends_map = dict(zip(example_has_multiple_sentences, sentence_ends))

for i in range(input_ids.shape[0]):
if i not in example_has_multiple_sentences:
Expand Down
2 changes: 1 addition & 1 deletion examples/legacy/pytorch-lightning/run_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _eval_end(self, outputs) -> tuple:

results = {**{"val_loss": val_loss_mean}, **compute_metrics(self.hparams.task, preds, out_label_ids)}

ret = {k: v for k, v in results.items()}
ret = dict(results.items())
ret["log"] = results
return ret, preds_list, out_label_list

Expand Down
4 changes: 2 additions & 2 deletions examples/legacy/pytorch-lightning/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def _eval_end(self, outputs):
preds = np.argmax(preds, axis=2)
out_label_ids = np.concatenate([x["target"] for x in outputs], axis=0)

label_map = {i: label for i, label in enumerate(self.labels)}
label_map = dict(enumerate(self.labels))
out_label_list = [[] for _ in range(out_label_ids.shape[0])]
preds_list = [[] for _ in range(out_label_ids.shape[0])]

Expand All @@ -140,7 +140,7 @@ def _eval_end(self, outputs):
"f1": f1_score(out_label_list, preds_list),
}

ret = {k: v for k, v in results.items()}
ret = dict(results.items())
ret["log"] = results
return ret, preds_list, out_label_list

Expand Down
4 changes: 2 additions & 2 deletions examples/legacy/seq2seq/run_eval_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

def parse_search_arg(search):
groups = search.split()
entries = {k: vs for k, vs in (g.split("=") for g in groups)}
entries = dict((g.split("=") for g in groups))
entry_names = list(entries.keys())
sets = [[f"--{k} {v}" for v in vs.split(":")] for k, vs in entries.items()]
matrix = [list(x) for x in itertools.product(*sets)]
Expand Down Expand Up @@ -105,7 +105,7 @@ def run_search():
col_widths = {col: len(str(col)) for col in col_names}
results = []
for r in matrix:
hparams = {k: v for k, v in (x.replace("--", "").split() for x in r)}
hparams = dict((x.replace("--", "").split() for x in r))
args_exp = " ".join(r).split()
args_exp.extend(["--bs", str(args.bs)]) # in case we need to reduce its size due to CUDA OOM
sys.argv = args_normal + args_exp
Expand Down
2 changes: 1 addition & 1 deletion examples/legacy/token-classification/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def main():

# Prepare CONLL-2003 task
labels = token_classification_task.get_labels(data_args.labels)
label_map: Dict[int, str] = {i: label for i, label in enumerate(labels)}
label_map: Dict[int, str] = dict(enumerate(labels))
num_labels = len(labels)

# Load pretrained model and tokenizer
Expand Down
2 changes: 1 addition & 1 deletion examples/legacy/token-classification/run_tf_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def main():

# Prepare Token Classification task
labels = token_classification_task.get_labels(data_args.labels)
label_map: Dict[int, str] = {i: label for i, label in enumerate(labels)}
label_map: Dict[int, str] = dict(enumerate(labels))
num_labels = len(labels)

# Load pretrained model and tokenizer
Expand Down
2 changes: 1 addition & 1 deletion examples/pytorch/token-classification/run_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def get_label_list(labels):

# Set the correspondences label/ID inside the model config
model.config.label2id = {l: i for i, l in enumerate(label_list)}
model.config.id2label = {i: l for i, l in enumerate(label_list)}
model.config.id2label = dict(enumerate(label_list))

# Map that sends B-Xxx label to its I-Xxx counterpart
b_to_i_label = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ def get_label_list(labels):

# Set the correspondences label/ID inside the model config
model.config.label2id = {l: i for i, l in enumerate(label_list)}
model.config.id2label = {i: l for i, l in enumerate(label_list)}
model.config.id2label = dict(enumerate(label_list))

# Map that sends B-Xxx label to its I-Xxx counterpart
b_to_i_label = []
Expand Down
4 changes: 2 additions & 2 deletions examples/research_projects/layoutlmv3/run_funsd_cord.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,11 @@ def get_label_list(labels):
if isinstance(features[label_column_name].feature, ClassLabel):
label_list = features[label_column_name].feature.names
# No need to convert the labels since they are already ints.
id2label = {k: v for k, v in enumerate(label_list)}
id2label = dict(enumerate(label_list))
label2id = {v: k for k, v in enumerate(label_list)}
else:
label_list = get_label_list(datasets["train"][label_column_name])
id2label = {k: v for k, v in enumerate(label_list)}
id2label = dict(enumerate(label_list))
label2id = {v: k for k, v in enumerate(label_list)}
num_labels = len(label_list)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ def training_step(self, batch, batch_idx) -> Dict:

loss_tensors = self._step(batch)

logs = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
logs = dict(zip(self.loss_names, loss_tensors))
# tokens per batch
tgt_pad_token_id = (
self.tokenizer.generator.pad_token_id
Expand Down Expand Up @@ -434,7 +434,7 @@ def _generative_step(self, batch: dict) -> dict:
target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
# print(preds,target)
loss_tensors = self._step(batch)
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
base_metrics = dict(zip(self.loss_names, loss_tensors))
gen_metrics: Dict = self.calc_generative_metrics(preds, target)

summ_len = np.mean(lmap(len, generated_ids))
Expand Down
2 changes: 1 addition & 1 deletion examples/research_projects/rag/finetune_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def _generative_step(self, batch: dict) -> dict:
preds: List[str] = self.ids_to_clean_text(generated_ids)
target: List[str] = self.ids_to_clean_text(batch["decoder_input_ids"])
loss_tensors = self._step(batch)
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
base_metrics = dict(zip(self.loss_names, loss_tensors))
gen_metrics: Dict = self.calc_generative_metrics(preds, target)

summ_len = np.mean(lmap(len, generated_ids))
Expand Down
4 changes: 2 additions & 2 deletions examples/research_projects/seq2seq-distillation/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def pad(self) -> int:
def training_step(self, batch, batch_idx) -> Dict:
loss_tensors = self._step(batch)

logs = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
logs = dict(zip(self.loss_names, loss_tensors))
# tokens per batch
logs["tpb"] = batch["input_ids"].ne(self.pad).sum() + batch["labels"].ne(self.pad).sum()
logs["bs"] = batch["input_ids"].shape[0]
Expand Down Expand Up @@ -225,7 +225,7 @@ def _generative_step(self, batch: dict) -> dict:
preds: List[str] = self.ids_to_clean_text(generated_ids)
target: List[str] = self.ids_to_clean_text(batch["labels"])
loss_tensors = self._step(batch)
base_metrics = {name: loss for name, loss in zip(self.loss_names, loss_tensors)}
base_metrics = dict(zip(self.loss_names, loss_tensors))
rouge: Dict = self.calc_generative_metrics(preds, target)
summ_len = np.mean(lmap(len, generated_ids))
base_metrics.update(gen_time=gen_time, gen_len=summ_len, preds=preds, target=target, **rouge)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def main():
student_args.student_name_or_path, num_labels=len(class_names)
)
tokenizer = AutoTokenizer.from_pretrained(student_args.student_name_or_path, use_fast=data_args.use_fast_tokenizer)
model.config.id2label = {i: label for i, label in enumerate(class_names)}
model.config.id2label = dict(enumerate(class_names))
model.config.label2id = {label: i for i, label in enumerate(class_names)}

# 4. train student on teacher predictions
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/benchmark/benchmark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ def __init__(self, args: BenchmarkArguments = None, configs: PretrainedConfig =
model_name: AutoConfig.from_pretrained(model_name) for model_name in self.args.model_names
}
else:
self.config_dict = {model_name: config for model_name, config in zip(self.args.model_names, configs)}
self.config_dict = dict(zip(self.args.model_names, configs))

warnings.warn(
f"The class {self.__class__} is deprecated. Hugging Face Benchmarking utils"
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/modelcard.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,9 @@ def create_model_index(self, metric_mapping):
dataset_metadata = _listify(self.dataset_metadata)
if len(dataset_args) < len(dataset_tags):
dataset_args = dataset_args + [None] * (len(dataset_tags) - len(dataset_args))
dataset_mapping = {tag: name for tag, name in zip(dataset_tags, dataset_names)}
dataset_arg_mapping = {tag: arg for tag, arg in zip(dataset_tags, dataset_args)}
dataset_metadata_mapping = {tag: metadata for tag, metadata in zip(dataset_tags, dataset_metadata)}
dataset_mapping = dict(zip(dataset_tags, dataset_names))
dataset_arg_mapping = dict(zip(dataset_tags, dataset_args))
dataset_metadata_mapping = dict(zip(dataset_tags, dataset_metadata))

task_mapping = {
task: TASK_TAG_TO_NAME_MAPPING[task] for task in _listify(self.tasks) if task in TASK_TAG_TO_NAME_MAPPING
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/esm/tokenization_esm.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class EsmTokenizer(PreTrainedTokenizer):
def __init__(self, vocab_file, **kwargs):
super().__init__(**kwargs)
self.all_tokens = load_vocab_file(vocab_file)
self._id_to_token = {ind: tok for ind, tok in enumerate(self.all_tokens)}
self._id_to_token = dict(enumerate(self.all_tokens))
self._token_to_id = {tok: ind for ind, tok in enumerate(self.all_tokens)}
self.unk_token = "<unk>"
self.cls_token = "<cls>"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def __call__(self, original_config: object) -> MaskFormerConfig:
swin = model.SWIN

dataset_catalog = MetadataCatalog.get(original_config.DATASETS.TEST[0])
id2label = {idx: label for idx, label in enumerate(dataset_catalog.stuff_classes)}
id2label = dict(enumerate(dataset_catalog.stuff_classes))
label2id = {label: idx for idx, label in id2label.items()}

config: MaskFormerConfig = MaskFormerConfig(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def __call__(self, original_config: object, is_swin: bool) -> OneFormerConfig:
model = original_config.MODEL

dataset_catalog = MetadataCatalog.get(original_config.DATASETS.TEST_PANOPTIC[0])
id2label = {idx: label for idx, label in enumerate(dataset_catalog.stuff_classes)}
id2label = dict(enumerate(dataset_catalog.stuff_classes))
label2id = {label: idx for idx, label in id2label.items()}

if is_swin:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ def create_vocab_dict(dict_path):
"<unk>": 3,
}

vocab_dict.update({k: v for k, v in zip(words, range(4, num_words + 4))})
vocab_dict.update(dict(zip(words, range(4, num_words + 4))))
return vocab_dict


Expand Down
6 changes: 2 additions & 4 deletions src/transformers/onnx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,7 @@ def export_pytorch(
f=output.as_posix(),
input_names=list(config.inputs.keys()),
output_names=onnx_outputs,
dynamic_axes={
name: axes for name, axes in chain(config.inputs.items(), config.outputs.items())
},
dynamic_axes=dict(chain(config.inputs.items(), config.outputs.items())),
do_constant_folding=True,
use_external_data_format=config.use_external_data_format(model.num_parameters()),
enable_onnx_checker=True,
Expand All @@ -208,7 +206,7 @@ def export_pytorch(
f=output.as_posix(),
input_names=list(config.inputs.keys()),
output_names=onnx_outputs,
dynamic_axes={name: axes for name, axes in chain(config.inputs.items(), config.outputs.items())},
dynamic_axes=dict(chain(config.inputs.items(), config.outputs.items())),
do_constant_folding=True,
opset_version=opset,
)
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/pipelines/document_question_answering.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def _forward(self, model_inputs):
else:
model_outputs = self.model(**model_inputs)

model_outputs = {k: v for (k, v) in model_outputs.items()}
model_outputs = dict(model_outputs.items())
model_outputs["p_mask"] = p_mask
model_outputs["word_ids"] = word_ids
model_outputs["words"] = words
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/utils/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def update(self, *args, **kwargs):

def __getitem__(self, k):
if isinstance(k, str):
inner_dict = {k: v for (k, v) in self.items()}
inner_dict = dict(self.items())
return inner_dict[k]
else:
return self.to_tuple()[k]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,7 @@ def comm_get_image_processing_inputs(
high = num_labels
if is_instance_map:
labels_expanded = list(range(num_labels)) * 2
instance_id_to_semantic_id = {
instance_id: label_id for instance_id, label_id in enumerate(labels_expanded)
}
instance_id_to_semantic_id = dict(enumerate(labels_expanded))
annotations = [
np.random.randint(0, high * 2, (img.size[1], img.size[0])).astype(np.uint8) for img in image_inputs
]
Expand Down
4 changes: 1 addition & 3 deletions tests/models/maskformer/test_image_processing_maskformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,9 +298,7 @@ def comm_get_image_processing_inputs(
high = num_labels
if is_instance_map:
labels_expanded = list(range(num_labels)) * 2
instance_id_to_semantic_id = {
instance_id: label_id for instance_id, label_id in enumerate(labels_expanded)
}
instance_id_to_semantic_id = dict(enumerate(labels_expanded))
annotations = [
np.random.randint(0, high * 2, (img.size[1], img.size[0])).astype(np.uint8) for img in image_inputs
]
Expand Down
4 changes: 1 addition & 3 deletions tests/models/oneformer/test_image_processing_oneformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,7 @@ def comm_get_image_processor_inputs(
high = num_labels
if is_instance_map:
labels_expanded = list(range(num_labels)) * 2
instance_id_to_semantic_id = {
instance_id: label_id for instance_id, label_id in enumerate(labels_expanded)
}
instance_id_to_semantic_id = dict(enumerate(labels_expanded))
annotations = [
np.random.randint(0, high * 2, (img.size[1], img.size[0])).astype(np.uint8) for img in image_inputs
]
Expand Down
4 changes: 1 addition & 3 deletions tests/models/oneformer/test_processor_oneformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,9 +401,7 @@ def comm_get_processor_inputs(self, with_segmentation_maps=False, is_instance_ma
high = num_labels
if is_instance_map:
labels_expanded = list(range(num_labels)) * 2
instance_id_to_semantic_id = {
instance_id: label_id for instance_id, label_id in enumerate(labels_expanded)
}
instance_id_to_semantic_id = dict(enumerate(labels_expanded))
annotations = [
np.random.randint(0, high * 2, (img.size[1], img.size[0])).astype(np.uint8) for img in image_inputs
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,8 @@

@is_pipeline_test
class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
model_mapping = {
k: v
for k, v in (list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING.items()) if MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING else [])
+ (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
}
model_mapping = dict((list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING.items()) if MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING else [])
+ (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else []))

def get_test_pipeline(self, model, tokenizer, processor):
if tokenizer is None:
Expand Down
7 changes: 2 additions & 5 deletions tests/pipelines/test_pipelines_image_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,11 @@ def mask_to_test_readable_only_shape(mask: Image) -> Dict:
@require_timm
@require_torch
class ImageSegmentationPipelineTests(unittest.TestCase):
model_mapping = {
k: v
for k, v in (
model_mapping = dict((
list(MODEL_FOR_IMAGE_SEGMENTATION_MAPPING.items()) if MODEL_FOR_IMAGE_SEGMENTATION_MAPPING else []
)
+ (MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING.items() if MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING else [])
+ (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
}
+ (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else []))

def get_test_pipeline(self, model, tokenizer, processor):
image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor)
Expand Down

0 comments on commit ef28df0

Please sign in to comment.