ADLR/megatron-lm!2440 - MCore generate: read vocab size from model, not tokenizer
cuichenx authored and ericharper committed Dec 16, 2024
1 parent bd677bf commit f33d9fe
Showing 1 changed file with 2 additions and 2 deletions.
@@ -305,7 +305,7 @@ def generate_all_output_tokens_static_batch(
         if self.model_is_pipeline_parallel:
             context_length = context_end_position - context_start_position
             logits = broadcast_from_last_pipeline_stage(
-                [batch_size, context_length, self.tokenizer.vocab_size],
+                [batch_size, context_length, self.inference_wrapped_model.model.vocab_size],
                 dtype=self.inference_wrapped_model.inference_wrapper_config.params_dtype,
                 tensor=logits,
             )
@@ -316,7 +316,7 @@ def generate_all_output_tokens_static_batch(
         generation_started = prompt_lengths_in_batch <= context_end_position
         last_token_logits = logits[:, -1, :]
         sampled_logits = self.sample_from_logits(
-            last_token_logits, common_inference_params, self.tokenizer.vocab_size
+            last_token_logits, common_inference_params, self.inference_wrapped_model.model.vocab_size
         )

         # Substitute the sampled logits only for only the prompts that
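Context for the change: in Megatron-style setups the model's embedding/output vocabulary is typically padded so it divides evenly across tensor-parallel ranks, which means the logits tensor's last dimension is the model's (padded) vocab size and can be larger than what the tokenizer reports. Reading the size from the model keeps both call sites above consistent with the actual logits shape. Below is a minimal sketch of that padding arithmetic; the helper name and defaults are illustrative, not part of this commit.

# Minimal sketch of Megatron-style vocab padding (hypothetical helper,
# not from this commit).
def padded_vocab_size(tokenizer_vocab_size: int,
                      make_vocab_size_divisible_by: int = 128,
                      tensor_model_parallel_size: int = 1) -> int:
    """Round the vocab up so each tensor-parallel rank gets an equal shard."""
    multiple = make_vocab_size_divisible_by * tensor_model_parallel_size
    return ((tokenizer_vocab_size + multiple - 1) // multiple) * multiple

# Example: GPT-2's 50257-token vocab pads to 50304 at TP=1 and 51200 at TP=8,
# so the logits have more columns than the tokenizer's vocab size.
assert padded_vocab_size(50257) == 50304
assert padded_vocab_size(50257, tensor_model_parallel_size=8) == 51200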
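The first hunk is where a mismatch actually breaks: the broadcast helper has to allocate a receive buffer of exactly the given shape on every non-last pipeline rank, so building that shape from the tokenizer's vocab size while the sender's logits are model-vocab wide makes the collective's shapes disagree across ranks. A simplified sketch of that pattern follows, assuming torch.distributed is initialized; this is not the actual Megatron implementation.

# Simplified sketch of the broadcast pattern (names and signature are
# illustrative, not the real Megatron helper).
import torch

def broadcast_from_last_stage(shape, dtype, tensor, last_rank, group=None):
    if torch.distributed.get_rank() != last_rank:
        # Non-last ranks allocate an empty buffer to receive into. If `shape`
        # were built from tokenizer.vocab_size while the sender's logits are
        # model.vocab_size wide, this buffer would be the wrong size.
        tensor = torch.empty(shape, dtype=dtype,
                             device=torch.cuda.current_device())
    torch.distributed.broadcast(tensor, src=last_rank, group=group)
    return tensor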
