Description
Hey, while running the 4-bit quantized model from https://huggingface.co/ThetaCursed/Ovis1.6-Gemma2-9B-bnb-4bit I am getting the error below.
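For reference, I load the model and build the inputs roughly like this. This is a sketch of my setup rather than the exact notebook cell: the `get_text_tokenizer` / `get_visual_tokenizer` / `preprocess_inputs` calls follow the Ovis model card recipe, and only the generation part at the end is what actually appears in the traceback.

```python
import torch
from PIL import Image
from transformers import AutoModelForCausalLM

# Load the pre-quantized 4-bit checkpoint (the repo ships its own bitsandbytes config).
# NOTE: sketch only; exact kwargs may differ slightly from my notebook.
model = AutoModelForCausalLM.from_pretrained(
    "ThetaCursed/Ovis1.6-Gemma2-9B-bnb-4bit",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()

# Prompt/image preprocessing following the Ovis model card recipe
# (the helper names above and preprocess_inputs come from that recipe).
image = Image.open("example.jpg")
query = "<image>\nDescribe this image."
prompt, input_ids, pixel_values = model.preprocess_inputs(query, [image])
attention_mask = torch.ne(input_ids, text_tokenizer.pad_token_id)
input_ids = input_ids.unsqueeze(0).to(device=model.device)
attention_mask = attention_mask.unsqueeze(0).to(device=model.device)
pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)]

# Generation call -- this is the part visible in the traceback (Cell In[3]).
with torch.inference_mode():
    gen_kwargs = dict(
        max_new_tokens=1024,
        do_sample=False,
        # (other generation kwargs elided, as in the traceback)
        use_cache=True,
    )
    output_ids = model.generate(input_ids, pixel_values=pixel_values,
                                attention_mask=attention_mask, **gen_kwargs)[0]
    output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
    print(f'Output:\n{output}')
```

Full error object from the notebook: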
{
"name": "RuntimeError",
"message": "self and mat2 must have the same dtype, but got BFloat16 and Byte",
"stack": "---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[3], line 35
23 with torch.inference_mode():
24 gen_kwargs = dict(
25 max_new_tokens=1024,
26 do_sample=False,
(...)
33 use_cache=True
34 )
---> 35 output_ids = model.generate(input_ids, pixel_values=pixel_values, attention_mask=attention_mask, **gen_kwargs)[0]
36 output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
37 output = print(f'Output:\\n{output}')
File ~/.cache/huggingface/modules/transformers_modules/AIDC-AI/Ovis1.6-Gemma2-9B/15954d60650e5d6f3cfebcb9407e809b4c5019e1/modeling_ovis.py:588, in Ovis.generate(self, inputs, **kwargs)
583 def generate(
584 self,
585 inputs: Optional[torch.Tensor] = None,
586 **kwargs
587 ) -> Union[GenerateOutput, torch.LongTensor]:
--> 588 _, inputs_embeds, labels, attention_mask = self.merge_multimodal(
589 text_input_ids=inputs,
590 text_attention_masks=kwargs.pop('attention_mask'),
591 text_labels=None,
592 pixel_values=kwargs.pop('pixel_values'),
593 left_padding=True
594 )
595 if getattr(self.generation_config, 'cache_implementation') == 'hybrid': # mainly for Gemma2
596 kwargs['past_key_values'] = self._get_hybrid_cache_for_llm(
597 getattr(kwargs, \"num_beams\", inputs_embeds.shape[0]), kwargs['max_new_tokens'] + inputs_embeds.shape[-2])
File ~/.cache/huggingface/modules/transformers_modules/AIDC-AI/Ovis1.6-Gemma2-9B/15954d60650e5d6f3cfebcb9407e809b4c5019e1/modeling_ovis.py:385, in Ovis.merge_multimodal(self, text_input_ids, text_attention_masks, text_labels, pixel_values, left_padding)
383 num_images = [x.shape[0] if x is not None else 0 for x in pixel_values]
384 if sum(num_images) > 0:
--> 385 visual_tokens = self.visual_tokenizer(torch.cat([x for x in pixel_values if x is not None], dim=0))
386 visual_embeds = torch.split(self.get_vte()(visual_tokens).to(dtype=self.dtype, device=input_device),
387 split_size_or_sections=num_images, dim=0)
388 visual_input_ids = torch.split(torch.argmax(visual_tokens, dim=-1).to(device=input_device),
389 split_size_or_sections=num_images, dim=0)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
168 output = module._old_forward(*args, **kwargs)
169 else:
--> 170 output = module._old_forward(*args, **kwargs)
171 return module._hf_hook.post_forward(module, output)
File ~/.cache/huggingface/modules/transformers_modules/AIDC-AI/Ovis1.6-Gemma2-9B/15954d60650e5d6f3cfebcb9407e809b4c5019e1/modeling_ovis.py:221, in BaseVisualTokenizer.forward(self, pixel_values)
220 def forward(self, pixel_values) -> torch.Tensor: # [BatchSize, ImageShape] -> [BatchSize, #Token, VocabSize]
--> 221 features = self.encode(pixel_values)
222 logits = self.head(features)
223 tokens = self.tokenize(logits)
File ~/.cache/huggingface/modules/transformers_modules/AIDC-AI/Ovis1.6-Gemma2-9B/15954d60650e5d6f3cfebcb9407e809b4c5019e1/modeling_ovis.py:196, in BaseVisualTokenizer.encode(self, pixel_values)
195 def encode(self, pixel_values):
--> 196 output = self.backbone(pixel_values, output_hidden_states=True, return_dict=True)
197 features = output.hidden_states[-1]
198 if self.config.drop_cls_token:
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
168 output = module._old_forward(*args, **kwargs)
169 else:
--> 170 output = module._old_forward(*args, **kwargs)
171 return module._hf_hook.post_forward(module, output)
File ~/.local/lib/python3.8/site-packages/transformers/models/siglip/modeling_siglip.py:1189, in SiglipVisionModel.forward(self, pixel_values, output_attentions, output_hidden_states, return_dict, interpolate_pos_encoding)
1165 r\"\"\"
1166 Returns:
1167
(...)
1185 >>> pooled_output = outputs.pooler_output # pooled features
1186 ```\"\"\"
1187 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-> 1189 return self.vision_model(
1190 pixel_values=pixel_values,
1191 output_attentions=output_attentions,
1192 output_hidden_states=output_hidden_states,
1193 return_dict=return_dict,
1194 interpolate_pos_encoding=interpolate_pos_encoding,
1195 )
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
168 output = module._old_forward(*args, **kwargs)
169 else:
--> 170 output = module._old_forward(*args, **kwargs)
171 return module._hf_hook.post_forward(module, output)
File ~/.local/lib/python3.8/site-packages/transformers/models/siglip/modeling_siglip.py:1100, in SiglipVisionTransformer.forward(self, pixel_values, output_attentions, output_hidden_states, return_dict, interpolate_pos_encoding)
1097 last_hidden_state = encoder_outputs[0]
1098 last_hidden_state = self.post_layernorm(last_hidden_state)
-> 1100 pooler_output = self.head(last_hidden_state) if self.use_head else None
1101 if not return_dict:
1102 return (last_hidden_state, pooler_output) + encoder_outputs[1:]
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
168 output = module._old_forward(*args, **kwargs)
169 else:
--> 170 output = module._old_forward(*args, **kwargs)
171 return module._hf_hook.post_forward(module, output)
File ~/.local/lib/python3.8/site-packages/transformers/models/siglip/modeling_siglip.py:1127, in SiglipMultiheadAttentionPoolingHead.forward(self, hidden_state)
1124 batch_size = hidden_state.shape[0]
1125 probe = self.probe.repeat(batch_size, 1, 1)
-> 1127 hidden_state = self.attention(probe, hidden_state, hidden_state)[0]
1129 residual = hidden_state
1130 hidden_state = self.layernorm(hidden_state)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs)
1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1510 else:
-> 1511 return self._call_impl(*args, **kwargs)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs)
1515 # If we don't have any hooks, we want to skip the rest of the logic in
1516 # this function, and just call forward.
1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1518 or _global_backward_pre_hooks or _global_backward_hooks
1519 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1520 return forward_call(*args, **kwargs)
1522 try:
1523 result = None
File ~/.local/lib/python3.8/site-packages/accelerate/hooks.py:170, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
168 output = module._old_forward(*args, **kwargs)
169 else:
--> 170 output = module._old_forward(*args, **kwargs)
171 return module._hf_hook.post_forward(module, output)
File ~/.local/lib/python3.8/site-packages/torch/nn/modules/activation.py:1241, in MultiheadAttention.forward(self, query, key, value, key_padding_mask, need_weights, attn_mask, average_attn_weights, is_causal)
1227 attn_output, attn_output_weights = F.multi_head_attention_forward(
1228 query, key, value, self.embed_dim, self.num_heads,
1229 self.in_proj_weight, self.in_proj_bias,
(...)
1238 average_attn_weights=average_attn_weights,
1239 is_causal=is_causal)
1240 else:
-> 1241 attn_output, attn_output_weights = F.multi_head_attention_forward(
1242 query, key, value, self.embed_dim, self.num_heads,
1243 self.in_proj_weight, self.in_proj_bias,
1244 self.bias_k, self.bias_v, self.add_zero_attn,
1245 self.dropout, self.out_proj.weight, self.out_proj.bias,
1246 training=self.training,
1247 key_padding_mask=key_padding_mask,
1248 need_weights=need_weights,
1249 attn_mask=attn_mask,
1250 average_attn_weights=average_attn_weights,
1251 is_causal=is_causal)
1252 if self.batch_first and is_batched:
1253 return attn_output.transpose(1, 0), attn_output_weights
File ~/.local/lib/python3.8/site-packages/torch/nn/functional.py:5449, in multi_head_attention_forward(query, key, value, embed_dim_to_check, num_heads, in_proj_weight, in_proj_bias, bias_k, bias_v, add_zero_attn, dropout_p, out_proj_weight, out_proj_bias, training, key_padding_mask, need_weights, attn_mask, use_separate_proj_weight, q_proj_weight, k_proj_weight, v_proj_weight, static_k, static_v, average_attn_weights, is_causal)
5446 attn_output = torch.bmm(attn_output_weights, v)
5448 attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len * bsz, embed_dim)
-> 5449 attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
5450 attn_output = attn_output.view(tgt_len, bsz, attn_output.size(1))
5452 # optionally average attention weights over heads
RuntimeError: self and mat2 must have the same dtype, but got BFloat16 and Byte"
}
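In case it helps with triage: the traceback bottoms out in the SigLIP attention-pooling head of the visual tokenizer, where F.linear receives a weight in a different dtype than the activations. A quick dtype check along that module path should show whether those projection weights are still stored as quantized uint8 (Byte) while the rest of the vision tower runs in bfloat16. This is only a sketch; the attribute chain is read off the traceback frames, so treat it as an assumption.

```python
# (continuing from the loading sketch above)
# Walk down to the SigLIP pooling head that the traceback ends in:
# Ovis.visual_tokenizer -> BaseVisualTokenizer.backbone (SiglipVisionModel)
# -> vision_model (SiglipVisionTransformer) -> head (SiglipMultiheadAttentionPoolingHead)
head = model.visual_tokenizer.backbone.vision_model.head
for name, param in head.named_parameters():
    print(name, tuple(param.shape), param.dtype)
# If the MultiheadAttention projection weights print as torch.uint8 here, that would match
# the "BFloat16 and Byte" mismatch raised inside F.multi_head_attention_forward.
```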