This is the code I use to run my model:
def forward(self, image_feature, merge_feature):
    image_atts = torch.ones(image_feature.size()[:-1], dtype=torch.long)
    query_tokens = self.query_tokens.expand(merge_feature.shape[0], -1, -1)
    print("image:", image_feature.size())
    print("query:", query_tokens.size())
    print("merge:", merge_feature.size())
    # Check and adjust the shape of image_feature for the matrix multiplication:
    # its last dimension must match the last dimension of query_tokens.
    image_feature = image_feature.unsqueeze(1).expand(-1, query_tokens.size(1), -1)
    image_feature = self.image_proj(image_feature)
    if image_feature.size(2) != query_tokens.size(2):
        raise ValueError("The last dimension of image_feature must match the last dimension of query_tokens.")
    print("image_upgrate:", image_feature.size())
    # print("image_att__upgrate:", image_atts.size())
    query_output = self.Qformer.bert(
        query_embeds=query_tokens,
        encoder_hidden_states=image_feature,
        encoder_attention_mask=image_atts,
        use_cache=True,
        return_dict=True,
    )
The failure happens inside the transformers library, in invert_attention_mask:
def invert_attention_mask(self, encoder_attention_mask: Tensor) -> Tensor:
    """
    Invert an attention mask (e.g., switches 0. and 1.).

    Args:
        encoder_attention_mask (`torch.Tensor`): An attention mask.

    Returns:
        `torch.Tensor`: The inverted attention mask.
    """
    if encoder_attention_mask.dim() == 3:
        encoder_extended_attention_mask = encoder_attention_mask[:, None, :, :]
    if encoder_attention_mask.dim() == 2:
        encoder_extended_attention_mask = encoder_attention_mask[:, None, None, :]
    # T5 has a mask that can compare sequence ids, we can simulate this here with this transposition
    # Cf. https://github.com/tensorflow/mesh/blob/8d2465e9bc93129b913b5ccc6a59aa97abd96ec6/mesh_tensorflow
    # /transformer/transformer_layers.py#L270
    # encoder_extended_attention_mask = (encoder_extended_attention_mask ==
    # encoder_extended_attention_mask.transpose(-1, -2))
    encoder_extended_attention_mask = encoder_extended_attention_mask.to(dtype=self.dtype)  # fp16 compatibility
    encoder_extended_attention_mask = (1.0 - encoder_extended_attention_mask) * torch.finfo(self.dtype).min
    return encoder_extended_attention_mask
The error: UnboundLocalError: local variable 'encoder_extended_attention_mask' referenced before assignment
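
Note on the likely cause: invert_attention_mask only assigns encoder_extended_attention_mask when the incoming mask is 2-D or 3-D; any other rank skips both if branches, and the final lines then reference a variable that was never assigned. In the forward above, image_atts is built from image_feature.size()[:-1] before the unsqueeze(1).expand(...), so if image_feature enters forward as a 2-D (batch, dim) tensor, the mask comes out 1-D with shape (batch,) and triggers exactly this error. Below is a minimal sketch of a fix under that assumption (the actual input shapes are not shown in the post): build the mask after the expansion and projection, so its shape is (batch, seq_len).

import torch

def forward(self, image_feature, merge_feature):
    query_tokens = self.query_tokens.expand(merge_feature.shape[0], -1, -1)

    # Expand and project first, so image_feature becomes (batch, seq_len, hidden).
    image_feature = image_feature.unsqueeze(1).expand(-1, query_tokens.size(1), -1)
    image_feature = self.image_proj(image_feature)
    if image_feature.size(2) != query_tokens.size(2):
        raise ValueError("The last dimension of image_feature must match the last dimension of query_tokens.")

    # Build the mask from the *expanded* feature: size()[:-1] is now
    # (batch, seq_len), a 2-D shape that invert_attention_mask handles.
    # device= is an addition so the mask follows the features onto GPU
    # (assumption: the original code relied on default-device tensors).
    image_atts = torch.ones(
        image_feature.size()[:-1], dtype=torch.long, device=image_feature.device
    )

    query_output = self.Qformer.bert(
        query_embeds=query_tokens,
        encoder_hidden_states=image_feature,
        encoder_attention_mask=image_atts,
        use_cache=True,
        return_dict=True,
    )
    return query_output

The existing prints can confirm this diagnosis: if print("image:", image_feature.size()) shows only two dimensions at entry, then the image_atts passed to Qformer.bert was 1-D, and moving its construction after the expansion should resolve the UnboundLocalError.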