From 56333d48508f95bdef23870cad3239ba0ebdb8a9 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Fri, 7 Jun 2024 03:05:23 -0400
Subject: [PATCH] Use the end token for the text encoder attention mask.

---
 comfy/sd1_clip.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py
index ff6db0d..e7ebf04 100644
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -168,11 +168,11 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
         attention_mask = None
         if self.enable_attention_masks:
             attention_mask = torch.zeros_like(tokens)
-            max_token = self.transformer.get_input_embeddings().weight.shape[0] - 1
+            end_token = self.special_tokens.get("end", -1)
             for x in range(attention_mask.shape[0]):
                 for y in range(attention_mask.shape[1]):
                     attention_mask[x, y] = 1
-                    if tokens[x, y] == max_token:
+                    if tokens[x, y] == end_token:
                         break
 
         outputs = self.transformer(tokens, attention_mask, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state)