From d31e226650ad01daefff66ec202992b8c3bf8384 Mon Sep 17 00:00:00 2001
From: comfyanonymous
Date: Wed, 28 Aug 2024 16:18:39 -0400
Subject: [PATCH] Unify RMSNorm code.

---
 comfy/ldm/common_dit.py                     | 13 +++++++++++
 comfy/ldm/flux/layers.py                    |  4 ++--
 comfy/ldm/modules/diffusionmodules/mmdit.py | 24 ++-------------------
 3 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/comfy/ldm/common_dit.py b/comfy/ldm/common_dit.py
index 9900255..9016abc 100644
--- a/comfy/ldm/common_dit.py
+++ b/comfy/ldm/common_dit.py
@@ -1,4 +1,5 @@
 import torch
+import comfy.ops
 
 def pad_to_patch_size(img, patch_size=(2, 2), padding_mode="circular"):
     if padding_mode == "circular" and torch.jit.is_tracing() or torch.jit.is_scripting():
@@ -6,3 +7,15 @@ def pad_to_patch_size(img, patch_size=(2, 2), padding_mode="circular"):
     pad_h = (patch_size[0] - img.shape[-2] % patch_size[0]) % patch_size[0]
     pad_w = (patch_size[1] - img.shape[-1] % patch_size[1]) % patch_size[1]
     return torch.nn.functional.pad(img, (0, pad_w, 0, pad_h), mode=padding_mode)
+
+try:
+    rms_norm_torch = torch.nn.functional.rms_norm
+except:
+    rms_norm_torch = None
+
+def rms_norm(x, weight, eps=1e-6):
+    if rms_norm_torch is not None:
+        return rms_norm_torch(x, weight.shape, weight=comfy.ops.cast_to(weight, dtype=x.dtype, device=x.device), eps=eps)
+    else:
+        rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + eps)
+        return (x * rrms) * comfy.ops.cast_to(weight, dtype=x.dtype, device=x.device)
diff --git a/comfy/ldm/flux/layers.py b/comfy/ldm/flux/layers.py
index 20bd285..dabab3e 100644
--- a/comfy/ldm/flux/layers.py
+++ b/comfy/ldm/flux/layers.py
@@ -6,6 +6,7 @@ from torch import Tensor, nn
 
 from .math import attention, rope
 import comfy.ops
+import comfy.ldm.common_dit
 
 
 class EmbedND(nn.Module):
@@ -63,8 +64,7 @@ class RMSNorm(torch.nn.Module):
         self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device))
 
     def forward(self, x: Tensor):
-        rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + 1e-6)
-        return (x * rrms) * comfy.ops.cast_to(self.scale, dtype=x.dtype, device=x.device)
+        return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6)
 
 
 class QKNorm(torch.nn.Module):
diff --git a/comfy/ldm/modules/diffusionmodules/mmdit.py b/comfy/ldm/modules/diffusionmodules/mmdit.py
index 491a58a..759788a 100644
--- a/comfy/ldm/modules/diffusionmodules/mmdit.py
+++ b/comfy/ldm/modules/diffusionmodules/mmdit.py
@@ -355,29 +355,9 @@ class RMSNorm(torch.nn.Module):
         else:
             self.register_parameter("weight", None)
 
-    def _norm(self, x):
-        """
-        Apply the RMSNorm normalization to the input tensor.
-        Args:
-            x (torch.Tensor): The input tensor.
-        Returns:
-            torch.Tensor: The normalized tensor.
-        """
-        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
-
     def forward(self, x):
-        """
-        Forward pass through the RMSNorm layer.
-        Args:
-            x (torch.Tensor): The input tensor.
-        Returns:
-            torch.Tensor: The output tensor after applying RMSNorm.
-        """
-        x = self._norm(x)
-        if self.learnable_scale:
-            return x * self.weight.to(device=x.device, dtype=x.dtype)
-        else:
-            return x
+        return comfy.ldm.common_dit.rms_norm(x, self.weight, self.eps)
+
 
 
 class SwiGLUFeedForward(nn.Module):
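
Note on the change (not part of the commit): the patch routes the duplicated
RMSNorm implementations through one shared helper that prefers PyTorch's fused
torch.nn.functional.rms_norm when the installed torch provides it (added in
PyTorch 2.4) and otherwise falls back to the manual rsqrt-of-mean-of-squares
formulation. Below is a minimal, self-contained sketch for sanity-checking
that the two paths agree; rms_norm_manual is a hypothetical name that mirrors
the fallback math from comfy/ldm/common_dit.py, minus the comfy.ops.cast_to
dtype/device handling.

import torch

def rms_norm_manual(x, weight, eps=1e-6):
    # Fallback math: x / sqrt(mean(x^2 over the last dim) + eps), scaled by weight.
    rrms = torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + eps)
    return (x * rrms) * weight

if __name__ == "__main__":
    torch.manual_seed(0)
    x = torch.randn(2, 4, 8)
    w = torch.randn(8)
    # getattr guard plays the role of the patch's try/except:
    # rms_norm is simply absent on torch < 2.4.
    fused = getattr(torch.nn.functional, "rms_norm", None)
    if fused is not None:
        # The two paths should agree to floating-point tolerance.
        torch.testing.assert_close(rms_norm_manual(x, w),
                                   fused(x, w.shape, weight=w, eps=1e-6))
        print("fallback matches torch.nn.functional.rms_norm")
    else:
        print("fused rms_norm unavailable; only the manual fallback applies")

Resolving the attribute once at import time (rather than per call) keeps the
per-forward overhead at a single None check, which is why the patch binds
rms_norm_torch at module scope.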