Always shift text encoder to GPU when the device supports fp16.

main
comfyanonymous 2 years ago
parent a6ef08a46a
commit cc44ade79e

@@ -432,8 +432,7 @@ def text_encoder_device():
if args.gpu_only:
return get_torch_device()
elif vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.NORMAL_VRAM:
-#NOTE: on a Ryzen 5 7600X with 4080 it's faster to shift to GPU
-if should_use_fp16() or torch.get_num_threads() < 8: #leaving the text encoder on the CPU is faster than shifting it if the CPU is fast enough.
+if should_use_fp16(prioritize_performance=False):
return get_torch_device()
else:
return torch.device("cpu")
@@ -569,7 +568,7 @@ def is_device_mps(device):
return True
return False
-def should_use_fp16(device=None, model_params=0):
+def should_use_fp16(device=None, model_params=0, prioritize_performance=True):
global xpu_available
global directml_enabled
@@ -614,7 +613,7 @@ def should_use_fp16(device=None, model_params=0):
if fp16_works:
free_model_memory = (get_free_memory() * 0.9 - minimum_inference_memory())
-if model_params * 4 > free_model_memory:
+if (not prioritize_performance) or model_params * 4 > free_model_memory:
return True
if props.major < 7:

@@ -545,7 +545,7 @@ class CLIP:
load_device = model_management.text_encoder_device()
offload_device = model_management.text_encoder_offload_device()
params['device'] = load_device
-if model_management.should_use_fp16(load_device):
+if model_management.should_use_fp16(load_device, prioritize_performance=False):
params['dtype'] = torch.float16
else:
params['dtype'] = torch.float32

Loading…
Cancel
Save