diff --git a/comfy/model_management.py b/comfy/model_management.py
index 83053ea..544a945 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -432,8 +432,7 @@ def text_encoder_device():
     if args.gpu_only:
         return get_torch_device()
     elif vram_state == VRAMState.HIGH_VRAM or vram_state == VRAMState.NORMAL_VRAM:
-        #NOTE: on a Ryzen 5 7600X with 4080 it's faster to shift to GPU
-        if should_use_fp16() or torch.get_num_threads() < 8: #leaving the text encoder on the CPU is faster than shifting it if the CPU is fast enough.
+        if should_use_fp16(prioritize_performance=False):
             return get_torch_device()
         else:
             return torch.device("cpu")
@@ -569,7 +568,7 @@ def is_device_mps(device):
             return True
     return False
 
-def should_use_fp16(device=None, model_params=0):
+def should_use_fp16(device=None, model_params=0, prioritize_performance=True):
     global xpu_available
     global directml_enabled
 
@@ -614,7 +613,7 @@ def should_use_fp16(device=None, model_params=0):
 
     if fp16_works:
         free_model_memory = (get_free_memory() * 0.9 - minimum_inference_memory())
-        if model_params * 4 > free_model_memory:
+        if (not prioritize_performance) or model_params * 4 > free_model_memory:
             return True
 
     if props.major < 7:
diff --git a/comfy/sd.py b/comfy/sd.py
index 2a593fb..89df5a7 100644
--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -545,7 +545,7 @@ class CLIP:
         load_device = model_management.text_encoder_device()
         offload_device = model_management.text_encoder_offload_device()
         params['device'] = load_device
-        if model_management.should_use_fp16(load_device):
+        if model_management.should_use_fp16(load_device, prioritize_performance=False):
             params['dtype'] = torch.float16
         else:
             params['dtype'] = torch.float32
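
Illustrative sketch, not part of the patch: the new prioritize_performance flag short-circuits the free-memory check in should_use_fp16(), so the text encoder path opts into fp16 whenever the device supports it instead of only when the fp32 weights would not fit. The hypothetical helper below restates that decision in isolation; model_params and free_model_memory stand in for the values that should_use_fp16() computes internally.

def fp16_decision(model_params, free_model_memory, prioritize_performance=True):
    # Default behaviour (prioritize_performance=True): only switch to fp16 when
    # the fp32 weights (4 bytes per parameter) would not fit in the free memory
    # budget.
    # Text-encoder behaviour (prioritize_performance=False): always use fp16,
    # regardless of how much memory is free.
    return (not prioritize_performance) or model_params * 4 > free_model_memory

# Example: a 1.2B-parameter text encoder with 8 GB free.
# fp16_decision(1.2e9, 8e9)                                  -> False (fp32 fits)
# fp16_decision(1.2e9, 8e9, prioritize_performance=False)    -> True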