diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index a6d40e8..f78a1a6 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -11,11 +11,10 @@ from .sub_quadratic_attention import efficient_dot_product_attention
 
 import model_management
 
-try:
+
+if model_management.xformers_enabled():
     import xformers
     import xformers.ops
-except:
-    pass
 
 # CrossAttn precision handling
 import os
diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py
index 15f35b9..fcbee29 100644
--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -9,11 +9,9 @@ from typing import Optional, Any
 from ldm.modules.attention import MemoryEfficientCrossAttention
 import model_management
 
-try:
+if model_management.xformers_enabled():
     import xformers
     import xformers.ops
-except:
-    pass
 
 try:
     OOM_EXCEPTION = torch.cuda.OutOfMemoryError
diff --git a/comfy/model_management.py b/comfy/model_management.py
index c1a8f5a..7365bee 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -31,15 +31,16 @@ try:
 except:
     pass
 
-try:
-    import xformers
-    import xformers.ops
-    XFORMERS_IS_AVAILBLE = True
-except:
-    XFORMERS_IS_AVAILBLE = False
-
 if "--disable-xformers" in sys.argv:
     XFORMERS_IS_AVAILBLE = False
+else:
+    try:
+        import xformers
+        import xformers.ops
+        XFORMERS_IS_AVAILBLE = True
+    except:
+        XFORMERS_IS_AVAILBLE = False
+
 
 if "--cpu" in sys.argv:
     vram_state = CPU
diff --git a/main.py b/main.py
index c3d9603..fc37781 100644
--- a/main.py
+++ b/main.py
@@ -8,9 +8,6 @@ if os.name == "nt":
     import logging
     logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage())
 
-import execution
-import server
-
 if __name__ == "__main__":
     if '--help' in sys.argv:
         print("Valid Command line Arguments:")
@@ -18,6 +15,7 @@ if __name__ == "__main__":
         print("\t--port 8188\t\t\tSet the listen port.")
         print("\t--dont-upcast-attention\t\tDisable upcasting of attention \n\t\t\t\t\tcan boost speed but increase the chances of black images.\n")
         print("\t--use-split-cross-attention\tUse the split cross attention optimization instead of the sub-quadratic one.\n\t\t\t\t\tIgnored when xformers is used.")
+        print("\t--disable-xformers\t\tdisables xformers")
         print()
         print("\t--highvram\t\t\tBy default models will be unloaded to CPU memory after being used.\n\t\t\t\t\tThis option keeps them in GPU memory.\n")
         print("\t--normalvram\t\t\tUsed to force normal vram use if lowvram gets automatically enabled.")
@@ -31,6 +29,9 @@ if __name__ == "__main__":
         print("disabling upcasting of attention")
         os.environ['ATTN_PRECISION'] = "fp16"
 
+import execution
+import server
+
 def prompt_worker(q, server):
     e = execution.PromptExecutor(server)
     while True:
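
Note for reviewers: the new import guards in attention.py and model.py call model_management.xformers_enabled(), which is not shown in the hunks above. A minimal sketch of what such a helper could look like, assuming it simply exposes the module-level XFORMERS_IS_AVAILBLE flag computed in comfy/model_management.py (the actual definition in the repository may differ):

    # Hypothetical sketch, not part of this diff. Assumes the module-level
    # XFORMERS_IS_AVAILBLE flag set earlier in comfy/model_management.py.
    def xformers_enabled():
        # True only if the xformers import succeeded and the user did not
        # pass --disable-xformers on the command line.
        return XFORMERS_IS_AVAILBLE

With that helper in place, the call sites reduce to a plain boolean check, so xformers is imported exactly once (in model_management.py) and every other module stays free of try/except import fallbacks.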