diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py
index a6d40e8..f78a1a6 100644
--- a/comfy/ldm/modules/attention.py
+++ b/comfy/ldm/modules/attention.py
@@ -11,11 +11,10 @@ from .sub_quadratic_attention import efficient_dot_product_attention
 
 import model_management
 
-try:
+
+if model_management.xformers_enabled():
     import xformers
     import xformers.ops
-except:
-    pass
 
 # CrossAttn precision handling
 import os
diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py
index 15f35b9..fcbee29 100644
--- a/comfy/ldm/modules/diffusionmodules/model.py
+++ b/comfy/ldm/modules/diffusionmodules/model.py
@@ -9,11 +9,9 @@ from typing import Optional, Any
 from ldm.modules.attention import MemoryEfficientCrossAttention
 import model_management
 
-try:
+if model_management.xformers_enabled():
     import xformers
     import xformers.ops
-except:
-    pass
 
 try:
     OOM_EXCEPTION = torch.cuda.OutOfMemoryError
diff --git a/comfy/model_management.py b/comfy/model_management.py
index c1a8f5a..7365bee 100644
--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -31,15 +31,16 @@ try:
 except:
     pass
 
-try:
-    import xformers
-    import xformers.ops
-    XFORMERS_IS_AVAILBLE = True
-except:
-    XFORMERS_IS_AVAILBLE = False
-
 if "--disable-xformers" in sys.argv:
     XFORMERS_IS_AVAILBLE = False
+else:
+    try:
+        import xformers
+        import xformers.ops
+        XFORMERS_IS_AVAILBLE = True
+    except:
+        XFORMERS_IS_AVAILBLE = False
+
 
 if "--cpu" in sys.argv:
     vram_state = CPU
diff --git a/main.py b/main.py
index c3d9603..fc37781 100644
--- a/main.py
+++ b/main.py
@@ -8,9 +8,6 @@ if os.name == "nt":
     import logging
     logging.getLogger("xformers").addFilter(lambda record: 'A matching Triton is not available' not in record.getMessage())
 
-import execution
-import server
-
 if __name__ == "__main__":
     if '--help' in sys.argv:
         print("Valid Command line Arguments:")
@@ -18,6 +15,7 @@ if __name__ == "__main__":
         print("\t--port 8188\t\t\tSet the listen port.")
         print("\t--dont-upcast-attention\t\tDisable upcasting of attention \n\t\t\t\t\tcan boost speed but increase the chances of black images.\n")
         print("\t--use-split-cross-attention\tUse the split cross attention optimization instead of the sub-quadratic one.\n\t\t\t\t\tIgnored when xformers is used.")
+        print("\t--disable-xformers\t\tdisables xformers")
         print()
         print("\t--highvram\t\t\tBy default models will be unloaded to CPU memory after being used.\n\t\t\t\t\tThis option keeps them in GPU memory.\n")
         print("\t--normalvram\t\t\tUsed to force normal vram use if lowvram gets automatically enabled.")
@@ -31,6 +29,9 @@ if __name__ == "__main__":
         print("disabling upcasting of attention")
         os.environ['ATTN_PRECISION'] = "fp16"
 
+import execution
+import server
+
 def prompt_worker(q, server):
     e = execution.PromptExecutor(server)
     while True:
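
Note for reviewers: the new import guards in attention.py and model.py call model_management.xformers_enabled(), which is not shown in the hunks above. A minimal sketch of what such a helper could look like, assuming it simply exposes the module-level XFORMERS_IS_AVAILBLE flag computed in comfy/model_management.py (the actual definition in the repository may differ):

    # Hypothetical sketch, not part of this diff. Assumes the module-level
    # XFORMERS_IS_AVAILBLE flag set earlier in comfy/model_management.py.
    def xformers_enabled():
        # True only if the xformers import succeeded and the user did not
        # pass --disable-xformers on the command line.
        return XFORMERS_IS_AVAILBLE

With that helper in place, the call sites reduce to a plain boolean check, so xformers is imported exactly once (in model_management.py) and every other module stays free of try/except import fallbacks.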