WebUI/modules/call_queue.py

116 lines
3.9 KiB
Python
Raw Normal View History

import html
import sys
import threading
import traceback
import time
2023-04-16 17:06:28 +00:00
import gradio as gr
from modules import shared, progress
queue_lock = threading.Lock()
2023-02-06 07:57:26 +00:00
queue_lock_condition = threading.Condition(lock=queue_lock)
def wrap_queued_call(func):
def f(*args, **kwargs):
with queue_lock:
res = func(*args, **kwargs)
return res
return f
def wrap_gradio_gpu_call(func, extra_outputs=None):
2023-04-16 17:06:28 +00:00
def f(request: gr.Request, *args, **kwargs):
user = request.username
# if the first argument is a string that says "task(...)", it is treated as a job id
if len(args) > 0 and type(args[0]) == str and args[0][0:5] == "task(" and args[0][-1] == ")":
id_task = args[0]
2023-04-16 17:06:28 +00:00
progress.add_task_to_queue(user, id_task)
else:
id_task = None
with queue_lock:
shared.state.begin()
2023-04-16 17:06:28 +00:00
progress.start_task(user, id_task)
try:
res = func(*args, **kwargs)
finally:
2023-04-16 17:06:28 +00:00
progress.finish_task(user, id_task)
progress.set_last_task_result(user, id_task, res)
shared.state.end()
return res
2023-04-16 17:06:28 +00:00
return wrap_gradio_call(f, extra_outputs=extra_outputs, add_stats=True, add_request=True)
2023-04-16 17:06:28 +00:00
def wrap_gradio_call(func, extra_outputs=None, add_stats=False, add_request=False):
def f(request: gr.Request, *args, extra_outputs_array=extra_outputs, **kwargs):
run_memmon = shared.opts.memmon_poll_rate > 0 and not shared.mem_mon.disabled and add_stats
if run_memmon:
shared.mem_mon.monitor()
t = time.perf_counter()
try:
2023-04-16 17:06:28 +00:00
if add_request:
res = list(func(request, *args, **kwargs))
else:
res = list(func(*args, **kwargs))
except Exception as e:
# When printing out our debug argument list, do not print out more than a MB of text
max_debug_str_len = 131072 # (1024*1024)/8
print("Error completing request", file=sys.stderr)
argStr = f"Arguments: {str(args)} {str(kwargs)}"
print(argStr[:max_debug_str_len], file=sys.stderr)
if len(argStr) > max_debug_str_len:
print(f"(Argument list truncated at {max_debug_str_len}/{len(argStr)} characters)", file=sys.stderr)
print(traceback.format_exc(), file=sys.stderr)
shared.state.job = ""
shared.state.job_count = 0
if extra_outputs_array is None:
extra_outputs_array = [None, '']
res = extra_outputs_array + [f"<div class='error'>{html.escape(type(e).__name__+': '+str(e))}</div>"]
shared.state.skipped = False
shared.state.interrupted = False
shared.state.job_count = 0
if not add_stats:
return tuple(res)
elapsed = time.perf_counter() - t
elapsed_m = int(elapsed // 60)
elapsed_s = elapsed % 60
elapsed_text = f"{elapsed_s:.2f}s"
if elapsed_m > 0:
elapsed_text = f"{elapsed_m}m "+elapsed_text
if run_memmon:
mem_stats = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.stop().items()}
active_peak = mem_stats['active_peak']
reserved_peak = mem_stats['reserved_peak']
sys_peak = mem_stats['system_peak']
sys_total = mem_stats['total']
sys_pct = round(sys_peak/max(sys_total, 1) * 100, 2)
vram_html = f"<p class='vram'>Torch active/reserved: {active_peak}/{reserved_peak} MiB, <wbr>Sys VRAM: {sys_peak}/{sys_total} MiB ({sys_pct}%)</p>"
else:
vram_html = ''
# last item is always HTML
res[-1] += f"<div class='performance'><p class='time'>Time taken: <wbr>{elapsed_text}</p>{vram_html}</div>"
return tuple(res)
return f