Okay, I've been having a lot of problems with this model. I'm using the Stojo model, altered per Icarus' suggestions, and then altered with an FFL at 100 (as torz suggested). I have the batch size lowered to 1. I've been minimally messing around with the settings to try to get the OOMs to stop, but this model will run fine for about 30 minutes and then OOM (out of memory?). It's the only model that does this to me. I've updated and taken FFL down to zero, but it still OOMs.
Thoughts?
I should also mention I'm using a 3080 Ti, an i7-8700K, and 32 GB of memory.
Here's the error readout:
Code: Select all
08/30/2022 18:39:39 ERROR Caught exception in thread: '_training'
08/30/2022 18:39:43 ERROR Got Exception on main handler:
Traceback (most recent call last):
File "C:\Users\e4978\faceswap\lib\cli\launcher.py", line 201, in execute_script
process.process()
File "C:\Users\e4978\faceswap\scripts\train.py", line 216, in process
self._end_thread(thread, err)
File "C:\Users\e4978\faceswap\scripts\train.py", line 256, in _end_thread
thread.join()
File "C:\Users\e4978\faceswap\lib\multithreading.py", line 217, in join
raise thread.err[1].with_traceback(thread.err[2])
File "C:\Users\e4978\faceswap\lib\multithreading.py", line 96, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\e4978\faceswap\scripts\train.py", line 278, in _training
raise err
File "C:\Users\e4978\faceswap\scripts\train.py", line 268, in _training
self._run_training_cycle(model, trainer)
File "C:\Users\e4978\faceswap\scripts\train.py", line 353, in _run_training_cycle
trainer.train_one_step(viewer, timelapse)
File "C:\Users\e4978\faceswap\plugins\train\trainer\_base.py", line 244, in train_one_step
self._update_viewers(viewer, timelapse_kwargs)
File "C:\Users\e4978\faceswap\plugins\train\trainer\_base.py", line 343, in _update_viewers
samples = self._samples.show_sample()
File "C:\Users\e4978\faceswap\plugins\train\trainer\_base.py", line 642, in show_sample
preds = self._get_predictions(feeds["a"], feeds["b"])
File "C:\Users\e4978\faceswap\plugins\train\trainer\_base.py", line 695, in _get_predictions
standard = self._model.model.predict([feed_a, feed_b], verbose=0)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\tensorflow\python\eager\execute.py", line 54, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.ResourceExhaustedError: Graph execution error:
Detected at node 'phaze_a/decoder_b/upscale_ri_160_3_conv/Cast' defined at (most recent call last):
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\threading.py", line 930, in _bootstrap
self._bootstrap_inner()
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\threading.py", line 973, in _bootstrap_inner
self.run()
File "C:\Users\e4978\faceswap\lib\multithreading.py", line 96, in run
self._target(*self._args, **self._kwargs)
File "C:\Users\e4978\faceswap\scripts\train.py", line 268, in _training
self._run_training_cycle(model, trainer)
File "C:\Users\e4978\faceswap\scripts\train.py", line 353, in _run_training_cycle
trainer.train_one_step(viewer, timelapse)
File "C:\Users\e4978\faceswap\plugins\train\trainer\_base.py", line 244, in train_one_step
self._update_viewers(viewer, timelapse_kwargs)
File "C:\Users\e4978\faceswap\plugins\train\trainer\_base.py", line 343, in _update_viewers
samples = self._samples.show_sample()
File "C:\Users\e4978\faceswap\plugins\train\trainer\_base.py", line 642, in show_sample
preds = self._get_predictions(feeds["a"], feeds["b"])
File "C:\Users\e4978\faceswap\plugins\train\trainer\_base.py", line 695, in _get_predictions
standard = self._model.model.predict([feed_a, feed_b], verbose=0)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\training.py", line 2033, in predict
tmp_batch_outputs = self.predict_function(iterator)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\training.py", line 1845, in predict_function
return step_function(self, iterator)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\training.py", line 1834, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\training.py", line 1823, in run_step
outputs = model.predict_step(data)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\training.py", line 1791, in predict_step
return self(x, training=False)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\training.py", line 490, in __call__
return super().__call__(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\functional.py", line 458, in call
return self._run_internal_graph(
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\functional.py", line 596, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\training.py", line 490, in __call__
return super().__call__(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\base_layer.py", line 1014, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
return fn(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\functional.py", line 458, in call
return self._run_internal_graph(
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\functional.py", line 596, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\base_layer.py", line 1010, in __call__
inputs = self._maybe_cast_inputs(inputs, input_list)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\base_layer.py", line 2473, in _maybe_cast_inputs
return tf.nest.map_structure(self._cast_single_input, inputs)
File "C:\Users\e4978\MiniConda3\envs\faceswap\lib\site-packages\keras\engine\base_layer.py", line 2486, in _cast_single_input
return tf.cast(x, self._compute_dtype_object)
Node: 'phaze_a/decoder_b/upscale_ri_160_3_conv/Cast'
failed to allocate memory
[[{{node phaze_a/decoder_b/upscale_ri_160_3_conv/Cast}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
[Op:__inference_predict_function_261370]
08/30/2022 18:39:43 CRITICAL An unexpected crash has occurred. Crash report written to 'C:\Users\e4978\faceswap\crash_report.2022.08.30.183939884200.log'. You MUST provide this file if seeking assistance. Please verify you are running the latest version of faceswap before reporting