Hello,
Whenever I try to run my training on my GPU server, I get this error.
I don't understand why. Can I get some help with it?
Code: Select all
Traceback (most recent call last):
File "/data/ai-vision/DeepFake_creation/faceswap/lib/cli/launcher.py", line 182, in execute_script
process.process()
File "/data/ai-vision/DeepFake_creation/faceswap/scripts/train.py", line 190, in process
self._end_thread(thread, err)
File "/data/ai-vision/DeepFake_creation/faceswap/scripts/train.py", line 230, in _end_thread
thread.join()
File "/data/ai-vision/DeepFake_creation/faceswap/lib/multithreading.py", line 121, in join
raise thread.err[1].with_traceback(thread.err[2])
File "/data/ai-vision/DeepFake_creation/faceswap/lib/multithreading.py", line 37, in run
self._target(*self._args, **self._kwargs)
File "/data/ai-vision/DeepFake_creation/faceswap/scripts/train.py", line 252, in _training
raise err
File "/data/ai-vision/DeepFake_creation/faceswap/scripts/train.py", line 240, in _training
model = self._load_model()
File "/data/ai-vision/DeepFake_creation/faceswap/scripts/train.py", line 268, in _load_model
model.build()
File "/data/ai-vision/DeepFake_creation/faceswap/plugins/train/model/_base.py", line 286, in build
model = self._io._load() # pylint:disable=protected-access
File "/data/ai-vision/DeepFake_creation/faceswap/plugins/train/model/_base.py", line 556, in _load
model = load_model(self._filename, compile=False)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/saving/save.py", line 201, in load_model
return hdf5_format.load_model_from_hdf5(filepath, custom_objects,
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/saving/hdf5_format.py", line 180, in load_model_from_hdf5
model = model_config_lib.model_from_config(model_config,
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/saving/model_config.py", line 52, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/layers/serialization.py", line 163, in deserialize
return generic_utils.deserialize_keras_object(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 674, in deserialize_keras_object
deserialized_obj = cls.from_config(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py", line 668, in from_config
input_tensors, output_tensors, created_layers = reconstruct_from_config(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py", line 1279, in reconstruct_from_config
process_layer(layer_data)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py", line 1261, in process_layer
layer = deserialize_layer(layer_data, custom_objects=custom_objects)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/layers/serialization.py", line 163, in deserialize
return generic_utils.deserialize_keras_object(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/utils/generic_utils.py", line 674, in deserialize_keras_object
deserialized_obj = cls.from_config(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py", line 668, in from_config
input_tensors, output_tensors, created_layers = reconstruct_from_config(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py", line 1289, in reconstruct_from_config
process_node(layer, node_data)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py", line 1237, in process_node
output_tensors = layer(input_tensors, **kwargs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 996, in __call__
return self._functional_construction_call(inputs, args, kwargs,
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1134, in _functional_construction_call
outputs = self._keras_tensor_symbolic_call(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 867, in _keras_tensor_symbolic_call
return self._infer_output_signature(inputs, args, kwargs, input_masks)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 905, in _infer_output_signature
self._maybe_build(inputs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 2667, in _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/layers/convolutional.py", line 201, in build
self.kernel = self.add_weight(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 666, in add_weight
variable = self._add_variable_with_custom_getter(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/training/tracking/base.py", line 813, in _add_variable_with_custom_getter
new_variable = getter(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 127, in make_variable
return tf_variables.VariableV1(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 266, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 212, in _variable_v1_call
return previous_getter(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 67, in getter
return captured_getter(captured_previous, **kwargs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py", line 3547, in creator
return next_creator(**kwargs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 205, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/variable_scope.py", line 2612, in default_variable_creator
return resource_variable_ops.ResourceVariable(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 270, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1602, in __init__
self._init_from_args(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/resource_variable_ops.py", line 1740, in _init_from_args
initial_value = initial_value()
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/initializers/initializers_v2.py", line 523, in __call__
return self._random_generator.random_uniform(shape, -limit, limit, dtype)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/keras/initializers/initializers_v2.py", line 978, in random_uniform
return op(
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py", line 206, in wrapper
return target(*args, **kwargs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/ops/random_ops.py", line 296, in random_uniform
shape = tensor_util.shape_tensor(shape)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/framework/tensor_util.py", line 1080, in shape_tensor
return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/profiler/trace.py", line 163, in wrapped
return func(*args, **kwargs)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/framework/ops.py", line 1566, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 346, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 271, in constant
return _constant_impl(value, dtype, shape, name, verify_shape=False,
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 283, in _constant_impl
return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 308, in _constant_eager_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 105, in convert_to_eager_tensor
ctx.ensure_initialized()
File "/data/ai-vision/miniconda3/envs/faceswap/lib/python3.8/site-packages/tensorflow/python/eager/context.py", line 555, in ensure_initialized
context_handle = pywrap_tfe.TFE_NewContext(opts)
tensorflow.python.framework.errors_impl.InternalError: cudaGetDevice() failed. Status: initialization error
============ System Information ============
encoding: UTF-8
git_branch: Not Found
git_commits: Not Found
gpu_cuda: 10.1
gpu_cudnn: No global version found. Check Conda packages for Conda cuDNN
gpu_devices: GPU_0: Tesla V100-PCIE-32GB, GPU_1: Tesla V100-PCIE-32GB
gpu_devices_active: GPU_0, GPU_1
gpu_driver: 418.87.01
gpu_vram: GPU_0: 32480MB, GPU_1: 32480MB
os_machine: x86_64
os_platform: Linux-3.10.0-1160.6.1.el7.x86_64-x86_64-with-glibc2.10
os_release: 3.10.0-1160.6.1.el7.x86_64
py_command: faceswap/faceswap.py train -A data/dst -B data/src -m model/ -p
py_conda_version: conda 4.10.3
py_implementation: CPython
py_version: 3.8.8
py_virtual_env: True
sys_cores: 104
sys_processor: x86_64
sys_ram: Total: 257359MB, Available: 243666MB, Used: 12809MB, Free: 165984MB