Very slow training performance - same hardware
Hi all,
I'm running training on a laptop with an RTX 3080 16GB GPU, an i7-11800H, and 32 GB RAM, using the Dfaker model with a batch size of 28.
I'm only seeing 5.6 EGs/sec during a 12-hour training run.
When I've run in the past on the same hardware, I've seen well over 10x that. I recently had to reformat and reinstall Windows, and now I'm seeing this really slow training.
I have confirmed that the GPU is enabled (or at least, it is not ticked as excluded under Global Options). What else should I be looking into?
Thank you!
Edit: system info.
Code: Select all
============ System Information ============
encoding: cp1252
git_branch: master
git_commits: 09c7d8a Merge branch 'staging' of https://github.com/deepfakes/faceswap into staging
gpu_cuda: No global version found. Check Conda packages for Conda Cuda
gpu_cudnn: No global version found. Check Conda packages for Conda cuDNN
gpu_devices: GPU_0: NVIDIA GeForce RTX 3080 Laptop GPU
gpu_devices_active: GPU_0
gpu_driver: 471.75
gpu_vram: GPU_0: 16384MB
os_machine: AMD64
os_platform: Windows-10-10.0.19043-SP0
os_release: 10
py_command: C:\Users\aaron\faceswap/faceswap.py gui
py_conda_version: conda 4.11.0
py_implementation: CPython
py_version: 3.8.12
py_virtual_env: True
sys_cores: 16
sys_processor: Intel64 Family 6 Model 141 Stepping 1, GenuineIntel
sys_ram: Total: 32429MB, Available: 15169MB, Used: 17259MB, Free: 15169MB
=============== Pip Packages ===============
absl-py==0.15.0
astunparse==1.6.3
cachetools==4.2.4
certifi==2021.10.8
charset-normalizer==2.0.10
clang==5.0
cycler @ file:///tmp/build/80754af9/cycler_1637851556182/work
fastcluster==1.1.26
ffmpy==0.2.3
flatbuffers==1.12
gast==0.4.0
google-auth==1.35.0
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
grpcio==1.43.0
h5py==3.1.0
idna==3.3
imageio @ file:///tmp/build/80754af9/imageio_1617700267927/work
imageio-ffmpeg @ file:///home/conda/feedstock_root/build_artifacts/imageio-ffmpeg_1629987409325/work
importlib-metadata==4.10.0
joblib @ file:///tmp/build/80754af9/joblib_1635411271373/work
keras==2.6.0
Keras-Preprocessing==1.1.2
kiwisolver @ file:///C:/ci/kiwisolver_1612282606037/work
Markdown==3.3.6
matplotlib @ file:///C:/ci/matplotlib-base_1592837548929/work
mkl-fft==1.3.0
mkl-random==1.1.1
mkl-service==2.3.0
numpy @ file:///C:/ci/numpy_and_numpy_base_1603466732592/work
nvidia-ml-py==11.495.46
oauthlib==3.1.1
olefile @ file:///Users/ktietz/demo/mc3/conda-bld/olefile_1629805411829/work
opencv-python==4.5.5.62
opt-einsum==3.3.0
Pillow==8.4.0
protobuf==3.19.3
psutil @ file:///C:/ci/psutil_1612298324802/work
pyasn1==0.4.8
pyasn1-modules==0.2.8
pyparsing @ file:///tmp/build/80754af9/pyparsing_1635766073266/work
python-dateutil @ file:///tmp/build/80754af9/python-dateutil_1626374649649/work
pywin32==302
requests==2.27.1
requests-oauthlib==1.3.0
rsa==4.8
scikit-learn @ file:///C:/ci/scikit-learn_1641891148727/work
scipy @ file:///C:/ci/scipy_1616703433439/work
sip==4.19.13
six==1.15.0
tensorboard==2.6.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
tensorflow-estimator==2.6.0
tensorflow-gpu==2.6.2
termcolor==1.1.0
threadpoolctl @ file:///Users/ktietz/demo/mc3/conda-bld/threadpoolctl_1629802263681/work
tornado @ file:///C:/ci/tornado_1606942392901/work
tqdm @ file:///tmp/build/80754af9/tqdm_1635330843403/work
typing-extensions==3.7.4.3
urllib3==1.26.8
Werkzeug==2.0.2
wincertstore==0.2
wrapt==1.12.1
zipp==3.7.0
============== Conda Packages ==============
# packages in environment at C:\Users\aaron\MiniConda3\envs\faceswap:
#
# Name Version Build Channel
absl-py 0.15.0 pypi_0 pypi
astunparse 1.6.3 pypi_0 pypi
blas 1.0 mkl
ca-certificates 2021.10.26 haa95532_2
cachetools 4.2.4 pypi_0 pypi
certifi 2021.10.8 py38haa95532_2
charset-normalizer 2.0.10 pypi_0 pypi
clang 5.0 pypi_0 pypi
cycler 0.11.0 pyhd3eb1b0_0
fastcluster 1.1.26 py38h5d928e2_3 conda-forge
ffmpeg 4.3.1 ha925a31_0 conda-forge
ffmpy 0.2.3 pypi_0 pypi
flatbuffers 1.12 pypi_0 pypi
freetype 2.10.4 hd328e21_0
gast 0.4.0 pypi_0 pypi
git 2.32.0 haa95532_1
google-auth 1.35.0 pypi_0 pypi
google-auth-oauthlib 0.4.6 pypi_0 pypi
google-pasta 0.2.0 pypi_0 pypi
grpcio 1.43.0 pypi_0 pypi
h5py 3.1.0 pypi_0 pypi
icc_rt 2019.0.0 h0cc432a_1
icu 58.2 ha925a31_3
idna 3.3 pypi_0 pypi
imageio 2.9.0 pyhd3eb1b0_0
imageio-ffmpeg 0.4.5 pyhd8ed1ab_0 conda-forge
importlib-metadata 4.10.0 pypi_0 pypi
intel-openmp 2021.4.0 haa95532_3556
joblib 1.1.0 pyhd3eb1b0_0
jpeg 9d h2bbff1b_0
keras 2.6.0 pypi_0 pypi
keras-preprocessing 1.1.2 pypi_0 pypi
kiwisolver 1.3.1 py38hd77b12b_0
libpng 1.6.37 h2a8f88b_0
libtiff 4.2.0 hd0e1b90_0
libwebp 1.2.0 h2bbff1b_0
lz4-c 1.9.3 h2bbff1b_1
markdown 3.3.6 pypi_0 pypi
matplotlib 3.2.2 0
matplotlib-base 3.2.2 py38h64f37c6_0
mkl 2020.2 256
mkl-service 2.3.0 py38h196d8e1_0
mkl_fft 1.3.0 py38h46781fe_0
mkl_random 1.1.1 py38h47e9c7a_0
numpy 1.19.2 py38hadc3359_0
numpy-base 1.19.2 py38ha3acd2a_0
nvidia-ml-py 11.495.46 pypi_0 pypi
oauthlib 3.1.1 pypi_0 pypi
olefile 0.46 pyhd3eb1b0_0
opencv-python 4.5.5.62 pypi_0 pypi
openssl 1.1.1l h2bbff1b_0
opt-einsum 3.3.0 pypi_0 pypi
pillow 8.4.0 py38hd45dc43_0
pip 21.2.2 py38haa95532_0
protobuf 3.19.3 pypi_0 pypi
psutil 5.8.0 py38h2bbff1b_1
pyasn1 0.4.8 pypi_0 pypi
pyasn1-modules 0.2.8 pypi_0 pypi
pyparsing 3.0.4 pyhd3eb1b0_0
pyqt 5.9.2 py38ha925a31_4
python 3.8.12 h6244533_0
python-dateutil 2.8.2 pyhd3eb1b0_0
python_abi 3.8 2_cp38 conda-forge
pywin32 302 py38h827c3e9_1
qt 5.9.7 vc14h73c81de_0
requests 2.27.1 pypi_0 pypi
requests-oauthlib 1.3.0 pypi_0 pypi
rsa 4.8 pypi_0 pypi
scikit-learn 1.0.2 py38hf11a4ad_0
scipy 1.6.2 py38h14eb087_0
setuptools 58.0.4 py38haa95532_0
sip 4.19.13 py38ha925a31_0
six 1.15.0 pypi_0 pypi
sqlite 3.37.0 h2bbff1b_0
tensorboard 2.6.0 pypi_0 pypi
tensorboard-data-server 0.6.1 pypi_0 pypi
tensorboard-plugin-wit 1.8.1 pypi_0 pypi
tensorflow-estimator 2.6.0 pypi_0 pypi
tensorflow-gpu 2.6.2 pypi_0 pypi
termcolor 1.1.0 pypi_0 pypi
threadpoolctl 2.2.0 pyh0d69192_0
tk 8.6.11 h2bbff1b_0
tornado 6.1 py38h2bbff1b_0
tqdm 4.62.3 pyhd3eb1b0_1
typing-extensions 3.7.4.3 pypi_0 pypi
urllib3 1.26.8 pypi_0 pypi
vc 14.2 h21ff451_1
vs2015_runtime 14.27.29016 h5e58377_2
werkzeug 2.0.2 pypi_0 pypi
wheel 0.37.1 pyhd3eb1b0_0
wincertstore 0.2 py38haa95532_2
wrapt 1.12.1 pypi_0 pypi
xz 5.2.5 h62dcd97_0
zipp 3.7.0 pypi_0 pypi
zlib 1.2.11 h8cc25b3_4
zstd 1.4.9 h19a0ad4_0
================= Configs ==================
--------- .faceswap ---------
backend: nvidia
--------- convert.ini ---------
[color.color_transfer]
clip: True
preserve_paper: True
[color.manual_balance]
colorspace: HSV
balance_1: 0.0
balance_2: 0.0
balance_3: 0.0
contrast: 0.0
brightness: 0.0
[color.match_hist]
threshold: 99.0
[mask.box_blend]
type: gaussian
distance: 11.0
radius: 5.0
passes: 8
[mask.mask_blend]
type: gaussian
kernel_size: 3
passes: 8
threshold: 4
erosion: 0.0
[scaling.sharpen]
method: unsharp_mask
amount: 150
radius: 0.3
threshold: 5.0
[writer.ffmpeg]
container: mp4
codec: libx264
crf: 23
preset: medium
tune: None
profile: auto
level: auto
skip_mux: False
[writer.gif]
fps: 25
loop: 0
palettesize: 256
subrectangles: False
[writer.opencv]
format: png
draw_transparent: False
jpg_quality: 75
png_compress_level: 0
[writer.pillow]
format: png
draw_transparent: False
optimize: False
gif_interlace: True
jpg_quality: 75
png_compress_level: 3
tif_compression: tiff_deflate
--------- extract.ini ---------
[global]
allow_growth: True
[align.fan]
batch-size: 12
[detect.cv2_dnn]
confidence: 50
[detect.mtcnn]
minsize: 20
scalefactor: 0.709
batch-size: 8
threshold_1: 0.6
threshold_2: 0.7
threshold_3: 0.7
[detect.s3fd]
confidence: 70
batch-size: 4
[mask.bisenet_fp]
batch-size: 8
include_ears: False
include_hair: False
include_glasses: True
[mask.unet_dfl]
batch-size: 8
[mask.vgg_clear]
batch-size: 6
[mask.vgg_obstructed]
batch-size: 2
--------- gui.ini ---------
[global]
fullscreen: False
tab: extract
options_panel_width: 30
console_panel_height: 20
icon_size: 14
font: default
font_size: 9
autosave_last_session: prompt
timeout: 120
auto_load_model_stats: True
--------- train.ini ---------
[global]
centering: face
coverage: 87.5
icnr_init: False
conv_aware_init: False
optimizer: adam
learning_rate: 5e-05
epsilon_exponent: -7
reflect_padding: False
allow_growth: True
mixed_precision: False
nan_protection: True
convert_batchsize: 16
[global.loss]
loss_function: ssim
mask_loss_function: mse
l2_reg_term: 100
eye_multiplier: 3
mouth_multiplier: 2
penalized_mask_loss: True
mask_type: extended
mask_blur_kernel: 3
mask_threshold: 4
learn_mask: False
[model.dfaker]
output_size: 128
[model.dfl_h128]
lowmem: False
[model.dfl_sae]
input_size: 128
clipnorm: True
architecture: df
autoencoder_dims: 0
encoder_dims: 42
decoder_dims: 21
multiscale_decoder: False
[model.dlight]
features: best
details: good
output_size: 256
[model.original]
lowmem: False
[model.phaze_a]
output_size: 128
shared_fc: None
enable_gblock: True
split_fc: True
split_gblock: False
split_decoders: False
enc_architecture: fs_original
enc_scaling: 40
enc_load_weights: True
bottleneck_type: dense
bottleneck_norm: None
bottleneck_size: 1024
bottleneck_in_encoder: True
fc_depth: 1
fc_min_filters: 1024
fc_max_filters: 1024
fc_dimensions: 4
fc_filter_slope: -0.5
fc_dropout: 0.0
fc_upsampler: upsample2d
fc_upsamples: 1
fc_upsample_filters: 512
fc_gblock_depth: 3
fc_gblock_min_nodes: 512
fc_gblock_max_nodes: 512
fc_gblock_filter_slope: -0.5
fc_gblock_dropout: 0.0
dec_upscale_method: subpixel
dec_norm: None
dec_min_filters: 64
dec_max_filters: 512
dec_filter_slope: -0.45
dec_res_blocks: 1
dec_output_kernel: 5
dec_gaussian: True
dec_skip_last_residual: True
freeze_layers: keras_encoder
load_layers: encoder
fs_original_depth: 4
fs_original_min_filters: 128
fs_original_max_filters: 1024
mobilenet_width: 1.0
mobilenet_depth: 1
mobilenet_dropout: 0.001
[model.realface]
input_size: 64
output_size: 128
dense_nodes: 1536
complexity_encoder: 128
complexity_decoder: 512
[model.unbalanced]
input_size: 128
lowmem: False
clipnorm: True
nodes: 1024
complexity_encoder: 128
complexity_decoder_a: 384
complexity_decoder_b: 512
[model.villain]
lowmem: False
[trainer.original]
preview_images: 14
zoom_amount: 5
rotation_range: 10
shift_range: 5
flip_chance: 50
color_lightness: 30
color_ab: 8
color_clahe_chance: 50
color_clahe_max_size: 4