I have tried working with smaller batches, even a batch size of 1, but the error persists.
use_tb_logger: True
model: spsr
scale: 2
gpu_ids: [2, 3]
datasets:[
train:[
name: exp
mode: LRHR
dataroot_HR: /home/beemap/mobin_workspace/data/exp/train_HR.lmdb
dataroot_LR: /home/beemap/mobin_workspace/data/exp/train_LR.lmdb
subset_file: None
use_shuffle: True
n_workers: 16
batch_size: 50
HR_size: 128
use_flip: True
use_rot: True
phase: train
scale: 2
data_type: lmdb
]
val:[
name: exp
mode: LRHR
dataroot_HR: /home/beemap/mobin_workspace/data/exp/test_HR.lmdb
dataroot_LR: /home/beemap/mobin_workspace/data/exp/test_LR.lmdb
phase: val
scale: 2
data_type: lmdb
]
]
path:[
root: /home/beemap/mobin_workspace/code/SPSR
pretrain_model_G: /home/beemap/mobin_workspace/code/SPSR/experiments/pretrain_models/RRDB_PSNR_x4.pth
experiments_root: /home/beemap/mobin_workspace/code/SPSR/experiments/SPSR_LR_images_gen_4m_cycleGAN
models: /home/beemap/mobin_workspace/code/SPSR/experiments/SPSR_LR_images_gen_4m_cycleGAN/models
training_state: /home/beemap/mobin_workspace/code/SPSR/experiments/SPSR_LR_images_gen_4m_cycleGAN/training_state
log: /home/beemap/mobin_workspace/code/SPSR/experiments/SPSR_LR_images_gen_4m_cycleGAN
val_images: /home/beemap/mobin_workspace/code/SPSR/experiments/SPSR_LR_images_gen_4m_cycleGAN/val_images
]
network_G:[
which_model_G: spsr_net
norm_type: None
mode: CNA
nf: 64
nb: 23
in_nc: 3
out_nc: 3
gc: 32
group: 1
scale: 2
]
network_D:[
which_model_D: discriminator_vgg_128
norm_type: batch
act_type: leakyrelu
mode: CNA
nf: 64
in_nc: 3
]
train:[
lr_G: 0.0001
lr_G_grad: 0.0001
weight_decay_G: 0
weight_decay_G_grad: 0
beta1_G: 0.9
beta1_G_grad: 0.9
lr_D: 0.0001
weight_decay_D: 0
beta1_D: 0.9
lr_scheme: MultiStepLR
lr_steps: [50000, 100000, 200000, 300000]
lr_gamma: 0.5
pixel_criterion: l1
pixel_weight: 0.01
feature_criterion: l1
feature_weight: 1
gan_type: vanilla
gan_weight: 0.005
gradient_pixel_weight: 0.01
gradient_gan_weight: 0.005
pixel_branch_criterion: l1
pixel_branch_weight: 0.5
Branch_pretrain: 1
Branch_init_iters: 5000
manual_seed: 9
niter: 500000.0
val_freq: 50000000000.0
]
logger:[
print_freq: 100
save_checkpoint_freq: 4000.0
]
is_train: True
21-05-24 00:41:34.605 - INFO: Random seed: 9
21-05-24 00:41:34.608 - INFO: Read lmdb keys from cache: /home/beemap/mobin_workspace/data/exp/train_HR.lmdb/_keys_cache.p
21-05-24 00:41:34.611 - INFO: Read lmdb keys from cache: /home/beemap/mobin_workspace/data/exp/train_LR.lmdb/_keys_cache.p
21-05-24 00:41:34.614 - INFO: Dataset [LRHRDataset - exp] is created.
21-05-24 00:41:34.614 - INFO: Number of train images: 4,630, iters: 93
21-05-24 00:41:34.614 - INFO: Total epochs needed: 5377 for iters 500,000
21-05-24 00:41:34.615 - INFO: Read lmdb keys from cache: /home/beemap/mobin_workspace/data/exp/test_HR.lmdb/_keys_cache.p
21-05-24 00:41:34.615 - INFO: Read lmdb keys from cache: /home/beemap/mobin_workspace/data/exp/test_LR.lmdb/_keys_cache.p
21-05-24 00:41:34.615 - INFO: Dataset [LRHRDataset - exp] is created.
21-05-24 00:41:34.615 - INFO: Number of val images in [exp]: 50
21-05-24 00:41:35.034 - INFO: Initialization method [kaiming]
21-05-24 00:41:38.987 - INFO: Initialization method [kaiming]
21-05-24 00:41:39.614 - INFO: Initialization method [kaiming]
21-05-24 00:41:40.026 - INFO: Loading pretrained model for G [/home/beemap/mobin_workspace/code/SPSR/experiments/pretrain_models/RRDB_PSNR_x4.pth] ...
21-05-24 00:41:42.554 - WARNING: Params [module.get_g_nopadding.weight_h] will not optimize.
21-05-24 00:41:42.554 - WARNING: Params [module.get_g_nopadding.weight_v] will not optimize.
21-05-24 00:41:42.570 - INFO: Model [SPSRModel] is created.
21-05-24 00:41:42.570 - INFO: Start training from epoch: 0, iter: 0
export CUDA_VISIBLE_DEVICES=2,3
Path already exists. Rename it to [/home/beemap/mobin_workspace/code/SPSR/experiments/SPSR_LR_images_gen_4m_cycleGAN_archived_210524-004134]
/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/optim/lr_scheduler.py:416: UserWarning: To get the last learning rate computed by the scheduler, please use `get_last_lr()`.
warnings.warn("To get the last learning rate computed by the scheduler, "
21-05-24 00:44:53.517 - INFO: <epoch: 1, iter: 100, lr:1.000e-04> l_g_pix: 3.3209e-03 l_g_fea: 1.6455e+00 l_g_gan: 7.5400e-02 l_g_pix_grad_branch: 2.6782e-02 l_d_real: 6.6168e-05 l_d_fake: 1.1682e-06 l_d_real_grad: 1.0860e-03 l_d_fake_grad: 3.4428e-05 D_real: 1.7047e+01 D_fake: 1.9670e+00 D_real_grad: 1.2006e+01 D_fake_grad: -5.3090e-01
21-05-24 00:48:04.134 - INFO: <epoch: 2, iter: 200, lr:1.000e-04> l_g_pix: 3.6927e-03 l_g_fea: 1.6852e+00 l_g_gan: 7.7636e-02 l_g_pix_grad_branch: 2.8236e-02 l_d_real: 5.6790e-06 l_d_fake: 1.5473e-06 l_d_real_grad: 1.9656e-04 l_d_fake_grad: 2.0194e-06 D_real: 1.7042e+01 D_fake: 1.5145e+00 D_real_grad: 1.5466e+01 D_fake_grad: 1.3511e+00
21-05-24 00:51:16.203 - INFO: <epoch: 3, iter: 300, lr:1.000e-04> l_g_pix: 3.1596e-03 l_g_fea: 1.7432e+00 l_g_gan: 5.4458e-02 l_g_pix_grad_branch: 2.8226e-02 l_d_real: 3.5918e-04 l_d_fake: 3.7819e-04 l_d_real_grad: 2.3085e-04 l_d_fake_grad: 1.1712e-04 D_real: 1.7641e+01 D_fake: 6.7494e+00 D_real_grad: 1.0886e+01 D_fake_grad: -3.8302e-02
21-05-24 00:54:28.644 - INFO: <epoch: 4, iter: 400, lr:1.000e-04> l_g_pix: 2.9831e-03 l_g_fea: 1.7308e+00 l_g_gan: 5.7067e-02 l_g_pix_grad_branch: 2.8616e-02 l_d_real: 3.4906e-02 l_d_fake: 8.1219e-04 l_d_real_grad: 1.1727e-02 l_d_fake_grad: 1.6730e-02 D_real: 1.5657e+01 D_fake: 4.2613e+00 D_real_grad: 7.5860e+00 D_fake_grad: -4.1845e+00
21-05-24 00:57:41.556 - INFO: <epoch: 5, iter: 500, lr:1.000e-04> l_g_pix: 3.0206e-03 l_g_fea: 1.6828e+00 l_g_gan: 3.9894e-02 l_g_pix_grad_branch: 3.1332e-02 l_d_real: 3.7866e-02 l_d_fake: 4.4069e-02 l_d_real_grad: 5.7599e-04 l_d_fake_grad: 6.5754e-04 D_real: -1.7732e+00 D_fake: -9.7111e+00 D_real_grad: 9.5656e+00 D_fake_grad: -1.5118e+00
21-05-24 01:00:54.652 - INFO: <epoch: 6, iter: 600, lr:1.000e-04> l_g_pix: 3.1056e-03 l_g_fea: 1.5872e+00 l_g_gan: 7.1355e-02 l_g_pix_grad_branch: 2.9602e-02 l_d_real: 1.1663e-01 l_d_fake: 3.5371e-01 l_d_real_grad: 3.3140e-07 l_d_fake_grad: 8.8215e-08 D_real: -6.1734e+00 D_fake: -2.0209e+01 D_real_grad: 2.6732e+00 D_fake_grad: -1.6756e+01
21-05-24 01:04:06.999 - INFO: <epoch: 7, iter: 700, lr:1.000e-04> l_g_pix: 3.3884e-03 l_g_fea: 1.6590e+00 l_g_gan: 9.8999e-02 l_g_pix_grad_branch: 2.7133e-02 l_d_real: 6.8104e-05 l_d_fake: 1.0225e-02 l_d_real_grad: 0.0000e+00 l_d_fake_grad: 0.0000e+00 D_real: 4.4747e+00 D_fake: -1.5320e+01 D_real_grad: 7.7027e+00 D_fake_grad: -2.8511e+01
21-05-24 01:07:19.649 - INFO: <epoch: 8, iter: 800, lr:1.000e-04> l_g_pix: 3.0371e-03 l_g_fea: 1.6621e+00 l_g_gan: 7.6863e-02 l_g_pix_grad_branch: 2.7822e-02 l_d_real: 1.3455e-05 l_d_fake: 1.0611e-04 l_d_real_grad: 4.3578e-04 l_d_fake_grad: 1.6385e-04 D_real: -6.1604e+00 D_fake: -2.1533e+01 D_real_grad: 3.6745e+00 D_fake_grad: -8.4098e+00
21-05-24 01:10:31.936 - INFO: <epoch: 9, iter: 900, lr:1.000e-04> l_g_pix: 2.8769e-03 l_g_fea: 1.6759e+00 l_g_gan: 2.9822e-02 l_g_pix_grad_branch: 2.8341e-02 l_d_real: 2.8139e-02 l_d_fake: 1.0946e-01 l_d_real_grad: 5.4808e-05 l_d_fake_grad: 4.0746e-05 D_real: -7.9460e+00 D_fake: -1.3842e+01 D_real_grad: 1.0622e+00 D_fake_grad: -1.2964e+01
21-05-24 01:13:44.604 - INFO: <epoch: 10, iter: 1,000, lr:1.000e-04> l_g_pix: 2.7664e-03 l_g_fea: 1.6928e+00 l_g_gan: 9.8261e-02 l_g_pix_grad_branch: 2.8014e-02 l_d_real: 3.4927e-06 l_d_fake: 3.4571e-07 l_d_real_grad: 3.7050e-06 l_d_fake_grad: 2.1339e-05 D_real: -1.1567e+01 D_fake: -3.1219e+01 D_real_grad: 1.3630e+01 D_fake_grad: -1.1159e+00
21-05-24 01:16:58.029 - INFO: <epoch: 11, iter: 1,100, lr:1.000e-04> l_g_pix: 2.6696e-03 l_g_fea: 1.6866e+00 l_g_gan: 6.8830e-02 l_g_pix_grad_branch: 3.0263e-02 l_d_real: 1.3072e-05 l_d_fake: 1.4579e-04 l_d_real_grad: 8.3739e-05 l_d_fake_grad: 5.7104e-05 D_real: -9.3927e+00 D_fake: -2.3158e+01 D_real_grad: 9.3501e+00 D_fake_grad: -3.3562e+00
21-05-24 01:20:12.842 - INFO: <epoch: 13, iter: 1,200, lr:1.000e-04> l_g_pix: 3.1989e-03 l_g_fea: 1.6771e+00 l_g_gan: 7.3309e-02 l_g_pix_grad_branch: 2.6471e-02 l_d_real: 8.8113e-02 l_d_fake: 1.5901e-04 l_d_real_grad: 2.7180e-07 l_d_fake_grad: 1.5163e-06 D_real: -2.4804e+01 D_fake: -3.9421e+01 D_real_grad: 1.7869e+01 D_fake_grad: 9.5287e-01
21-05-24 01:23:25.288 - INFO: <epoch: 14, iter: 1,300, lr:1.000e-04> l_g_pix: 2.9128e-03 l_g_fea: 1.6844e+00 l_g_gan: 5.0193e-02 l_g_pix_grad_branch: 3.2575e-02 l_d_real: 6.6567e-04 l_d_fake: 8.2944e-04 l_d_real_grad: 8.7365e-05 l_d_fake_grad: 3.7020e-04 D_real: -6.1354e+00 D_fake: -1.6173e+01 D_real_grad: 1.2349e+01 D_fake_grad: -1.4154e+00
21-05-24 01:26:38.063 - INFO: <epoch: 15, iter: 1,400, lr:1.000e-04> l_g_pix: 2.4558e-03 l_g_fea: 1.5891e+00 l_g_gan: 6.2942e-02 l_g_pix_grad_branch: 3.1616e-02 l_d_real: 2.2350e-03 l_d_fake: 1.8222e-03 l_d_real_grad: 6.5287e-04 l_d_fake_grad: 5.0378e-04 D_real: -2.3414e+01 D_fake: -3.6000e+01 D_real_grad: 3.3537e+01 D_fake_grad: 2.4417e+01
21-05-24 01:29:50.940 - INFO: <epoch: 16, iter: 1,500, lr:1.000e-04> l_g_pix: 2.6007e-03 l_g_fea: 1.6478e+00 l_g_gan: 7.8554e-02 l_g_pix_grad_branch: 3.0603e-02 l_d_real: 2.0991e-04 l_d_fake: 4.3102e-04 l_d_real_grad: 2.3365e-07 l_d_fake_grad: 1.1206e-06 D_real: -2.1310e+01 D_fake: -3.7020e+01 D_real_grad: 2.1616e+01 D_fake_grad: 3.8412e+00
21-05-24 01:33:03.328 - INFO: <epoch: 17, iter: 1,600, lr:1.000e-04> l_g_pix: 2.1862e-03 l_g_fea: 1.6181e+00 l_g_gan: 1.0025e-01 l_g_pix_grad_branch: 3.0339e-02 l_d_real: 2.0098e-06 l_d_fake: 1.3280e-06 l_d_real_grad: 2.6226e-08 l_d_fake_grad: 1.1921e-07 D_real: -2.3161e+01 D_fake: -4.3211e+01 D_real_grad: 1.6464e+01 D_fake_grad: -5.1182e+00
21-05-24 01:36:16.004 - INFO: <epoch: 18, iter: 1,700, lr:1.000e-04> l_g_pix: 2.4907e-03 l_g_fea: 1.6881e+00 l_g_gan: 1.3110e-01 l_g_pix_grad_branch: 2.8322e-02 l_d_real: 8.5831e-08 l_d_fake: 2.7250e-06 l_d_real_grad: 3.7503e-06 l_d_fake_grad: 6.0510e-06 D_real: -2.4416e+01 D_fake: -5.0636e+01 D_real_grad: 3.0785e+01 D_fake_grad: 1.6611e+01
21-05-24 01:39:27.256 - INFO: <epoch: 19, iter: 1,800, lr:1.000e-04> l_g_pix: 2.0183e-03 l_g_fea: 1.6027e+00 l_g_gan: 1.1546e-01 l_g_pix_grad_branch: 2.6887e-02 l_d_real: 5.7220e-08 l_d_fake: 3.1208e-06 l_d_real_grad: 2.0038e-05 l_d_fake_grad: 8.9218e-05 D_real: -2.0398e+01 D_fake: -4.3490e+01 D_real_grad: 1.7696e+01 D_fake_grad: 3.8131e+00
21-05-24 01:42:39.856 - INFO: <epoch: 20, iter: 1,900, lr:1.000e-04> l_g_pix: 1.8941e-03 l_g_fea: 1.6969e+00 l_g_gan: 8.9008e-02 l_g_pix_grad_branch: 2.8792e-02 l_d_real: 1.1778e-06 l_d_fake: 1.0792e-04 l_d_real_grad: 1.1804e-04 l_d_fake_grad: 4.6953e-05 D_real: -1.0110e+01 D_fake: -2.7911e+01 D_real_grad: 1.0423e+01 D_fake_grad: -2.8702e+00
21-05-24 01:45:52.129 - INFO: <epoch: 21, iter: 2,000, lr:1.000e-04> l_g_pix: 2.4722e-03 l_g_fea: 1.6711e+00 l_g_gan: 2.9332e-02 l_g_pix_grad_branch: 3.0218e-02 l_d_real: 5.2074e-02 l_d_fake: 1.1575e-01 l_d_real_grad: 0.0000e+00 l_d_fake_grad: 0.0000e+00 D_real: -9.6310e+00 D_fake: -1.5414e+01 D_real_grad: -1.2766e+01 D_fake_grad: -3.8304e+01
21-05-24 01:49:03.386 - INFO: <epoch: 22, iter: 2,100, lr:1.000e-04> l_g_pix: 2.8986e-03 l_g_fea: 1.5935e+00 l_g_gan: 5.3678e-02 l_g_pix_grad_branch: 2.7128e-02 l_d_real: 1.8270e-04 l_d_fake: 2.4126e-04 l_d_real_grad: 0.0000e+00 l_d_fake_grad: 0.0000e+00 D_real: -6.1048e+00 D_fake: -1.6840e+01 D_real_grad: 1.0796e+00 D_fake_grad: -3.7817e+01
21-05-24 01:52:15.883 - INFO: <epoch: 23, iter: 2,200, lr:1.000e-04> l_g_pix: 2.4406e-03 l_g_fea: 1.6539e+00 l_g_gan: 1.1410e-01 l_g_pix_grad_branch: 2.8961e-02 l_d_real: 2.3842e-09 l_d_fake: 2.7536e-06 l_d_real_grad: 1.0729e-07 l_d_fake_grad: 3.3379e-08 D_real: -2.5580e+01 D_fake: -4.8400e+01 D_real_grad: -2.3847e+01 D_fake_grad: -4.4304e+01
21-05-24 01:55:28.255 - INFO: <epoch: 24, iter: 2,300, lr:1.000e-04> l_g_pix: 2.4852e-03 l_g_fea: 1.7700e+00 l_g_gan: 4.3522e-02 l_g_pix_grad_branch: 3.1261e-02 l_d_real: 4.3588e-03 l_d_fake: 6.2978e-04 l_d_real_grad: 2.8310e-04 l_d_fake_grad: 9.1119e-06 D_real: -1.4005e+01 D_fake: -2.2707e+01 D_real_grad: 4.6634e-01 D_fake_grad: -1.6876e+01
21-05-24 01:58:41.680 - INFO: <epoch: 26, iter: 2,400, lr:1.000e-04> l_g_pix: 3.0689e-03 l_g_fea: 1.7215e+00 l_g_gan: 7.7397e-02 l_g_pix_grad_branch: 2.8686e-02 l_d_real: 5.9508e-06 l_d_fake: 9.8943e-07 l_d_real_grad: 1.1660e-05 l_d_fake_grad: 1.3742e-04 D_real: -1.7526e+01 D_fake: -3.3005e+01 D_real_grad: -4.2123e+00 D_fake_grad: -2.1467e+01
21-05-24 02:01:54.331 - INFO: <epoch: 27, iter: 2,500, lr:1.000e-04> l_g_pix: 2.8539e-03 l_g_fea: 1.6631e+00 l_g_gan: 8.8362e-02 l_g_pix_grad_branch: 2.9986e-02 l_d_real: 1.8161e-04 l_d_fake: 1.3661e-06 l_d_real_grad: 0.0000e+00 l_d_fake_grad: 2.3842e-09 D_real: -2.6553e+00 D_fake: -2.0328e+01 D_real_grad: -6.0097e+00 D_fake_grad: -3.2225e+01
21-05-24 02:05:06.089 - INFO: <epoch: 28, iter: 2,600, lr:1.000e-04> l_g_pix: 2.6444e-03 l_g_fea: 1.8047e+00 l_g_gan: 1.1630e-01 l_g_pix_grad_branch: 2.9866e-02 l_d_real: 0.0000e+00 l_d_fake: 4.7684e-09 l_d_real_grad: 0.0000e+00 l_d_fake_grad: 0.0000e+00 D_real: -1.0456e+01 D_fake: -3.3716e+01 D_real_grad: 4.0606e+00 D_fake_grad: -3.6053e+01
21-05-24 02:08:18.046 - INFO: <epoch: 29, iter: 2,700, lr:1.000e-04> l_g_pix: 2.0163e-03 l_g_fea: 1.6041e+00 l_g_gan: 9.4380e-02 l_g_pix_grad_branch: 2.6837e-02 l_d_real: 6.4373e-08 l_d_fake: 2.6703e-07 l_d_real_grad: 1.5494e-05 l_d_fake_grad: 1.5154e-04 D_real: -5.2771e+00 D_fake: -2.4153e+01 D_real_grad: -2.8587e+01 D_fake_grad: -4.1664e+01
21-05-24 02:11:29.657 - INFO: <epoch: 30, iter: 2,800, lr:1.000e-04> l_g_pix: 3.2022e-03 l_g_fea: 1.7363e+00 l_g_gan: 6.1157e-02 l_g_pix_grad_branch: 2.9119e-02 l_d_real: 3.5096e-04 l_d_fake: 6.9597e-05 l_d_real_grad: 9.5367e-08 l_d_fake_grad: 5.2452e-08 D_real: -8.3919e+00 D_fake: -2.0623e+01 D_real_grad: -2.7735e+01 D_fake_grad: -4.7713e+01
21-05-24 02:14:41.966 - INFO: <epoch: 31, iter: 2,900, lr:1.000e-04> l_g_pix: 2.0775e-03 l_g_fea: 1.6452e+00 l_g_gan: 7.1554e-02 l_g_pix_grad_branch: 2.6934e-02 l_d_real: 2.3431e-04 l_d_fake: 1.8326e-03 l_d_real_grad: 2.6464e-07 l_d_fake_grad: 2.8610e-08 D_real: -1.5319e+01 D_fake: -2.9629e+01 D_real_grad: -1.5372e+01 D_fake_grad: -3.6825e+01
21-05-24 02:17:53.561 - INFO: <epoch: 32, iter: 3,000, lr:1.000e-04> l_g_pix: 1.7321e-03 l_g_fea: 1.6710e+00 l_g_gan: 9.1385e-02 l_g_pix_grad_branch: 2.9790e-02 l_d_real: 9.0599e-08 l_d_fake: 6.0320e-07 l_d_real_grad: 3.6304e-05 l_d_fake_grad: 6.1511e-07 D_real: -3.0952e+01 D_fake: -4.9229e+01 D_real_grad: -1.3276e+01 D_fake_grad: -3.5428e+01
21-05-24 02:21:04.942 - INFO: <epoch: 33, iter: 3,100, lr:1.000e-04> l_g_pix: 2.5524e-03 l_g_fea: 1.6122e+00 l_g_gan: 1.7510e-01 l_g_pix_grad_branch: 2.8861e-02 l_d_real: 0.0000e+00 l_d_fake: 4.1962e-07 l_d_real_grad: 2.5749e-07 l_d_fake_grad: 2.7180e-07 D_real: -1.9678e+01 D_fake: -5.4697e+01 D_real_grad: -1.0838e+01 D_fake_grad: -2.9239e+01
21-05-24 02:24:16.634 - INFO: <epoch: 34, iter: 3,200, lr:1.000e-04> l_g_pix: 2.9174e-03 l_g_fea: 1.6195e+00 l_g_gan: 8.8550e-02 l_g_pix_grad_branch: 3.1018e-02 l_d_real: 6.4611e-07 l_d_fake: 5.9604e-07 l_d_real_grad: 5.5455e-06 l_d_fake_grad: 6.1075e-05 D_real: -1.1526e+01 D_fake: -2.9236e+01 D_real_grad: -1.6291e+01 D_fake_grad: -3.2677e+01
21-05-24 02:27:28.812 - INFO: <epoch: 35, iter: 3,300, lr:1.000e-04> l_g_pix: 2.5499e-03 l_g_fea: 1.6712e+00 l_g_gan: 6.8183e-02 l_g_pix_grad_branch: 2.7392e-02 l_d_real: 9.3864e-02 l_d_fake: 2.1278e-04 l_d_real_grad: 3.6161e-04 l_d_fake_grad: 1.2811e-05 D_real: -2.7487e+01 D_fake: -4.1077e+01 D_real_grad: -1.0106e+01 D_fake_grad: -2.5549e+01
21-05-24 02:30:41.108 - INFO: <epoch: 36, iter: 3,400, lr:1.000e-04> l_g_pix: 2.3978e-03 l_g_fea: 1.6088e+00 l_g_gan: 1.3013e-01 l_g_pix_grad_branch: 2.9338e-02 l_d_real: 0.0000e+00 l_d_fake: 4.7684e-09 l_d_real_grad: 0.0000e+00 l_d_fake_grad: 0.0000e+00 D_real: -1.9569e+01 D_fake: -4.5596e+01 D_real_grad: -1.2644e+01 D_fake_grad: -3.9879e+01
21-05-24 02:33:54.622 - INFO: <epoch: 38, iter: 3,500, lr:1.000e-04> l_g_pix: 2.4048e-03 l_g_fea: 1.5419e+00 l_g_gan: 1.2125e-01 l_g_pix_grad_branch: 2.6149e-02 l_d_real: 4.6155e-06 l_d_fake: 6.2974e-03 l_d_real_grad: 0.0000e+00 l_d_fake_grad: 0.0000e+00 D_real: -8.3029e+00 D_fake: -3.2550e+01 D_real_grad: -9.6282e+00 D_fake_grad: -4.1604e+01
21-05-24 02:37:05.900 - INFO: <epoch: 39, iter: 3,600, lr:1.000e-04> l_g_pix: 2.2153e-03 l_g_fea: 1.6592e+00 l_g_gan: 1.5681e-01 l_g_pix_grad_branch: 2.8890e-02 l_d_real: 0.0000e+00 l_d_fake: 7.1526e-09 l_d_real_grad: 3.0068e-03 l_d_fake_grad: 9.6887e-03 D_real: -5.0906e+01 D_fake: -8.2267e+01 D_real_grad: -1.8265e+01 D_fake_grad: -2.6081e+01
21-05-24 02:40:17.427 - INFO: <epoch: 40, iter: 3,700, lr:1.000e-04> l_g_pix: 2.3257e-03 l_g_fea: 1.5420e+00 l_g_gan: 7.5334e-02 l_g_pix_grad_branch: 2.7359e-02 l_d_real: 6.2178e-06 l_d_fake: 1.1802e-06 l_d_real_grad: 1.2017e-03 l_d_fake_grad: 1.9033e-03 D_real: -2.3801e+01 D_fake: -3.8868e+01 D_real_grad: -8.9928e+00 D_fake_grad: -1.8048e+01
21-05-24 02:43:28.910 - INFO: <epoch: 41, iter: 3,800, lr:1.000e-04> l_g_pix: 2.6892e-03 l_g_fea: 1.6348e+00 l_g_gan: 9.0847e-02 l_g_pix_grad_branch: 2.8078e-02 l_d_real: 2.6703e-07 l_d_fake: 3.1226e-05 l_d_real_grad: 5.3070e-06 l_d_fake_grad: 1.0695e-05 D_real: -2.3891e+01 D_fake: -4.2061e+01 D_real_grad: -1.7052e+01 D_fake_grad: -3.1479e+01
21-05-24 02:46:40.285 - INFO: <epoch: 42, iter: 3,900, lr:1.000e-04> l_g_pix: 2.5676e-03 l_g_fea: 1.8278e+00 l_g_gan: 4.1917e-02 l_g_pix_grad_branch: 3.4700e-02 l_d_real: 1.5963e-02 l_d_fake: 4.6029e-02 l_d_real_grad: 9.9656e-06 l_d_fake_grad: 4.8351e-06 D_real: -4.0825e+01 D_fake: -4.9177e+01 D_real_grad: -9.2685e+00 D_fake_grad: -2.2622e+01
21-05-24 02:49:52.089 - INFO: <epoch: 43, iter: 4,000, lr:1.000e-04> l_g_pix: 2.2216e-03 l_g_fea: 1.5846e+00 l_g_gan: 6.4105e-02 l_g_pix_grad_branch: 2.6135e-02 l_d_real: 1.3576e-03 l_d_fake: 4.8046e-04 l_d_real_grad: 3.7264e-06 l_d_fake_grad: 8.0104e-06 D_real: -1.6566e+01 D_fake: -2.9386e+01 D_real_grad: -2.1560e+01 D_fake_grad: -3.7010e+01
21-05-24 02:49:52.089 - INFO: Saving models and training states.
21-05-24 02:53:05.239 - INFO: <epoch: 44, iter: 4,100, lr:1.000e-04> l_g_pix: 2.3361e-03 l_g_fea: 1.6421e+00 l_g_gan: 5.3571e-02 l_g_pix_grad_branch: 2.9098e-02 l_d_real: 1.9744e-04 l_d_fake: 2.6551e-03 l_d_real_grad: 9.2835e-02 l_d_fake_grad: 6.1299e-03 D_real: -2.1851e+01 D_fake: -3.2563e+01 D_real_grad: -4.4997e+00 D_fake_grad: -1.3830e+01
21-05-24 02:56:16.614 - INFO: <epoch: 45, iter: 4,200, lr:1.000e-04> l_g_pix: 1.7827e-03 l_g_fea: 1.5211e+00 l_g_gan: 8.9597e-02 l_g_pix_grad_branch: 2.7135e-02 l_d_real: 3.3855e-07 l_d_fake: 4.6569e-05 l_d_real_grad: 1.3113e-07 l_d_fake_grad: 1.3590e-07 D_real: -3.6528e+01 D_fake: -5.4447e+01 D_real_grad: -2.0493e+01 D_fake_grad: -3.8345e+01
21-05-24 02:59:29.413 - INFO: <epoch: 46, iter: 4,300, lr:1.000e-04> l_g_pix: 1.8728e-03 l_g_fea: 1.6894e+00 l_g_gan: 8.1638e-02 l_g_pix_grad_branch: 3.1682e-02 l_d_real: 3.8835e-05 l_d_fake: 4.4186e-05 l_d_real_grad: 8.2023e-05 l_d_fake_grad: 4.1961e-06 D_real: -3.1155e+01 D_fake: -4.7482e+01 D_real_grad: -2.8142e+01 D_fake_grad: -4.4408e+01
21-05-24 03:02:40.728 - INFO: <epoch: 47, iter: 4,400, lr:1.000e-04> l_g_pix: 1.9327e-03 l_g_fea: 1.6821e+00 l_g_gan: 6.4633e-02 l_g_pix_grad_branch: 2.9793e-02 l_d_real: 8.4029e-05 l_d_fake: 1.5388e-03 l_d_real_grad: 3.7105e-05 l_d_fake_grad: 2.8300e-06 D_real: -3.1148e+01 D_fake: -4.4074e+01 D_real_grad: -2.0028e+01 D_fake_grad: -3.8328e+01
21-05-24 03:05:52.137 - INFO: <epoch: 48, iter: 4,500, lr:1.000e-04> l_g_pix: 2.1025e-03 l_g_fea: 1.8216e+00 l_g_gan: 5.2623e-02 l_g_pix_grad_branch: 3.2644e-02 l_d_real: 9.1258e-02 l_d_fake: 6.3421e-04 l_d_real_grad: 1.4305e-08 l_d_fake_grad: 2.8610e-08 D_real: -3.1337e+01 D_fake: -4.1816e+01 D_real_grad: -1.3150e+01 D_fake_grad: -3.2264e+01
21-05-24 03:09:04.571 - INFO: <epoch: 49, iter: 4,600, lr:1.000e-04> l_g_pix: 1.7092e-03 l_g_fea: 1.6174e+00 l_g_gan: 4.5857e-02 l_g_pix_grad_branch: 2.7879e-02 l_d_real: 1.8214e-01 l_d_fake: 2.6645e-01 l_d_real_grad: 2.6844e-05 l_d_fake_grad: 2.5817e-05 D_real: -2.9393e+01 D_fake: -3.8340e+01 D_real_grad: -2.9426e+01 D_fake_grad: -4.1452e+01
21-05-24 03:12:18.200 - INFO: <epoch: 51, iter: 4,700, lr:1.000e-04> l_g_pix: 1.8148e-03 l_g_fea: 1.7709e+00 l_g_gan: 8.0268e-02 l_g_pix_grad_branch: 2.9699e-02 l_d_real: 2.2101e-06 l_d_fake: 2.5701e-06 l_d_real_grad: 2.6035e-06 l_d_fake_grad: 5.7112e-04 D_real: -3.2691e+01 D_fake: -4.8745e+01 D_real_grad: -2.9348e+01 D_fake_grad: -4.5035e+01
21-05-24 03:15:30.126 - INFO: <epoch: 52, iter: 4,800, lr:1.000e-04> l_g_pix: 2.1427e-03 l_g_fea: 1.6868e+00 l_g_gan: 7.9818e-02 l_g_pix_grad_branch: 2.9640e-02 l_d_real: 7.7724e-07 l_d_fake: 4.0957e-06 l_d_real_grad: 5.9761e-05 l_d_fake_grad: 1.7802e-02 D_real: -2.7988e+01 D_fake: -4.3951e+01 D_real_grad: -4.4252e+01 D_fake_grad: -5.5154e+01
21-05-24 03:18:42.705 - INFO: <epoch: 53, iter: 4,900, lr:1.000e-04> l_g_pix: 2.1152e-03 l_g_fea: 1.7703e+00 l_g_gan: 1.1302e-01 l_g_pix_grad_branch: 3.1607e-02 l_d_real: 2.3842e-09 l_d_fake: 0.0000e+00 l_d_real_grad: 8.5831e-08 l_d_fake_grad: 1.0014e-07 D_real: -2.3408e+01 D_fake: -4.6011e+01 D_real_grad: -2.8532e+01 D_fake_grad: -4.5819e+01
Traceback (most recent call last):
File "train.py", line 190, in <module>
main()
File "train.py", line 106, in main
model.optimize_parameters(current_step)
File "/home/beemap/mobin_workspace/code/SPSR/code/models/SPSR_model.py", line 251, in optimize_parameters
self.fake_H_branch, self.fake_H, self.grad_LR = self.netG(self.var_L)
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/nn/parallel/data_parallel.py", line 161, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/nn/parallel/data_parallel.py", line 171, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply
output.reraise()
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/_utils.py", line 428, in reraise
raise self.exc_type(msg)
RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
output = module(*input, **kwargs)
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/beemap/mobin_workspace/code/SPSR/code/models/modules/architecture.py", line 147, in forward
x = block_list[i+10](x)
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/beemap/mobin_workspace/code/SPSR/code/models/modules/block.py", line 229, in forward
out = self.RDB3(out)
File "/home/beemap/miniconda3/envs/pytorch-mobin/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/beemap/mobin_workspace/code/SPSR/code/models/modules/block.py", line 205, in forward
x4 = self.conv4(torch.cat((x, x1, x2, x3), 1))
RuntimeError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 0; 11.91 GiB total capacity; 11.14 GiB already allocated; 36.25 MiB free; 11.25 GiB reserved in total by PyTorch)