$ python train.py
{'log_dir': 'exp', 'model_name': 'dygan_vc', 'exp_name': 'dygan_vc_vq_spkemb', 'pretrained_model': '', 'fp16_run': False, 'trainer': 'VQMelSpkEmbLSTrainer', 'epochs': 100, 'num_speakers': 8, 'save_freq': 20, 'load_only_params': False, 'data_loader': {'dataset': 'VQMelSpkEmbDataset', 'data_dir': 'vcc2020', 'vq_dir': 'dump/vqw2v_feat_test', 'batch_size': 8, 'speakers': 'speaker.json', 'spk_emb_dir': 'dump/ppg-vc-spks', 'shuffle': True, 'drop_last': False, 'num_workers': 4, 'min_length': 128, 'stats': 'vocoder/stats.npy'}, 'model': {'generator': {'model_name': 'Generator0', 'in_feat_dim': 512, 'out_feat_dim': 80, 'kernel': '9_9_9_9_9_9', 'num_heads': '4_4_4_4_4_4', 'num_res_blocks': 6, 'hidden_size': '256_256_256_256_256_256', 'spk_emb_dim': 256, 'hid2_factor': 1, 'res_wff_kernel1': 3, 'res_wff_kernel2': 3, 'res_wadain_use_ln': False, 'res_wff_use_res': True, 'res_wff_use_act2': True, 'res_use_ln': True, 'use_kconv': False, 'wadain_beta': False, 'ff_block': 'WadainFF', 'conv_block': 'DynamicConv', 'scale': 1.0, 'out_kernel': 1}, 'discriminator': {'model_name': 'Discriminator128', 'num_speakers': 8, 'kernel_size': 3, 'padding': 1}}, 'loss': {'g_loss': {'lambda_cyc': 0.0, 'lambda_id': 5.0, 'lambda_adv': 1.0}, 'd_loss': {'lambda_reg': 1.0, 'lambda_con_reg': 5.0}, 'con_reg_epoch': 50000}, 'optimizer': {'discriminator': {'lr': 2e-05, 'weight_decay': 0.0001, 'betas': [0.5, 0.999]}, 'generator': {'lr': 0.0001, 'weight_decay': 0.0001, 'betas': [0.5, 0.999]}}}
SEF1
60
SEF2
60
SEM1
60
SEM2
60
TEF1
60
TEF2
60
TEM1
60
TEM2
60
loading files 480
SEF1
10
SEF2
10
SEM1
10
SEM2
10
TEF1
10
TEF2
10
TEM1
10
TEM2
10
loading files 80
Generator0(
(conv1): Sequential(
(0): Conv1d(512, 256, kernel_size=(5,), stride=(1,), padding=(2,))
(1): LeakyReLU(negative_slope=0.2)
)
(res_blocks): Sequential(
(0): DynamicConv(
(k_layer): Linear(in_features=256, out_features=512, bias=True)
(conv_kernel_layer): Linear(in_features=256, out_features=36, bias=True)
(lconv): LightConv(
(unfold1d): Unfold(kernel_size=[9, 1], dilation=1, padding=[4, 0], stride=1)
)
(ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(act): GLU(dim=-1)
)
(1): WadainFF(
(conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
(conv2): WadaIN(
(act): LeakyReLU(negative_slope=0.2)
(style_linear): EqualLinear()
)
(act): ReLU()
)
(2): DynamicConv(
(k_layer): Linear(in_features=256, out_features=512, bias=True)
(conv_kernel_layer): Linear(in_features=256, out_features=36, bias=True)
(lconv): LightConv(
(unfold1d): Unfold(kernel_size=[9, 1], dilation=1, padding=[4, 0], stride=1)
)
(ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(act): GLU(dim=-1)
)
(3): WadainFF(
(conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
(conv2): WadaIN(
(act): LeakyReLU(negative_slope=0.2)
(style_linear): EqualLinear()
)
(act): ReLU()
)
(4): DynamicConv(
(k_layer): Linear(in_features=256, out_features=512, bias=True)
(conv_kernel_layer): Linear(in_features=256, out_features=36, bias=True)
(lconv): LightConv(
(unfold1d): Unfold(kernel_size=[9, 1], dilation=1, padding=[4, 0], stride=1)
)
(ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(act): GLU(dim=-1)
)
(5): WadainFF(
(conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
(conv2): WadaIN(
(act): LeakyReLU(negative_slope=0.2)
(style_linear): EqualLinear()
)
(act): ReLU()
)
(6): DynamicConv(
(k_layer): Linear(in_features=256, out_features=512, bias=True)
(conv_kernel_layer): Linear(in_features=256, out_features=36, bias=True)
(lconv): LightConv(
(unfold1d): Unfold(kernel_size=[9, 1], dilation=1, padding=[4, 0], stride=1)
)
(ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(act): GLU(dim=-1)
)
(7): WadainFF(
(conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
(conv2): WadaIN(
(act): LeakyReLU(negative_slope=0.2)
(style_linear): EqualLinear()
)
(act): ReLU()
)
(8): DynamicConv(
(k_layer): Linear(in_features=256, out_features=512, bias=True)
(conv_kernel_layer): Linear(in_features=256, out_features=36, bias=True)
(lconv): LightConv(
(unfold1d): Unfold(kernel_size=[9, 1], dilation=1, padding=[4, 0], stride=1)
)
(ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(act): GLU(dim=-1)
)
(9): WadainFF(
(conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
(conv2): WadaIN(
(act): LeakyReLU(negative_slope=0.2)
(style_linear): EqualLinear()
)
(act): ReLU()
)
(10): DynamicConv(
(k_layer): Linear(in_features=256, out_features=512, bias=True)
(conv_kernel_layer): Linear(in_features=256, out_features=36, bias=True)
(lconv): LightConv(
(unfold1d): Unfold(kernel_size=[9, 1], dilation=1, padding=[4, 0], stride=1)
)
(ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
(act): GLU(dim=-1)
)
(11): WadainFF(
(conv1): Conv1d(256, 256, kernel_size=(3,), stride=(1,), padding=(1,))
(conv2): WadaIN(
(act): LeakyReLU(negative_slope=0.2)
(style_linear): EqualLinear()
)
(act): ReLU()
)
)
(out): Sequential(
(0): Conv1d(256, 80, kernel_size=(1,), stride=(1,))
)
)
generator
The number of parameters: 4281384
Discriminator128(
(conv_layer_1): Sequential(
(0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(down_sample_1): DisRes(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv1x1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(act): LeakyReLU(negative_slope=0.2)
)
(down_sample_2): DisRes(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(act): LeakyReLU(negative_slope=0.2)
)
(down_sample_3): DisRes(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(act): LeakyReLU(negative_slope=0.2)
)
(down_sample_4): DisRes(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(act): LeakyReLU(negative_slope=0.2)
)
(blocks): Sequential(
(0): LeakyReLU(negative_slope=0.2)
(1): Conv2d(128, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(2): LeakyReLU(negative_slope=0.2)
(3): AdaptiveAvgPool2d(output_size=1)
)
(dis_conv): Conv2d(128, 8, kernel_size=(1, 1), stride=(1, 1))
)
discriminator
The number of parameters: 1415880
Traceback (most recent call last):
File "train.py", line 96, in <module>
main(args.config_path)
File "train.py", line 75, in main
train_results = trainer._train_epoch()
File "/deepmind/experiments/mingjiechen/dyganvc/vqmel_spkemb_ls_trainer.py", line 228, in _train_epoch
for train_steps_per_epoch, batch in enumerate(self.train_dataloader, 1):
File "/storage/usr/conda/envs/dyganvc/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
data = self._next_data()
File "/storage/usr/conda/envs/dyganvc/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1085, in _next_data
return self._process_data(data)
File "/storage/usr/conda/envs/dyganvc/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1111, in _process_data
data.reraise()
File "/storage/usr/conda/envs/dyganvc/lib/python3.7/site-packages/torch/_utils.py", line 428, in reraise
raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/storage/usr/conda/envs/dyganvc/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
data = fetcher.fetch(index)
File "/storage/usr/conda/envs/dyganvc/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/storage/usr/conda/envs/dyganvc/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/deepmind/experiments/mingjiechen/dyganvc/data_loader.py", line 117, in __getitem__
vqw2v_dense = np.concatenate((vqw2v_dense, np.repeat(pad_vec, mel_length - vq_length, 0)),1)
File "<__array_function__ internals>", line 6, in concatenate
ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)