emotional-vits's Issues










已自己解决 tts is not defined


txt = "疲れた?甘ったれたこと言ってんじゃないわよ!"
txtr=get_roma(txt, hps)
tts(txtr, torch.LongTensor([0]), emotion="./short angry.wav", roma=True, length_scale = 1)

NameError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18240\ in
2 txt = "疲れた?甘ったれたこと言ってんじゃないわよ!"
3 txtr=get_roma(txt, hps)
----> 4 tts(txtr, torch.LongTensor([0]), emotion="./short angry.wav", roma=True, length_scale = 1)

NameError: name 'tts' is not defined





有人知道这是啥原因吗?我溯源发现是inputs为空,而inputs为空是因为掩码全为false。
File "/share/home/ncu3/ly/proj/e-vits/", line 114, in rational_quadratic_spline
if torch.min(inputs) < left or torch.max(inputs) > right:
RuntimeError: min(): Expected reduction dim to be specified for input.numel() == 0. Specify the reduction dim with the 'dim' argument.


config中的symbols 用了cj大佬方言模型的symbols;





Lines 104 to 115 in 09e1654

if ckptG is not None:
_, _, _, epoch_str = utils.load_checkpoint(ckptG, net_g, optim_g, is_old=True)
_, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g,
if ckptD is not None:
_, _, _, epoch_str = utils.load_checkpoint(ckptG, net_g, optim_g, is_old=True)
_, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d,

Defective code in the latest commit

Commit f6d9de5 seems to have several bugs, some possible and some definite. First, the training script's

_, _, _, epoch_str = utils.load_old_checkpoint(ckptD, net_d, optim_d)

part has been modified to:

_, _, _, epoch_str = utils.load_checkpoint(ckptG, net_g, optim_g, is_old=True)

, which is clearly incorrect: it loads ckptG instead of ckptD, and it passes the G-related arguments (net_g, optim_g) where the D-related ones (net_d, optim_d) are needed.
Second, this one is possibly just me, but single-speaker training doesn't seem to work. This may be due to the checkpoint-loading defect, which is present in both training scripts; I only tried to train the model with the current code (with the aforementioned bugs left unfixed), so I don't know exactly why it didn't work.





ImportError: cannot import name 'CommitOperationAdd' from 'huggingface_hub'

Cell In[41], line 4
2 txt = "疲れた?甘ったれたこと言ってんじゃないわよ!"
3 txtr=get_roma(txt, hps)
----> 4 tts(txtr, torch.LongTensor([1]), emotion="./short normal.wav", roma=True, length_scale = 1)

Cell In[38], line 13, in tts(txt, sid, emotion, roma, length_scale)
11 x_tst = stn_tst.unsqueeze(0)
12 x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
---> 13 import emotion_extract
14 emo = torch.FloatTensor(emotion_extract.extract_wav(emotion))
15 # sid = torch.LongTensor([0])
16 # if type(emotion) ==int:
17 # emo = torch.FloatTensor(all_emotions[emotion]).unsqueeze(0)
27 # else:
28 # emo = torch.FloatTensor(all_emotions[emotion_dict[emotion]]).unsqueeze(0)

File D:\VITS\emotional-vits-main\
1 import torch
2 import torch.nn as nn
----> 3 from transformers import Wav2Vec2Processor
4 from transformers.models.wav2vec2.modeling_wav2vec2 import (
5 Wav2Vec2Model,
6 Wav2Vec2PreTrainedModel,
7 )
8 import os

File D:\Anaconda3\envs\vits\lib\site-packages\
23 from typing import TYPE_CHECKING
25 # Check the dependencies satisfy the minimal versions required.
---> 26 from . import dependency_versions_check
27 from .utils import (
28 OptionalDependencyNotAvailable,
29 _LazyModule,
42 logging,
43 )
46 logger = logging.get_logger(name) # pylint: disable=invalid-name

File D:\Anaconda3\envs\vits\lib\site-packages\transformers\
33 if pkg in deps:
34 if pkg == "tokenizers":
35 # must be loaded here, or else tqdm check may fail
---> 36 from .utils import is_tokenizers_available
38 if not is_tokenizers_available():
39 continue # not required, check version only if installed

File D:\Anaconda3\envs\vits\lib\site-packages\transformers\
22 from .doc import (
23 add_code_sample_docstrings,
24 add_end_docstrings,
28 replace_return_docstrings,
29 )
30 from .generic import (
31 ContextManagers,
32 ExplicitEnum,
54 working_or_temp_dir,
55 )
---> 56 from .hub import (
67 EntryNotFoundError,
68 PushToHubMixin,
69 RepositoryNotFoundError,
70 RevisionNotFoundError,
71 cached_file,
72 default_cache_path,
73 define_sagemaker_information,
74 download_url,
75 extract_commit_hash,
76 get_cached_models,
77 get_file_from_repo,
78 get_full_repo_name,
79 has_file,
80 http_user_agent,
81 is_offline_mode,
82 is_remote_url,
83 move_cache,
84 send_example_telemetry,
85 )
86 from .import_utils import (
166 torch_version,
167 )
170 WEIGHTS_NAME = "pytorch_model.bin"

File D:\Anaconda3\envs\vits\lib\site-packages\transformers\utils\
30 import huggingface_hub
31 import requests
---> 32 from huggingface_hub import (
33 CommitOperationAdd,
34 create_commit,
35 create_repo,
36 get_hf_file_metadata,
37 hf_hub_download,
38 hf_hub_url,
39 whoami,
40 )
41 from huggingface_hub.file_download import REGEX_COMMIT_HASH, http_get
42 from huggingface_hub.utils import (
43 EntryNotFoundError,
44 LocalEntryNotFoundError,
48 hf_raise_for_status,
49 )

ImportError: cannot import name 'CommitOperationAdd' from 'huggingface_hub' (D:\Anaconda3\envs\vits\lib\site-packages\

Multiple GPU training bug

Training on multiple GPUs results in an error:

Traceback (most recent call last):
File "/opt/conda/lib/python3.10/site-packages/torch/multiprocessing/", line 69, in _wrap
fn(i, *args)
File "/root/emotional-vits/", line 134, in run
train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler,
File "/root/emotional-vits/", line 153, in train_and_evaluate
for batch_idx, (x, x_lengths, spec, spec_lengths, y, y_lengths, speakers, emo) in enumerate(train_loader):
File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/", line 441, in iter
return self._get_iterator()
File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/", line 388, in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/", line 994, in init
File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/", line 603, in init
self._sampler_iter = iter(self._index_sampler)
File "/root/emotional-vits/", line 372, in iter
ids_bucket = ids_bucket + ids_bucket * (rem // len_bucket) + ids_bucket[:(rem % len_bucket)]
ZeroDivisionError: integer division or modulo by zero

wav2vec 模型是针对英文的?

wav2vec2-large-robust-12-ft-emotion-msp-dim 是从英文dataset训练而来的吧?会不会不适合中文的音频?是不是用从中文音频训练而来的模型效果会更好一点?



DataLoader IndexError: Dimension out of range during training


I am currently encountering an issue during the training process of my model. The error message that I receive is as follows:

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

This error occurs when enumerating over the DataLoader in my training loop:

for batch_idx, (x, x_lengths, spec, spec_lengths, y, y_lengths, speakers, emo) in enumerate(train_loader):

The batch data seems to be of the correct shape when I print it out just before the loop:

batch[0][0].shape: torch.Size([17])
batch[0][1].shape: torch.Size([513])
batch[0][2].shape: torch.Size([1, 66150])
batch[0][3].shape: torch.Size([1])
batch[0][4].shape: torch.Size([1024])

The problem seems to occur when the collate_fn function of the DataLoader tries to create a LongTensor from the sizes of the batch data:

torch.LongTensor([x[1].size(1) for x in batch])

I have been trying to debug this issue, but I am currently stuck. Any help or pointers would be greatly appreciated.


IndexError: Dimension out of range


python --text_index 2 --filelists filelists/train.txt filelists/val.txt --text_cleaners chinese_cleaners
python --filelists filelists/train.txt filelists/val.txt

python -c configs/test.json -m test

File "G:\emotional-vits\", line 264, in <listcomp>
    torch.LongTensor([x[1].size(1) for x in batch]),
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

for i, x in enumerate(batch):
if len(x[1].shape) < 2:
print(f"Item {i} in batch has unexpected shape {x[1].shape}")

Item 1 in batch has unexpected shape torch.Size([513])
Item 2 in batch has unexpected shape torch.Size([513])
Item 3 in batch has unexpected shape torch.Size([513])
Item 4 in batch has unexpected shape torch.Size([513])
Item 5 in batch has unexpected shape torch.Size([513])
Item 6 in batch has unexpected shape torch.Size([513])
Item 7 in batch has unexpected shape torch.Size([513])
Item 8 in batch has unexpected shape torch.Size([513])
Item 9 in batch has unexpected shape torch.Size([513])
Item 10 in batch has unexpected shape torch.Size([513])
Item 11 in batch has unexpected shape torch.Size([513])
Item 12 in batch has unexpected shape torch.Size([513])
Item 13 in batch has unexpected shape torch.Size([513])
Item 14 in batch has unexpected shape torch.Size([513])
Item 15 in batch has unexpected shape torch.Size([513])
Item 16 in batch has unexpected shape torch.Size([513])
Item 17 in batch has unexpected shape torch.Size([513])
Item 18 in batch has unexpected shape torch.Size([513])
Item 19 in batch has unexpected shape torch.Size([513])
Item 20 in batch has unexpected shape torch.Size([513])
Item 21 in batch has unexpected shape torch.Size([513])
Item 22 in batch has unexpected shape torch.Size([513])
Item 23 in batch has unexpected shape torch.Size([513])



python -c configs/mako.json -m mako


INFO:torch.distributed.distributed_c10d:Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 1 nodes.


Traceback (most recent call last):
File "", line 314, in
main()
File "", line 49, in main
mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps,))
File "C:\Users\Henry\anaconda3\envs\e-vits\lib\site-packages\torch\multiprocessing\", line 240, in spawn
return start_processes(fn, args, nprocs, join, daemon, start_method='spawn')
File "C:\Users\Henry\anaconda3\envs\e-vits\lib\site-packages\torch\multiprocessing\", line 198, in start_processes
while not context.join():
File "C:\Users\Henry\anaconda3\envs\e-vits\lib\site-packages\torch\multiprocessing\", line 160, in join
raise ProcessRaisedException(msg, error_index,
torch.multiprocessing.spawn.ProcessRaisedException:

-- Process 0 terminated with the following error:
Traceback (most recent call last):
File "C:\Users\Henry\anaconda3\envs\e-vits\lib\site-packages\torch\multiprocessing\", line 69, in _wrap
fn(i, *args)
File "D:\DL\emotional-vits\", line 77, in run
eval_dataset = TextAudioSpeakerLoader(,
File "D:\DL\emotional-vits\", line 183, in init
File "D:\DL\emotional-vits\", line 195, in _filter
for audiopath, sid, text in self.audiopaths_sid_text:
ValueError: too many values to unpack (expected 3)



python --filelists filelists/train.txt filelists/val.txt

运行上面这一命令时,遇到了 UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 38: illegal multibyte sequence 报错。为解决这个问题,我对emotion_extract.py的第132行进行了修改,修改后的代码为:

with open(filelist,'r',encoding='UTF-8') as f:




C:\Users\Jason\anaconda3\envs\vocal\lib\site-packages\ UserWarning: loaded more than 1 DLL from .libs:
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
filelists/train.txt ----start emotion extract-------
Traceback (most recent call last):
File "", line 133, in
for idx, line in enumerate(f.readlines(),encoding = "utf-8"):
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 20: illegal multibyte sequence


读取原版模型那里D Net仍然读的是G Net的状态,估计是复制了没改。我的fork里改的太多,我就不提pull req了

