hahunavth committed on
Commit
8deb421
1 Parent(s): 630c148

Upload model

Browse files
Files changed (2) hide show
  1. config.json +99 -0
  2. model.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ESSModelForPretraining"
4
+ ],
5
+ "freeze": [],
6
+ "model_config": {
7
+ "conformer": {
8
+ "attention_dropout_p": 0.2,
9
+ "conv_dropout_p": 0.2,
10
+ "conv_expansion_factor": 2,
11
+ "conv_kernel_size": 7,
12
+ "decoder_dim": 256,
13
+ "encoder_dim": 256,
14
+ "feed_forward_dropout_p": 0.2,
15
+ "feed_forward_expansion_factor": 4,
16
+ "half_step_residual": true,
17
+ "num_attention_heads": 2,
18
+ "num_decode_layers": 6,
19
+ "num_encode_layers": 4
20
+ },
21
+ "max_seq_len": 1000,
22
+ "mode": "train",
23
+ "num_emotion": 5,
24
+ "reference_encoder": {
25
+ "dropout": 0.2,
26
+ "encoder_dim": 128
27
+ },
28
+ "variance_embedding": {
29
+ "energy_quantization": "linear",
30
+ "n_bins": 256,
31
+ "pitch_quantization": "linear"
32
+ },
33
+ "variance_predictor": {
34
+ "dropout": 0.5,
35
+ "filter_size": 256,
36
+ "kernel_size": 3
37
+ },
38
+ "vocoder": {
39
+ "model": "HiFi-GAN",
40
+ "speaker": "tth"
41
+ }
42
+ },
43
+ "model_type": "emofs2",
44
+ "preprocess_config": {
45
+ "dataset": "vlsp2023emo",
46
+ "emotion2id": {
47
+ "angry": 3,
48
+ "happy": 1,
49
+ "neutral": 0,
50
+ "sad": 2,
51
+ "surprise": 4
52
+ },
53
+ "id2emotion": {
54
+ "0": "neutral",
55
+ "1": "happy",
56
+ "2": "sad",
57
+ "3": "angry",
58
+ "4": "surprise"
59
+ },
60
+ "path": {
61
+ "corpus_path": "./data/pretrained_tts_dataset/tuyendv.dict",
62
+ "lexicon_path": "../datasets/ess-vlsp2023-lexicon/lexicon.dict",
63
+ "preprocessed_path": "../datasets/ess-vlsp2023-emo-processed-phoneme-level",
64
+ "raw_path": "./data/pretrained_tts_dataset_raw"
65
+ },
66
+ "preprocessing": {
67
+ "audio": {
68
+ "max_wav_value": 32768.0,
69
+ "sampling_rate": 22050
70
+ },
71
+ "energy": {
72
+ "feature": "phoneme_level",
73
+ "normalization": true
74
+ },
75
+ "mel": {
76
+ "mel_fmax": 8000,
77
+ "mel_fmin": 0,
78
+ "n_mel_channels": 80
79
+ },
80
+ "pitch": {
81
+ "feature": "phoneme_level",
82
+ "normalization": true
83
+ },
84
+ "stft": {
85
+ "filter_length": 1024,
86
+ "hop_length": 256,
87
+ "win_length": 1024
88
+ },
89
+ "text": {
90
+ "language": "en",
91
+ "text_cleaners": []
92
+ },
93
+ "val_size": 512
94
+ },
95
+ "smoothing_label": 0.1
96
+ },
97
+ "torch_dtype": "float32",
98
+ "transformers_version": "4.35.2"
99
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc1fad0efed595df2aebc4692f59a2390b2205b4ef9dad8838d337d430c814a
3
+ size 250799856