Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
B1ueorange authored Jun 15, 2023
0 parents commit 8eb02bc
Show file tree
Hide file tree
Showing 37 changed files with 8,739 additions and 0 deletions.
483 changes: 483 additions & 0 deletions Datasets.py

Large diffs are not rendered by default.

125 changes: 125 additions & 0 deletions Hyper_Parameters.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
Sound:
Spectrogram_Dim: 1025
Mel_Dim: 80
Frame_Length: 512 # 24000 / 1000 * 20ms
Frame_Shift: 128 # 24000 / 1000 * 5ms
Sample_Rate: 24000
Mel_F_Min: 0
Mel_F_Max: 12000 # The data I received was 44100Hz
Max_Abs_Mel: 4
Confidence_Threshold: 0.6
Gaussian_Smoothing_Sigma: 0.0
F0_Min: 0
F0_Max: 500

Tokens: 78
Max_Note: 100
Min_Duration: 600
Max_Duration: 1500

GST:
size: 256
num_heads: 8
token_num: 10
E: 256

# reference encoder
ref_enc_filters: [32, 32, 64, 64, 128, 128]
ref_enc_size: [3, 3]
ref_enc_strides: [2, 2]
ref_enc_pad: [1, 1]
ref_enc_gru_size: E // 2
n_mels: 80

Encoder:
Size: 384
Conv_Kernel_Size: [5, 5, 5]
FFT_Block:
Heads: 2
Dropout_Rate: 0.1
Stacks: 6
FeedForward:
In_Kernel_Size: 3
Out_Kernel_Size: 1
Channels: 1536

Duration_Predictor:
Conv:
Kernel_Size: [3, 3]
Channels: [256, 256]
Dropout_Rate: 0.1

Decoder:
Size: 384 # I think this must be same to encoder size.
FFT_Block:
Heads: 2
Dropout_Rate: 0.1
Stacks: 6
FeedForward:
In_Kernel_Size: 3
Out_Kernel_Size: 1
Channels: 1536

Discriminator:
Stacks: 3
Channels: 64
Kernel_Size: 9
Frequency_Range: [[0, 40], [20, 60], [40, 80]]

Vocoder_Path: 'Vocoder.pts' # Paper is using PWGAN. This part needs another model.

Token_Path: './Token.yaml'
Train:
Use_Pattern_Cache: false
Train_Pattern:
Path: './OpenSingerPickle'
# Path: 'D:/Download/OpenSingerPickle/NAMS1'
# Path: 'D:/Download/segment'
# Path: './segment/NAMS'
# Path: '/home/share/chengxuanang/hifisinger-chinese/segment/NAMS'
# Path: '/data/gongjunhao/hifisinger-chinese/segment/NAMS'
Metadata_File: 'METADATA.PICKLE'
Accumulated_Dataset_Epoch: 1 # This is to prevent slow down from torch.utils.data.DataLoader when the number of patterns is small.
Eval_Pattern:
Path: './OpenSingerPickle'
# Path: 'D:/Download/OpenSingerPickle/NAMS1'
# Path: 'D:/Download/segment/NAMS1'
# Path: './segment'
# Path: '/home/share/chengxuanang/hifisinger-chinese/segment/NAMS1'
# Path: '/data/gongjunhao/hifisinger-chinese/segment/NAMS1'
Metadata_File: 'METADATA.PICKLE'
Num_Workers: 2
Batch_Size: 8 # 32 # 8
Learning_Rate:
Generator:
Initial: 1.0e-4 #×10
Base: 4000
Discriminator:
Initial: 5.0e-5
Base: 4000
Discriminator_Gradient_Panelty_Gamma: 10.0
ADAM:
Beta1: 0.9
Beta2: 0.999
Epsilon: 1.0e-7
Weight_Decay: 1.0e-6
Gradient_Norm: 0.5
Max_Step: 400000
Checkpoint_Save_Interval: 5000
Logging_Interval: 100
Evaluation_Interval: 1000
Inference_Interval: 10000
Initial_Inference: true
Inference_Pattern_in_Train: 'Inference_Text.txt'

Inference_Batch_Size: 4
Inference_Path: 'D:/HiFiSinger.Reuslts/Songs_108/Inference_resemblyzer'
# Checkpoint_Path: 'D:/HiFiSinger.Reuslts/Songs_108/Checkpoint'
Checkpoint_Path: 'D:/HiFiSinger.Reuslts/Songs_108/Checkpoint2'
Log_Path: 'D:/HiFiSinger.Reuslts/Songs_108/Log'

Use_Mixed_Precision: true
Use_Multi_GPU: false
# Device: '0, 1'
# Use_Multi_GPU: true
Device: '0'
Loading

0 comments on commit 8eb02bc

Please sign in to comment.