Add files via upload

B1ueorange · Jun 15, 2023 · 8eb02bc · 8eb02bc
commit 8eb02bc
Show file tree

Hide file tree

Showing 37 changed files with 8,739 additions and 0 deletions.
diff --git a/Datasets.py b/Datasets.py
diff --git a/Hyper_Parameters.yaml b/Hyper_Parameters.yaml
@@ -0,0 +1,125 @@
+Sound:
+    Spectrogram_Dim: 1025
+    Mel_Dim: 80
+    Frame_Length: 512   # 24000 / 1000 * 20ms
+    Frame_Shift: 128    # 24000 / 1000 * 5ms
+    Sample_Rate: 24000
+    Mel_F_Min: 0
+    Mel_F_Max: 12000    # The data I received was 44100Hz
+    Max_Abs_Mel: 4
+    Confidence_Threshold: 0.6
+    Gaussian_Smoothing_Sigma: 0.0
+    F0_Min: 0
+    F0_Max: 500
+
+Tokens: 78
+Max_Note: 100
+Min_Duration: 600
+Max_Duration: 1500
+
+GST:
+    size: 256
+    num_heads: 8
+    token_num: 10
+    E: 256
+
+  # reference encoder
+    ref_enc_filters: [32, 32, 64, 64, 128, 128]
+    ref_enc_size: [3, 3]
+    ref_enc_strides: [2, 2]
+    ref_enc_pad: [1, 1]
+    ref_enc_gru_size: E // 2
+    n_mels: 80
+
+Encoder:
+    Size: 384
+    Conv_Kernel_Size: [5, 5, 5]
+    FFT_Block:
+        Heads: 2
+        Dropout_Rate: 0.1
+        Stacks: 6
+        FeedForward:
+            In_Kernel_Size: 3
+            Out_Kernel_Size: 1
+            Channels: 1536
+
+Duration_Predictor:
+    Conv:
+        Kernel_Size: [3, 3]
+        Channels: [256, 256]
+        Dropout_Rate: 0.1
+
+Decoder:
+    Size: 384   # I think this must be same to encoder size.
+    FFT_Block:
+        Heads: 2
+        Dropout_Rate: 0.1
+        Stacks: 6
+        FeedForward:
+            In_Kernel_Size: 3
+            Out_Kernel_Size: 1
+            Channels: 1536
+
+Discriminator:
+    Stacks: 3
+    Channels: 64
+    Kernel_Size: 9
+    Frequency_Range: [[0, 40], [20, 60], [40, 80]]
+
+Vocoder_Path: 'Vocoder.pts'  # Paper is using PWGAN. This part needs another model.
+
+Token_Path: './Token.yaml'
+Train:
+    Use_Pattern_Cache: false
+    Train_Pattern:
+        Path: './OpenSingerPickle'
+        # Path: 'D:/Download/OpenSingerPickle/NAMS1'
+        # Path: 'D:/Download/segment'
+        # Path: './segment/NAMS'
+        # Path: '/home/share/chengxuanang/hifisinger-chinese/segment/NAMS'
+        # Path: '/data/gongjunhao/hifisinger-chinese/segment/NAMS'
+        Metadata_File: 'METADATA.PICKLE'
+        Accumulated_Dataset_Epoch: 1   # This is to prevent slow down from torch.utils.data.DataLoader when the number of patterns is small.
+    Eval_Pattern:
+        Path: './OpenSingerPickle'
+        # Path: 'D:/Download/OpenSingerPickle/NAMS1'
+        # Path: 'D:/Download/segment/NAMS1'
+        # Path: './segment'
+        # Path: '/home/share/chengxuanang/hifisinger-chinese/segment/NAMS1'
+        # Path: '/data/gongjunhao/hifisinger-chinese/segment/NAMS1'
+        Metadata_File: 'METADATA.PICKLE'
+    Num_Workers: 2
+    Batch_Size: 8 # 32 # 8
+    Learning_Rate:
+        Generator:
+            Initial: 1.0e-4 #×10
+            Base: 4000
+        Discriminator: 
+            Initial: 5.0e-5
+            Base: 4000
+    Discriminator_Gradient_Panelty_Gamma: 10.0
+    ADAM:
+        Beta1: 0.9
+        Beta2: 0.999
+        Epsilon: 1.0e-7
+    Weight_Decay: 1.0e-6
+    Gradient_Norm: 0.5
+    Max_Step: 400000
+    Checkpoint_Save_Interval: 5000
+    Logging_Interval: 100
+    Evaluation_Interval: 1000
+    Inference_Interval: 10000
+    Initial_Inference: true
+    Inference_Pattern_in_Train: 'Inference_Text.txt'
+
+Inference_Batch_Size: 4
+Inference_Path: 'D:/HiFiSinger.Reuslts/Songs_108/Inference_resemblyzer'
+# Checkpoint_Path: 'D:/HiFiSinger.Reuslts/Songs_108/Checkpoint'
+Checkpoint_Path: 'D:/HiFiSinger.Reuslts/Songs_108/Checkpoint2'
+Log_Path: 'D:/HiFiSinger.Reuslts/Songs_108/Log'
+
+Use_Mixed_Precision: true
+Use_Multi_GPU: false
+# Device: '0, 1'
+# Use_Multi_GPU: true
+Device: '0'