From 6e7ad89104372fefcbc3a6a29078cff0fac2b371 Mon Sep 17 00:00:00 2001 From: Fabio Tesser Date: Mon, 8 Oct 2012 17:13:48 +0200 Subject: [PATCH] HTKLabeler fixing and improvement. Issue marytts-it/marytts#12 --- .../marytts/tools/voiceimport/HTKLabeler.java | 1080 ++++++++++++++--- .../tools/voiceimport/importMain.config | 1 + 2 files changed, 897 insertions(+), 184 deletions(-) diff --git a/marytts-builder/src/main/java/marytts/tools/voiceimport/HTKLabeler.java b/marytts-builder/src/main/java/marytts/tools/voiceimport/HTKLabeler.java index a01464d371..0b0f0b91bc 100644 --- a/marytts-builder/src/main/java/marytts/tools/voiceimport/HTKLabeler.java +++ b/marytts-builder/src/main/java/marytts/tools/voiceimport/HTKLabeler.java @@ -22,6 +22,7 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; @@ -30,9 +31,11 @@ import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Scanner; import java.util.Set; import java.util.SortedMap; import java.util.StringTokenizer; @@ -47,6 +50,7 @@ import javax.xml.xpath.XPathFactory; import marytts.client.MaryClient; +import marytts.exceptions.MaryConfigurationException; import marytts.modules.phonemiser.Allophone; import marytts.modules.phonemiser.AllophoneSet; import marytts.util.io.FileUtils; @@ -58,11 +62,19 @@ /** * Automatic Labelling using HTK labeller + * * @author Sathish Chandra Pammi + * @author Fabio Tesser + * + * Fabio Tesser has fixed some HTK procedures and he has added the managing of virtual pauses (so called short pauses) after every word. + * These pauses if detected will notified to the user, the user should be check if necessary to force + * the punctuation on the original text phrase in order to have a more coherent prosody. */ public class HTKLabeler extends VoiceImportComponent { + + private DatabaseLayout db; private File rootDir; private File htk; @@ -79,8 +91,11 @@ public class HTKLabeler extends VoiceImportComponent { protected File intonisedXMLDir; protected Map> dictionary; protected AllophoneSet allophoneSet; - protected int MAX_ITERATIONS = 15; - protected int SP_ITERATION = 5; + protected int MAX_ITERATIONS = 150; + protected int MAX_SP_ITERATION = 10; //when intra word forced pauses (ssil) are inserted + protected int MAX_VP_ITERATION = 20; // when virtual pauses (sp) are inserted + protected int MAX_MIX_ITERATION = 30; // when mixtures are increased + protected int noIterCompleted = 0; @@ -90,9 +105,22 @@ public class HTKLabeler extends VoiceImportComponent { public final String MAXITER = "HTKLabeler.maxNoOfIterations"; public String INTONISEDDIR = "HTKLabeler.intonisedXMLDir"; public String PHONEMEXML = "HTKLabeler.phoneXMLFile"; - public String SPITER = "HTKLabeler.shortPauseIteration"; + public String MAXSPITER = "HTKLabeler.maxshortPauseIteration"; + private String HTK_SO = "-A -D -V -T 1"; //Main HTK standard Options HTK_SO + private String Extract_FEAT = "MFCC_0"; //MFCC_E + private String Train_FEAT = "MFCC_0_D_A"; //MFCC_E_D_A + private int Train_VECTSIZE = 13*3; //13; //13 without D_A; 13*3 with D_A + private int NUMStates = 5; + private int[] num_mixtures_for_state = {2,1,2}; + private int[] current_number_of_mixtures = {1,1,1}; //this is the starting number of mixtures (must be all ones) + private ArrayList logProbFrame_array = new ArrayList(); + private ArrayList epsilon_array = new ArrayList(); + private int PHASE_NUMBER =0; + private double[] epsilon_PHASE = {0.2, 0.05, 0.001, 0.0005}; // 0 1 2 3 + + public final String getName(){ return "HTKLabeler"; } @@ -133,8 +161,8 @@ public SortedMap getDefaultProps(DatabaseLayout db){ props.put(OUTLABDIR, db.getProp(db.ROOTDIR) +"lab" +System.getProperty("file.separator")); - props.put(MAXITER,"20"); - props.put(SPITER,"5"); + props.put(MAXITER,Integer.toString(MAX_ITERATIONS)); + props.put(MAXSPITER,Integer.toString(MAX_SP_ITERATION)); } @@ -151,12 +179,11 @@ protected void setupHelp(){ //props2Help.put(INITHTKDIR,"If you provide a path to previous HTK Directory, Models will intialize with those models. other wise HTK Models will build with Flat-Start Initialization"); //props2Help.put(RETRAIN,"true - Do re-training by initializing with given models. false - Do just Decoding"); props2Help.put(MAXITER,"Maximum number of iterations used for training"); - props2Help.put(SPITER,"Iteration number at which short-pause model need to insert."); + props2Help.put(MAXSPITER,"Iteration number at which short-pause model need to insert."); } - @Override - protected void initialiseComp() + public void initialiseComp() { dictionary = new TreeMap>(); @@ -186,7 +213,7 @@ public boolean compute() throws Exception{ } MAX_ITERATIONS = Integer.valueOf((getProp(MAXITER))); - SP_ITERATION = Integer.valueOf((getProp(SPITER))); + MAX_SP_ITERATION = Integer.valueOf((getProp(MAXSPITER))); System.out.println("Preparing voice database for labelling using HTK :"); //get the voicename @@ -210,14 +237,29 @@ public boolean compute() throws Exception{ createPhoneDictionary(); // Extract phone sequence from intonisedXML files getPhoneSequence(); + + // This is necessary to remove multiple sp: TODO: implement a loop and check the result + delete_multiple_sp_in_PhoneMLFile(getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones3.mlf", + getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones4.mlf"); + + delete_multiple_sp_in_PhoneMLFile(getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones4.mlf", + getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones5.mlf"); + + delete_multiple_sp_in_PhoneMLFile(getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones5.mlf", + getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones6.mlf"); + + delete_multiple_sp_in_PhoneMLFile(getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones6.mlf", + getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones7.mlf"); + + delete_multiple_sp_in_PhoneMLFile(getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones7.mlf", + getProp(HTDIR) + File.separator + "etc" + File.separator + "htk.phones3.mlf"); //part 2: Feature Extraction using HCopy System.out.println("Feature Extraction:"); featureExtraction(); System.out.println("... Done."); - - //Part 3: Initilize Flat-start initialisation + //Part 3: Initialize Flat-start initialization System.out.println("HTK Training:"); initialiseHTKTrain(); createTrainFile(); @@ -231,13 +273,16 @@ public boolean compute() throws Exception{ hviteAligning(); System.out.println("... Done."); + //Part 6: Extra model statistics + System.out.println("Generating Extra model statistics..."); htkExtraModels(); + System.out.println("... Done."); + //Part 6: Generate Labels in required format System.out.println("Generating Labels in required format..."); getProperLabelFormat(); System.out.println(" ... done."); System.out.println("Label file Generation Successfully completed using HTK !"); - return true; } @@ -246,8 +291,9 @@ public boolean compute() throws Exception{ /** * Setup the HTK directory * @throws IOException, InterruptedException + * @throws MaryConfigurationException */ - private void setup() throws IOException,InterruptedException{ + private void setup() throws IOException,InterruptedException, MaryConfigurationException{ htk.mkdir(); File lab = new File(htk.getAbsolutePath()+"/lab"); @@ -260,17 +306,21 @@ private void setup() throws IOException,InterruptedException{ new OutputStreamWriter(process.getOutputStream())); //go to htk directory and setup Directory Structure pw.print("( cd "+htk.getAbsolutePath() - +"; mkdir hmm" - +"; mkdir etc" - +"; mkdir feat" - +"; mkdir config" - +"; mkdir lab" + +"; mkdir -p hmm" + +"; mkdir -p etc" + +"; mkdir -p feat" + +"; mkdir -p config" + +"; mkdir -p lab" +"; exit )\n"); pw.flush(); //shut down pw.close(); process.waitFor(); - process.exitValue(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } } @@ -295,6 +345,13 @@ private void createPhoneDictionary() throws Exception{ new FileOutputStream (new File(getProp(HTDIR)+File.separator +"etc"+File.separator +"htk"+".phone2.list"))); + PrintWriter phoneListOut2 = new PrintWriter( + new FileOutputStream (new File(getProp(HTDIR)+File.separator + +"etc"+File.separator + +"htk"+".phone3.list"))); + + + String phoneSeq; //transLabelOut.println("#!MLF!#"); Set phonesList = allophoneSet.getAllophoneNames(); @@ -308,22 +365,48 @@ private void createPhoneDictionary() throws Exception{ //phon = "sp"; } phon = replaceTrickyPhones(phon); - transLabelOut.println(phon+" "+phon); + transLabelOut.println(phon+" "+phon); phoneListOut.println(phon); phoneListOut1.println(phon); + phoneListOut2.println(phon); } transLabelOut.println("sil"+" "+"sil"); phoneListOut.println("sil"); phoneListOut1.println("sil"); + phoneListOut2.println("sil"); + transLabelOut.println("ssil"+" "+"ssil"); phoneListOut1.println("ssil"); + phoneListOut2.println("ssil"); + + transLabelOut.println("sp"+" "+"sp"); + phoneListOut2.println("sp"); + + + // commented G End Word + // commented G Start Word + /*phoneListOut.println("GEW"); + phoneListOut1.println("GEW"); + phoneListOut2.println("GEW"); + + phoneListOut.println("GSW"); + phoneListOut1.println("GSW"); + phoneListOut2.println("GSW"); + + transLabelOut.println("sp"+" "+"GEW"); + transLabelOut.println("sp"+" "+"GEW sp"); + transLabelOut.println("sp"+" "+"sp GSW"); + transLabelOut.println("sp"+" "+"GSW"); + transLabelOut.println("sp"+" "+"GEW sp GSW");*/ + transLabelOut.flush(); transLabelOut.close(); phoneListOut.flush(); phoneListOut.close(); phoneListOut1.flush(); phoneListOut1.close(); - + phoneListOut2.flush(); + phoneListOut2.close(); } @@ -335,6 +418,7 @@ private void createPhoneDictionary() throws Exception{ private void createRequiredFiles() throws Exception{ // Creating mkphones0.led file, which insert and delete pauses + // FABIO TODO: check is it used? File file = new File(getProp(HTDIR)+File.separator+"config"+File.separator+"mkphone0.led"); PrintWriter pw = new PrintWriter(new FileWriter(file)); pw.println("EX"); @@ -343,18 +427,33 @@ private void createRequiredFiles() throws Exception{ pw.flush(); pw.close(); + + // Creating mkphones1.led file, which delete multiple sp pauses + file = new File(getProp(HTDIR)+File.separator+"config"+File.separator+"mkphone1.led"); + pw = new PrintWriter(new FileWriter(file)); + pw.println("ME sp sp sp"); + pw.println("ME sil sil sp"); + pw.println("ME sil sp sil"); + pw.println("ME ssil ssil sp"); + pw.println("ME ssil sp ssil"); + pw.flush(); + pw.close(); + + + // creating a HTK Feature Extraction config file file = new File(getProp(HTDIR)+File.separator+"config"+File.separator+"featEx.conf"); pw = new PrintWriter(new FileWriter(file)); pw.println("SOURCEFORMAT = WAV # Gives the format of speech files "); - pw.println("TARGETKIND = MFCC_0 #Identifier of the coefficients to use"); + pw.println("TARGETKIND = " + Extract_FEAT + " #Identifier of the coefficients to use"); pw.println("WINDOWSIZE = 100000.0 # = 10 ms = length of a time frame"); - pw.println("TARGETRATE = 50000.0 # = 5 ms = frame periodicity"); + pw.println("TARGETRATE = 50000.0 # = 5 ms = frame periodicity"); pw.println("NUMCEPS = 12 # Number of MFCC coeffs (here from c1 to c12)"); pw.println("USEHAMMING = T # Use of Hamming funtion for windowing frames"); pw.println("PREEMCOEF = 0.97 # Pre-emphasis coefficient"); pw.println("NUMCHANS = 26 # Number of filterbank channels"); pw.println("CEPFILTER = 22 # Length of ceptral filtering"); + pw.println("ENORMALISE = F # Energy measure normalization (sentence level)"); pw.flush(); pw.close(); @@ -364,15 +463,16 @@ private void createRequiredFiles() throws Exception{ file = new File(getProp(HTDIR)+File.separator+"config"+File.separator+"htkTrain.conf"); pw = new PrintWriter(new FileWriter(file)); - pw.println("TARGETKIND = MFCC_0 #Identifier of the coefficients to use"); - pw.println("PARAMETERKIND = MFCC_0"); + pw.println("TARGETKIND = " + Train_FEAT + " #Identifier of the coefficients to use"); + pw.println("PARAMETERKIND = " + Train_FEAT + ""); pw.println("WINDOWSIZE = 100000.0 # = 10 ms = length of a time frame"); - pw.println("TARGETRATE = 50000.0 # = 5 ms = frame periodicity"); + pw.println("TARGETRATE = 50000.0 # = 5 ms = frame periodicity"); pw.println("NUMCEPS = 12 # Number of MFCC coeffs (here from c1 to c12)"); pw.println("USEHAMMING = T # Use of Hamming funtion for windowing frames"); pw.println("PREEMCOEF = 0.97 # Pre-emphasis coefficient"); pw.println("NUMCHANS = 26 # Number of filterbank channels"); pw.println("CEPFILTER = 22 # Length of ceptral filtering"); + pw.println("ENORMALISE = F # Energy measure normalization (sentence level)"); pw.flush(); pw.close(); @@ -382,8 +482,7 @@ private void createRequiredFiles() throws Exception{ pw = new PrintWriter(new FileWriter(file)); for (int i=0; i"); - pw.println(" "+numStates+" "+vectorSize+" "); - for(int i=2;i "+i); + pw.println(" "+numStates+" "+vectorSize+" <" + Train_FEAT + ">"); + + + + for(int state=2;state "+state); + //pw.println(" " + num_mixtures_for_state[state-2]); + //for(int mix=1;mix<=num_mixtures_for_state[state-2];mix++){ + //pw.println(" " + mix + " " + 1.0/num_mixtures_for_state[state-2]); + pw.println(" "+vectorSize); for(int j=0;j "+numStates); pw.println("0.0 1.0 0.0 0.0 0.0"); pw.println("0.0 0.6 0.4 0.0 0.0"); @@ -433,27 +550,100 @@ private void createRequiredFiles() throws Exception{ pw.flush(); pw.close(); - // Creating Silence modeling config file + // Creating SSIL Silence modeling config file -// creating a hmm protofile - file = new File(getProp(HTDIR)+File.separator+"config"+File.separator+"sil.hed"); pw = new PrintWriter(new FileWriter(file)); - pw.println("AT 2 4 0.2 {sil.transP}\n"); - pw.println("AT 4 2 0.2 {sil.transP}\n"); - //pw.println("AT 1 3 0.3 {ssil.transP}\n"); - //pw.println("TI silst {sil.state[3],ssil.state[2]}\n"); - pw.println("AT 2 4 0.2 {ssil.transP}\n"); - pw.println("AT 4 2 0.2 {ssil.transP}\n"); + pw.println("AT 2 4 0.2 {sil.transP}"); + pw.println("AT 4 2 0.2 {sil.transP}"); + //pw.println("AT 1 3 0.3 {ssil.transP}"); + //pw.println("TI silst {sil.state[3],ssil.state[2]}"); + pw.println("AT 2 4 0.2 {ssil.transP}"); + pw.println("AT 4 2 0.2 {ssil.transP}"); + // added tied states... + pw.println("TI silst2 {sil.state[2],ssil.state[2]}"); + pw.println("TI silst3 {sil.state[3],ssil.state[3]}"); + pw.println("TI silst4 {sil.state[4],ssil.state[4]}"); + + + pw.flush(); + pw.close(); + + // Creating SP Silence modeling config file + + + file = new File(getProp(HTDIR)+File.separator+"config"+File.separator+"sil_vp.hed"); + pw = new PrintWriter(new FileWriter(file)); + + //sp 3 state case: + //pw.println("AT 1 3 0.3 {sp.transP}"); + //pw.println("TI ssilst {ssil.state[3],sp.state[2]}"); + + //sp 5 state case: + pw.println("AT 1 5 0.3 {sp.transP}"); + pw.println("TI ssilst2 {ssil.state[2],sp.state[2]}"); + pw.println("TI ssilst3 {ssil.state[3],sp.state[3]}"); + pw.println("TI ssilst4 {ssil.state[4],sp.state[4]}"); + pw.flush(); pw.close(); + } + /** - * create phone master label file + * delete sp repetition on htk.phones3.mlf + * @throws Exception + */ + private void delete_multiple_sp_in_PhoneMLFile(String filein, String fileout) throws Exception{ + String hled = getProp(HTKDIR)+File.separator + +"bin"+File.separator+"HLEd"; + File htkFile = new File(hled); + if (!htkFile.exists()) { + throw new RuntimeException("File "+htkFile.getAbsolutePath()+" does not exist"); + } + //String phoneMLF3 = getProp(HTDIR)+File.separator + // +"etc"+File.separator+"htk.phones3.mlf"; + + // String phoneMLFtmpin = getProp(HTDIR)+File.separator + // +"etc"+File.separator+"htk.phones3_tmp_in.mlf"; + + //String phoneMLFtmpout = getProp(HTDIR)+File.separator + // +"etc"+File.separator+"htk.phones3_tmp_out.mlf"; + + String mkphoneLED = getProp(HTDIR)+File.separator + +"config"+File.separator+"mkphone1.led"; + + Runtime rtime = Runtime.getRuntime(); + //get a shell + Process process = rtime.exec("/bin/bash"); + //get an output stream to write to the shell + PrintWriter pw = new PrintWriter( + new OutputStreamWriter(process.getOutputStream())); + System.out.println("( " + +hled+" -l '*' -i " + +fileout+" "+mkphoneLED+" "+filein + +"; exit )\n"); + + pw.print("( " + +hled+" -l '*' -i " + +fileout+" "+mkphoneLED+" "+filein + //+"; " + +"; exit )\n"); + pw.flush(); + //shut down + pw.close(); + process.waitFor(); + process.exitValue(); + + } + + + /** + * create phone master label file (Not used?) * @throws Exception */ private void createPhoneMLFile() throws Exception{ @@ -492,7 +682,11 @@ private void createPhoneMLFile() throws Exception{ //shut down pw.close(); process.waitFor(); - process.exitValue(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } } @@ -518,17 +712,21 @@ private void featureExtraction() throws Exception { //get an output stream to write to the shell PrintWriter pw = new PrintWriter( new OutputStreamWriter(process.getOutputStream())); - System.out.println("( " - +hcopy+" -T 1 -C "+configFile+" -S "+listFile + System.out.println("( cd "+getProp(HTDIR) +"; " + +hcopy+" -T 1 -C "+configFile+" -S "+listFile+" > log_featureExtraction.txt" +"; exit )\n"); - pw.print("( " - +hcopy+" -T 1 -C "+configFile+" -S "+listFile+" > log.txt" + pw.print("( cd "+getProp(HTDIR) +"; " + +hcopy+" -T 1 -C "+configFile+" -S "+listFile+" > log_featureExtraction.txt" +"; exit )\n"); pw.flush(); //shut down pw.close(); process.waitFor(); - process.exitValue(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } } /** @@ -557,22 +755,29 @@ private void initialiseHTKTrain() throws Exception{ System.out.println("( cd "+getProp(HTDIR) +" ; mkdir hmm/hmm-dummy ; " +" mkdir hmm/hmm-final ; " - +hcompv+" -C "+configFile+" -f 0.01 -m -S "+listFile + +hcompv+" " + HTK_SO + " -C "+configFile+" -f 0.01 -m -S "+listFile +" -M "+getProp(HTDIR)+File.separator+"hmm/hmm-dummy " - +getProp(HTDIR)+File.separator+"config"+File.separator+"htk.proto"+" > log.txt" + +getProp(HTDIR)+File.separator+"config"+File.separator+"htk.proto"+" > log_initialiseHTKTrain.txt" +"; exit )\n"); pw.print("( cd "+getProp(HTDIR) +" ; mkdir hmm/hmm-dummy ; " +" mkdir hmm/hmm-final ; " - +hcompv+" -C "+configFile+" -f 0.01 -m -S "+listFile + +hcompv+" " + HTK_SO + " -C "+configFile+" -f 0.01 -m -S "+listFile +" -M "+getProp(HTDIR)+File.separator+"hmm/hmm-dummy " - +getProp(HTDIR)+File.separator+"config"+File.separator+"htk.proto"+" > log.txt" + +getProp(HTDIR)+File.separator+"config"+File.separator+"htk.proto"+" > log_initialiseHTKTrain.txt" +"; exit )\n"); pw.flush(); //shut down pw.close(); + process.waitFor(); - process.exitValue(); + + + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } } @@ -613,144 +818,408 @@ private void createTrainFile() throws Exception { new OutputStreamWriter(process.getOutputStream())); System.out.println("( cd "+getProp(HTDIR) - +"; sh etc"+File.separator+"htkTrainScript.sh" + +"; sh etc"+File.separator+"htkTrainScript.sh"+" > log_htkTrainScript.txt" +"; exit )\n"); pw.print("( cd "+getProp(HTDIR) - +"; sh etc"+File.separator+"htkTrainScript.sh" + +"; sh etc"+File.separator+"htkTrainScript.sh"+" > log_htkTrainScript.txt" +"; exit )\n"); pw.flush(); //shut down pw.close(); process.waitFor(); - process.exitValue(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } PrintWriter macroFile = new PrintWriter( new FileOutputStream (new File(hmmDir+"hmm0"+File.separator+"macros"))); - macroFile.println("~o\n"+" 13\n"+""); + macroFile.println("~o\n"+" 13\n"+"<" + Train_FEAT + ">"); macroFile.println(FileUtils.getFileAsString(new File(hmmDir+"hmm-dummy"+File.separator+"vFloors"), "ASCII")); macroFile.flush(); macroFile.close(); } + // TODO: check why log is empty! /** * Flat-start initialization for automatic labeling * @throws Exception */ private void herestTraining() throws Exception{ - + String herest = getProp(HTKDIR)+File.separator - +"bin"+File.separator+"HERest"; + +"bin"+File.separator+"HERest"; String hhed = getProp(HTKDIR)+File.separator - +"bin"+File.separator+"HHEd"; - + +"bin"+File.separator+"HHEd"; + File htkFile = new File(herest); if (!htkFile.exists()) { throw new RuntimeException("File "+htkFile.getAbsolutePath()+" does not exist"); } - + String configFile = getProp(HTDIR)+File.separator - +"config"+File.separator+"htkTrain.conf"; + +"config"+File.separator+"htkTrain.conf"; String hhedconf = getProp(HTDIR)+File.separator - +"config"+File.separator+"sil.hed"; + +"config"+File.separator+"sil.hed"; + + String hhedconf_vp = getProp(HTDIR)+File.separator + +"config"+File.separator+"sil_vp.hed"; + + String trainList = getProp(HTDIR)+File.separator - +"etc"+File.separator+"htkTrain.list"; + +"etc"+File.separator+"htkTrain.list"; String phoneList = getProp(HTDIR)+File.separator - +"etc"+File.separator+"htk.phone.list"; - + +"etc"+File.separator+"htk.phone.list"; + String hmmDir = getProp(HTDIR)+File.separator - +"hmm"+File.separator; + +"hmm"+File.separator; String phoneMlf = getProp(HTDIR)+File.separator - +"etc"+File.separator+"htk.phones.mlf"; - - - - for(int i=1;i<=MAX_ITERATIONS;i++){ - - System.out.println("Iteration number: "+i); - - File hmmItDir = new File(hmmDir+"hmm"+i); - if(!hmmItDir.exists()) hmmItDir.mkdir(); - - - Runtime rtime = Runtime.getRuntime(); + +"etc"+File.separator+"htk.phones.mlf"; + + int BEST_ITERATION = MAX_ITERATIONS; + int SP_ITERATION = -1; + int VP_ITERATION = -1; + int change_mix_iteration = -1; + for(int iteration=1;iteration<=MAX_ITERATIONS;iteration++){ + + System.out.println("Iteration number: "+iteration); + + File hmmItDir = new File(hmmDir+"hmm"+iteration); + if(!hmmItDir.exists()) hmmItDir.mkdir(); + + + Runtime rtime = Runtime.getRuntime(); //get a shell - Process process = rtime.exec("/bin/bash"); + Process process = rtime.exec("/bin/bash"); //get an output stream to write to the shell - PrintWriter pw = new PrintWriter( - new OutputStreamWriter(process.getOutputStream())); - - if(i == SP_ITERATION){ - insertShortPause(i); - String oldMacro = hmmDir+"hmm"+(i-1)+File.separator+"macros"; - String newMacro = hmmDir+"hmm"+i+File.separator+"macros"; - FileUtils.copy(oldMacro,newMacro); - continue; - } - - if(i == (SP_ITERATION+1)){ - - phoneMlf = getProp(HTDIR)+File.separator - +"etc"+File.separator+"htk.phones2.mlf"; - phoneList = getProp(HTDIR)+File.separator - +"etc"+File.separator+"htk.phone2.list"; + PrintWriter pw = new PrintWriter( + new OutputStreamWriter(process.getOutputStream())); + + if(PHASE_NUMBER==0){ + + if(iteration == (SP_ITERATION+1)){ + + phoneMlf = getProp(HTDIR)+File.separator + +"etc"+File.separator+"htk.phones2.mlf"; + phoneList = getProp(HTDIR)+File.separator + +"etc"+File.separator+"htk.phone2.list"; + + System.out.println("( cd "+getProp(HTDIR) +"; " + +hhed+" " + HTK_SO + " -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"macros" + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"hmmdefs" + +" -M "+hmmDir+"hmm"+iteration+" "+hhedconf+" "+phoneList +" >> log_herestTraining_"+iteration+".txt" + +"; exit )\n"); + pw.println("( cd "+getProp(HTDIR) +"; " + +hhed+" " + HTK_SO + " -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"macros" + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"hmmdefs" + +" -M "+hmmDir+"hmm"+iteration+" "+hhedconf+" "+phoneList +" >> log_herestTraining_"+iteration+".txt" + +"; exit )\n"); + pw.flush(); + //shut down + pw.close(); + process.waitFor(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } + + // copy of logProbFrame_array in current iteration + logProbFrame_array.add(logProbFrame_array.get(iteration-2)); + epsilon_array.add(100000000.0); + + //now we enter in PHASE 1 + PHASE_NUMBER = 1; + System.out.println("Now we enter in PHASE:" + PHASE_NUMBER); + continue; + } + + // check epsilon_array + if (iteration > 2){ + if(epsilon_array.get(iteration-2) < epsilon_PHASE[PHASE_NUMBER] || iteration == MAX_SP_ITERATION){ + SP_ITERATION = iteration; + insertShortPause(iteration); + String oldMacro = hmmDir+"hmm"+(iteration-1)+File.separator+"macros"; + String newMacro = hmmDir+"hmm"+iteration+File.separator+"macros"; + FileUtils.copy(oldMacro,newMacro); + + // copy of logProbFrame_array in current iteration + logProbFrame_array.add(logProbFrame_array.get(iteration-2)); + epsilon_array.add(100000000.0); + continue; + } + } + } + + ///----------------- + if(PHASE_NUMBER==1){ + if(iteration == (VP_ITERATION+1)){ + phoneMlf = getProp(HTDIR)+File.separator + +"etc"+File.separator+"htk.phones3.mlf"; + phoneList = getProp(HTDIR)+File.separator + +"etc"+File.separator+"htk.phone3.list"; + + System.out.println("( cd "+getProp(HTDIR) +"; " + +hhed+" " + HTK_SO + " -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"macros" + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"hmmdefs" + +" -M "+hmmDir+"hmm"+iteration+" "+hhedconf_vp+" "+phoneList +" >> log_herestTraining_"+iteration+".txt" + +"; exit )\n"); + pw.println("( cd "+getProp(HTDIR) +"; " + +hhed+" " + HTK_SO + " -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"macros" + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"hmmdefs" + +" -M "+hmmDir+"hmm"+iteration+" "+hhedconf_vp+" "+phoneList +" >> log_herestTraining_"+iteration+".txt" + +"; exit )\n"); + pw.flush(); + //shut down + pw.close(); + process.waitFor(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } + + // copy of logProbFrame_array in current iteration + logProbFrame_array.add(logProbFrame_array.get(iteration-2)); + epsilon_array.add(100000000.0); + + //now we enter in PHASE 2 + PHASE_NUMBER = 2; + System.out.println("Now we enter in PHASE:" + PHASE_NUMBER); + continue; + } - System.out.println("( " - +hhed+" -H "+hmmDir+"hmm"+(i-1)+File.separator+"macros" - +" -H "+hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs" - +" -M "+hmmDir+"hmm"+i+" "+hhedconf+" "+phoneList - +"; exit )\n"); - pw.println("( " - +hhed+" -H "+hmmDir+"hmm"+(i-1)+File.separator+"macros" - +" -H "+hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs" - +" -M "+hmmDir+"hmm"+i+" "+hhedconf+" "+phoneList - +"; exit )\n"); - pw.flush(); - //shut down - pw.close(); - process.waitFor(); - process.exitValue(); - continue; - } + // check epsilon_array + if(epsilon_array.get(iteration-2) < epsilon_PHASE[PHASE_NUMBER] || iteration == MAX_VP_ITERATION){ + VP_ITERATION = iteration; + insertVirtualPauseThreeStates(iteration); + String oldMacro = hmmDir+"hmm"+(iteration-1)+File.separator+"macros"; + String newMacro = hmmDir+"hmm"+iteration+File.separator+"macros"; + FileUtils.copy(oldMacro,newMacro); + + // copy of logProbFrame_array in current iteration + logProbFrame_array.add(logProbFrame_array.get(iteration-2)); + epsilon_array.add(100000000.0); + continue; + } + } + - System.out.println("( " - +herest+" -C "+configFile+" -I "+phoneMlf + ///----------------- + if(PHASE_NUMBER==2){ + // check epsilon_array + // the following change_mix_iteration + 2 is used to allow more than one restimation after insertion of new mixture + // Because just after the insertion the delta can be negative + + if(((iteration != change_mix_iteration + 2) && (epsilon_array.get(iteration-2) < epsilon_PHASE[PHASE_NUMBER])) || iteration == MAX_MIX_ITERATION){ + change_mix_iteration = iteration; + MAX_MIX_ITERATION = -1; + + // Creating Increasing mixture config file dynamic iteration + String hhedconf_mix = getProp(HTDIR)+File.separator+"config"+File.separator+"sil_mix_"+iteration+".hed"; + File file = new File(hhedconf_mix); + PrintWriter hhed_conf_pw = new PrintWriter(new FileWriter(file)); + + //MU 3 {*.state[2].mix} + Boolean need_other_updates = false; + for (int state=0; state> log_herestTraining_"+iteration+".txt" + +"; exit )\n"); + pw.println("( cd "+getProp(HTDIR) +"; " + +hhed+" " + HTK_SO + " -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"macros" + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"hmmdefs" + +" -M "+hmmDir+"hmm"+iteration+" "+hhedconf_mix+" "+phoneList +" >> log_herestTraining_"+iteration+".txt" + +"; exit )\n"); + pw.flush(); + //shut down + pw.close(); + process.waitFor(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } + + // copy of logProbFrame_array in current iteration + logProbFrame_array.add(logProbFrame_array.get(iteration-2)); + epsilon_array.add(100000000.0); + continue; + } + } + + ///----------------- + if(PHASE_NUMBER==3){ + // check epsilon_array + if(((iteration != change_mix_iteration + 2) && (epsilon_array.get(iteration-2) < epsilon_PHASE[PHASE_NUMBER])) || iteration == MAX_ITERATIONS) + { + int last = iteration-1; + int previus_last = iteration-2; + + System.out.println("Average log prob per frame has not beeen increased too much respect the previus iteration:"); + System.out.println("Average log prob per frame at last HREST iteration ("+ last +")-> " + logProbFrame_array.get(iteration-2)); + System.out.println("Average log prob per frame at previus HREST iteration ("+ previus_last +")-> " + logProbFrame_array.get(iteration-3)); + System.out.println("Delta -> " + epsilon_array.get(iteration-2)); + System.out.println("Suggested Action -> stop the iterations."); + + if (logProbFrame_array.get(iteration-3) > logProbFrame_array.get(iteration-2)) + { + BEST_ITERATION = iteration - 2; + } else + { + BEST_ITERATION = iteration - 1; + } + break; + } + } + + //Normal HEREST: + System.out.println("( cd "+getProp(HTDIR) +"; " + +herest+" " + HTK_SO + " -C "+configFile+" -I "+phoneMlf +" -t 250.0 150.0 1000.0" +" -S "+trainList - +" -H "+hmmDir+"hmm"+(i-1)+File.separator+"macros" - +" -H "+hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs"+" -M "+hmmDir+"hmm"+i+" "+phoneList + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"macros" + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"hmmdefs"+" -M "+hmmDir+"hmm"+iteration+" "+phoneList +" >> log_herestTraining_"+iteration+".txt" +"; exit )\n"); - - pw.println("( " - +herest+" -C "+configFile+" -I "+phoneMlf + + pw.println("( cd "+getProp(HTDIR) +"; " + +herest+" " + HTK_SO + " -C "+configFile+" -I "+phoneMlf +" -t 250.0 150.0 1000.0" +" -S "+trainList - +" -H "+hmmDir+"hmm"+(i-1)+File.separator+"macros" - +" -H "+hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs"+" -M "+hmmDir+"hmm"+i+" "+phoneList + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"macros" + +" -H "+hmmDir+"hmm"+(iteration-1)+File.separator+"hmmdefs"+" -M "+hmmDir+"hmm"+iteration+" "+phoneList +" >> log_herestTraining_"+iteration+".txt" +"; exit )\n"); - pw.flush(); - //shut down - pw.close(); - process.waitFor(); - process.exitValue(); + pw.flush(); + //shut down + pw.close(); + process.waitFor(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } + + // update average_log_prob_per_frame and deltas + check_average_log_prob_per_frame(iteration); + + System.out.println("Delta average log prob per frame to respect previus iteration-> " + epsilon_array.get(iteration-1)); + System.out.println("Current PHASE: " + PHASE_NUMBER); + System.out.println("Current state and number of mixtures (for each phoneme): " + Arrays.toString(current_number_of_mixtures)); + + System.out.println("---------------------------------------"); } - String oldMacro = hmmDir+"hmm"+MAX_ITERATIONS+File.separator+"macros"; - String newMacro = hmmDir+"hmm-final"+File.separator+"macros"; - FileUtils.copy(oldMacro,newMacro); - - String oldHmmdefs = hmmDir+"hmm"+MAX_ITERATIONS+File.separator+"hmmdefs"; - String newHmmdefs = hmmDir+"hmm-final"+File.separator+"hmmdefs"; - FileUtils.copy(oldHmmdefs, newHmmdefs); - - } - + + + System.out.println("***********\n"); + System.out.println("BEST ITERATION: " + BEST_ITERATION); + System.out.println("COPYNING BEST ITERATION FILES IN hmm-final directory"); + System.out.println("logProbFrame_array:" + logProbFrame_array.toString()); + + System.out.println("epsilon_array:" + epsilon_array.toString()); + + System.out.println("***********\n"); + + String oldMacro = hmmDir+"hmm"+BEST_ITERATION+File.separator+"macros"; + String newMacro = hmmDir+"hmm-final"+File.separator+"macros"; + FileUtils.copy(oldMacro,newMacro); + + String oldHmmdefs = hmmDir+"hmm"+BEST_ITERATION+File.separator+"hmmdefs"; + String newHmmdefs = hmmDir+"hmm-final"+File.separator+"hmmdefs"; + FileUtils.copy(oldHmmdefs, newHmmdefs); + + } + + private void check_average_log_prob_per_frame(int iteration) throws IOException { + // TODO Auto-generated method stub + + String filename = getProp(HTDIR)+ File.separator + "log_herestTraining_"+iteration+".txt"; + + // Reestimation complete - average log prob per frame = xxx + Pattern p = Pattern.compile("^.*average log prob per frame = (.*)$"); + + FileReader fr = new FileReader(filename); + + BufferedReader reader = new BufferedReader(fr); + String st = ""; + Matcher m; + Boolean found = false; + + while ((st = reader.readLine()) != null) { + //System.out.println(st); + m = p.matcher(st); + if (m.find()) { + Double logProbFrame = Double.parseDouble(m.group(1)); + logProbFrame_array.add(logProbFrame); + + System.out.println("Average log prob per frame at iteration " + iteration + " from file is " + m.group(1) + " equal to " + logProbFrame); + + found = true; + break; + } + } + System.out.flush(); + + if (!found) + { + throw new RuntimeException("No match of average log prob per frame in " + filename); + } + + //double epsilon = 0.0001; + double delta; + + if (iteration > 1) + delta = logProbFrame_array.get(iteration-1) - logProbFrame_array.get(iteration-2); + else + delta = 10000000.0; + + epsilon_array.add(delta); + + + } + private void insertShortPause(int i) throws Exception{ String hmmDir = getProp(HTDIR)+File.separator +"hmm"+File.separator; boolean okprint = false; boolean silprint = false; + System.out.println("F1:"+hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs"); + System.out.println("F2:"+hmmDir+"hmm"+i+File.separator+"hmmdefs" ); String line, spHmmDef=""; // File hmmDef = new File(hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs"); @@ -792,9 +1261,160 @@ private void insertShortPause(int i) throws Exception{ hmmDef.close(); + + + } + + /* + * Add sp model copying the centre state of ssil + */ + private void insertVirtualPause(int i) throws Exception{ + String hmmDir = getProp(HTDIR)+File.separator + +"hmm"+File.separator; + boolean okprint = false; + boolean okprint2 = false; + boolean silprint = false; + System.out.println("F1:"+hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs"); + System.out.println("F2:"+hmmDir+"hmm"+i+File.separator+"hmmdefs" ); + + String line, spHmmDef=""; + // File hmmDef = new File(hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs"); + BufferedReader hmmDef + = new BufferedReader( + new FileReader( + hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs")); + while((line = hmmDef.readLine()) != null){ + + if(line.matches("^.*\"ssil\".*$")){ + okprint = true; + spHmmDef += "~h \"sp\"\n"; + spHmmDef += "\n"; + spHmmDef += " 3\n"; + spHmmDef += " 2\n"; + continue; + } + //TODO: add + if (okprint && line.matches("^.* 3.*$")){ + okprint2 = true; + continue; + } + + if (okprint && okprint2 & line.matches("^.* 4.*$")){ + okprint = false; + okprint2 = false; + continue; + } + + if (okprint && okprint2) { + spHmmDef += line+"\n"; + } + + } + + spHmmDef += " 3\n"; + spHmmDef += "0. 1. 0.\n"; + spHmmDef += "0. 0.9 0.1\n"; + spHmmDef += "0. 0. 0. \n"; + spHmmDef += "\n"; + + + hmmDef.close(); + + hmmDef = new BufferedReader( + new FileReader( + hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs")); + PrintWriter newHmmDef = new PrintWriter( + new FileWriter(hmmDir+"hmm"+i+File.separator+"hmmdefs")); + + while((line = hmmDef.readLine()) != null){ + newHmmDef.println(line.trim()); + } + newHmmDef.println(spHmmDef); + newHmmDef.flush(); + newHmmDef.close(); + hmmDef.close(); + } + + + + /* + * Add sp model copying the 3 states of ssil + * remeber to use appropiate AT and TI + */ + private void insertVirtualPauseThreeStates(int i) throws Exception{ + String hmmDir = getProp(HTDIR)+File.separator + +"hmm"+File.separator; + boolean okprint = false; + boolean okprint2 = false; + boolean silprint = false; + System.out.println("F1:"+hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs"); + System.out.println("F2:"+hmmDir+"hmm"+i+File.separator+"hmmdefs" ); + + String line, spHmmDef=""; + // File hmmDef = new File(hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs"); + BufferedReader hmmDef + = new BufferedReader( + new FileReader( + hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs")); + while((line = hmmDef.readLine()) != null){ + + if(line.matches("^.*\"ssil\".*$")){ + okprint = true; + spHmmDef += "~h \"sp\"\n"; + spHmmDef += "\n"; + spHmmDef += " 5\n"; + spHmmDef += " 2\n"; + continue; + } + //TODO: add + if (okprint && line.matches("^.* 2.*$")){ + okprint2 = true; + continue; + } + + if (okprint && okprint2 & line.matches("^.*.*$")){ + okprint = false; + okprint2 = false; + continue; + } + + if (okprint && okprint2) { + spHmmDef += line+"\n"; + } + + } + + /*spHmmDef += " 3\n"; + spHmmDef += "0. 1. 0.\n"; + spHmmDef += "0. 0.9 0.1\n"; + spHmmDef += "0. 0. 0. \n"; + spHmmDef += "\n";*/ + spHmmDef += "\n"; + + hmmDef.close(); + + hmmDef = new BufferedReader( + new FileReader( + hmmDir+"hmm"+(i-1)+File.separator+"hmmdefs")); + PrintWriter newHmmDef = new PrintWriter( + new FileWriter(hmmDir+"hmm"+i+File.separator+"hmmdefs")); + + while((line = hmmDef.readLine()) != null){ + newHmmDef.println(line.trim()); + } + newHmmDef.println(spHmmDef); + newHmmDef.flush(); + newHmmDef.close(); + hmmDef.close(); } + + + + + + /** * Force Align database for Automatic labels * @throws Exception @@ -802,7 +1422,7 @@ private void insertShortPause(int i) throws Exception{ private void hviteAligning() throws Exception{ String hvite = getProp(HTKDIR)+File.separator - +"bin"+File.separator+"HVite"; + +"bin"+File.separator+"HVite"; // -A -D -V -T 1 "; // to add -A -D -V -T 1 in every function File htkFile = new File(hvite); if (!htkFile.exists()) { throw new RuntimeException("File "+htkFile.getAbsolutePath()+" does not exist"); @@ -811,16 +1431,35 @@ private void hviteAligning() throws Exception{ +"config"+File.separator+"htkTrain.conf"; String listFile = getProp(HTDIR)+File.separator +"etc"+File.separator+"htkTrain.list"; + + // Virtual sp change_ phoneList should be a member? + // Without sp: + /*String phoneList = getProp(HTDIR)+File.separator + +"etc"+File.separator+"htk.phone2.list";*/ + + // Whit sp: + String phoneList = getProp(HTDIR)+File.separator - +"etc"+File.separator+"htk.phone2.list"; + +"etc"+File.separator+"htk.phone3.list"; + + String hmmDef = getProp(HTDIR)+File.separator +"hmm"+File.separator +"hmm-final"+File.separator+"hmmdefs"; String macros = getProp(HTDIR)+File.separator +"hmm"+File.separator +"hmm-final"+File.separator+"macros"; + + // Virtual sp change_ phoneMlf should be a member? + + // Without sp: + /*String phoneMlf = getProp(HTDIR)+File.separator + +"etc"+File.separator+"htk.phones2.mlf";*/ + // Whit sp: String phoneMlf = getProp(HTDIR)+File.separator - +"etc"+File.separator+"htk.phones2.mlf"; + +"etc"+File.separator+"htk.phones3.mlf"; + + String alignedMlf = getProp(HTDIR)+File.separator +"aligned.mlf"; String phoneDict = getProp(HTDIR)+File.separator @@ -831,27 +1470,34 @@ private void hviteAligning() throws Exception{ //get a shell Process process = rtime.exec("/bin/bash"); //get an output stream to write to the shell + + //when no sp use (-m)! + PrintWriter pw = new PrintWriter( new OutputStreamWriter(process.getOutputStream())); - System.out.println("( " - +hvite+" -b sil -l "+labDir+" -o W -C "+configFile - +" -a -H "+macros+" -H "+hmmDef+" -i "+alignedMlf+" -m -t 250.0 -y lab" + System.out.println("( cd "+getProp(HTDIR) +"; " + +hvite+" " + HTK_SO + " -b sil -l "+labDir+" -o W -C "+configFile + +" -a -H "+macros+" -H "+hmmDef+" -i "+alignedMlf+" -t 250.0 -y lab" +" -I "+phoneMlf+" -S "+listFile - +" "+phoneDict+" "+phoneList + +" "+phoneDict+" "+phoneList +" > log_hviteAligning.txt" +"; exit )\n"); - pw.println("( " - +hvite+" -b sil -l "+labDir+" -o W -C "+configFile - +" -a -H "+macros+" -H "+hmmDef+" -i "+alignedMlf+" -m -t 250.0 -y lab" + pw.println("( cd "+getProp(HTDIR) +"; " + +hvite+" " + HTK_SO + " -b sil -l "+labDir+" -o W -C "+configFile + +" -a -H "+macros+" -H "+hmmDef+" -i "+alignedMlf+" -t 250.0 -y lab" +" -I "+phoneMlf+" -S "+listFile - +" "+phoneDict+" "+phoneList + +" "+phoneDict+" "+phoneList +" > log_hviteAligning.txt" +"; exit )\n"); pw.flush(); //shut down pw.close(); process.waitFor(); - process.exitValue(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } } @@ -892,21 +1538,25 @@ private void htkExtraModels() throws Exception{ //get an output stream to write to the shell PrintWriter pw = new PrintWriter( new OutputStreamWriter(process.getOutputStream())); - System.out.println("( " + System.out.println("( cd "+getProp(HTDIR) +"; " +hlstats+" -T 1 -C "+configFile+" -b "+bigFile - +" -o "+phoneList+" "+phoneMlf + +" -o "+phoneList+" "+phoneMlf+" > log_hlstats.txt" +"; exit )\n"); - pw.println("( " + pw.println("( cd "+getProp(HTDIR) +"; " +hlstats+" -T 1 -C "+configFile+" -b "+bigFile - +" -o "+phoneList+" "+phoneMlf + +" -o "+phoneList+" "+phoneMlf+" > log_hlstats.txt" +"; exit )\n"); pw.flush(); //shut down pw.close(); process.waitFor(); - process.exitValue(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } String fileDict = FileUtils.getFileAsString(new File(phoneDict), "ASCII"); PrintWriter augPhoneDict = new PrintWriter(new FileWriter(phoneAugDict)); @@ -931,14 +1581,14 @@ private void htkExtraModels() throws Exception{ //get an output stream to write to the shell pw = new PrintWriter( new OutputStreamWriter(process.getOutputStream())); - System.out.println("( " + System.out.println("( cd "+getProp(HTDIR) +"; " +hbuild+" -T 1 -C "+configFile+" -n "+bigFile - +" "+phoneAugList+" "+netFile + +" "+phoneAugList+" "+netFile+" > log_hbuild.txt" +"; exit )\n"); - pw.println("( " + pw.println("( cd "+getProp(HTDIR) +"; " +hbuild+" -T 1 -C "+configFile+" -n "+bigFile - +" "+phoneAugList+" "+netFile + +" "+phoneAugList+" "+netFile+" > log_hbuild.txt" +"; exit )\n"); @@ -946,7 +1596,11 @@ private void htkExtraModels() throws Exception{ //shut down pw.close(); process.waitFor(); - process.exitValue(); + // check exit value + if (process.exitValue() != 0) { + BufferedReader errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); + throw new MaryConfigurationException(errorReader.readLine()); + } } @@ -965,18 +1619,25 @@ private void getPhoneSequence() throws Exception { new FileOutputStream (new File(outputDir+"/"+"htk.phones.mlf"))); PrintWriter transLabelOut1 = new PrintWriter( new FileOutputStream (new File(outputDir+"/"+"htk.phones2.mlf"))); + PrintWriter transLabelOut2 = new PrintWriter( + new FileOutputStream (new File(outputDir+"/"+"htk.phones3.mlf"))); + String phoneSeq; transLabelOut.println("#!MLF!#"); transLabelOut1.println("#!MLF!#"); + transLabelOut2.println("#!MLF!#"); for (int i=0; i200) {print "file:" arr[1] " duration:" arr[2]} }' nohup.out + * gawk 'match($0, /^(.*): a sp.*duration: ([0-9]+\.[0-9]+) ms.*$/, arr) {if (arr[2]>400) {print $0} }' nohup.out + */ + + } + + } + + if (phoneSeg.equals("sil") || phoneSeg.equals("ssil") || phoneSeg.equals("sp")) + phoneSeg = "_"; + + pw.println(tStamp/10000000 + " 125 " + phoneSeg); } pw.flush(); pw.close(); - } for (int i=0; i