trunk: minor, cosmetic changes
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3822 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
danpovey committed Mar 30, 2014
1 parent 624bbdd commit 3cc123e
Showing 4 changed files with 50 additions and 82 deletions.
51 changes: 7 additions & 44 deletions egs/README.txt
@@ -1,50 +1,13 @@

 This directory contains example scripts that demonstrate how to
 use Kaldi. Each subdirectory corresponds to a corpus that we have
-example scripts for. Currently these are all corpora available from
-the Linguistic Data Consortium (LDC).
+example scripts for.
 
-Explanations of the corpora are below.
-Note: the easiest examples to work with are rm/s3 and wsj/s3.
-
-wsj: The Wall Street Journal corpus. This is a corpus of read
-sentences from the Wall Street Journal, recorded under clean conditions.
-The vocabulary is quite large.
-Available from the LDC as either: [ catalog numbers LDC93S6A (WSJ0) and LDC94S13A (WSJ1) ]
-or: [ catalog numbers LDC93S6B (WSJ0) and LDC94S13B (WSJ1) ]
-The latter option is cheaper and includes only the Sennheiser
-microphone data (which is all we use in the example scripts).
-
-rm: Resource Management. Clean speech in a medium-vocabulary task consisting
-of commands to a (presumably imaginary) computer system.
-Available from the LDC as catalog number LDC93S3A (it may be possible to
-get the same data using combinations of other catalog numbers, but this
-is the one we used).
-
-tidigits: The TI Digits database, available from the LDC (catalog number LDC93S10).
-This is one of the oldest speech databases; it consists of a bunch of speakers
-saying digit strings. It's not considered a "real" task any more, but can be useful
-for demos, tutorials, and the like.
-
-yesno: This is a simple recipe with some data consisting of a single person
-saying the words "yes" and "no", that can be downloaded from the Kaldi website.
-It's a very easy task, but useful for checking that the scripts run, or if
-you don't yet have any of the LDC data.
-
-Recipes in progress (these may be less polished than the ones above).
-
-swbd: Switchboard (from LDC). A fairly large amount of telephone speech (2-channel, 8kHz
-sampling rate).
-This directory is a work in progress.
-
-gp: GlobalPhone (from ELDA). This is a multilingual speech corpus.
-
-timit: TIMIT (from LDC), which is an old corpus of carefully read speech.
-LDC corpus LDC93S1.
-
-voxforge: A recipe for the free speech data available from voxforge.org
-
-hkust: A recipe for HKUST Mandarin Telephone Speech (available from LDC)
+Note: we now have some scripts using free data, including voxforge,
+vystadial_{cz,en} and yesno. Most of the others are available from
+the Linguistic Data Consortium (LDC), which requires money (unless you
+have a membership).
+
+If you have an LDC membership, probably rm/s5 or wsj/s5 should be your first
+choice to try out the scripts.

76 changes: 38 additions & 38 deletions egs/rm/s5/run.sh
@@ -8,13 +8,13 @@ set -e # exit on error.
# subdirectories named as follows:
# rm1_audio1 rm1_audio2 rm2_audio

#local/rm_data_prep.sh /mnt/matylda2/data/RM

local/rm_data_prep.sh /export/corpora5/LDC/LDC93S3A/rm_comp

#local/rm_data_prep.sh /home/dpovey/data/LDC93S3A/rm_comp

utils/prepare_lang.sh data/local/dict '!SIL' data/local/lang data/lang

local/rm_prepare_grammar.sh # Traditional RM grammar (bigram word-pair)
local/rm_prepare_grammar_ug.sh || exit 1; # Unigram grammar (gives worse results, but
@@ -26,22 +26,22 @@ featdir=mfcc


for x in test_mar87 test_oct87 test_feb89 test_oct89 test_feb91 test_sep92 train; do
steps/make_mfcc.sh --nj 8 --cmd "run.pl" data/$x exp/make_mfcc/$x $featdir
steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $featdir
#steps/make_plp.sh data/$x exp/make_plp/$x $featdir
done

# Make a combined data dir where the data from all the test sets goes-- we do
# all our testing on this averaged set. This is just less hassle. We
# regenerate the CMVN stats as one of the speakers appears in two of the
# test sets; otherwise tools complain as the archive has 2 entries.
utils/combine_data.sh data/test data/test_{mar87,oct87,feb89,oct89,feb91,sep92}
steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test $featdir
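The comment above notes that CMVN stats must be regenerated because one speaker appears in two of the test sets, so simply concatenating the per-set archives would leave a duplicate speaker key. A toy Python sketch of that situation (speaker names and numbers are invented for illustration; this is not Kaldi code):

```python
# Toy per-set CMVN-style stats: speaker -> (feature sum, frame count).
test_sets = {
    "test_a": {"spk1": (10.0, 100), "spk2": (8.0, 80)},
    "test_b": {"spk2": (9.0, 90), "spk3": (7.0, 70)},  # spk2 is shared
}

def concat_archives(sets):
    """Naive concatenation keeps duplicate speaker entries
    (the situation the tools complain about)."""
    entries = []
    for stats in sets.values():
        entries.extend(stats.items())
    return entries

def recompute_stats(sets):
    """Recomputing pools the raw sums per speaker, yielding one entry each."""
    pooled = {}
    for stats in sets.values():
        for spk, (feat_sum, frames) in stats.items():
            s, c = pooled.get(spk, (0.0, 0))
            pooled[spk] = (s + feat_sum, c + frames)
    return pooled

keys = [spk for spk, _ in concat_archives(test_sets)]
print(len(keys) - len(set(keys)))          # 1 duplicate entry (spk2)
print(recompute_stats(test_sets)["spk2"])  # (17.0, 170)
```

Recomputing from the combined data directory, as `steps/compute_cmvn_stats.sh` does above, corresponds to the pooled version.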

utils/subset_data_dir.sh data/train 1000 data/train.1k


steps/train_mono.sh --nj 4 --cmd "$train_cmd" data/train.1k data/lang exp/mono

#show-transitions data/lang/phones.txt exp/tri2a/final.mdl exp/tri2a/final.occs | perl -e 'while(<>) { if (m/ sil /) { $l = <>; $l =~ m/pdf = (\d+)/|| die "bad line $l"; $tot += $1; }} print "Total silence count $tot\n";'
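The commented-out perl one-liner above is fairly dense; the same logic in Python, for readability (the sample `show-transitions` lines below are invented to show the expected shape, not real output):

```python
import re

def total_silence_count(lines):
    """Sum the 'pdf = N' value on the line following each line containing
    ' sil ', mirroring the perl one-liner above."""
    total, it = 0, iter(lines)
    for line in it:
        if " sil " in line:
            nxt = next(it)
            m = re.search(r"pdf = (\d+)", nxt)
            if not m:
                raise ValueError("bad line %s" % nxt)
            total += int(m.group(1))
    return total

# Invented sample, roughly the shape of show-transitions output:
demo = [
    "Transition-state 1: phone = sil hmm-state = 0",
    "  Transition-id = 1 p = 0.75 pdf = 42",
    "Transition-state 2: phone = ax hmm-state = 0",
    "  Transition-id = 3 p = 0.5 pdf = 7",
]
print(total_silence_count(demo))  # 42
```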

@@ -58,22 +58,22 @@ steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \

# Get alignments from monophone system.
steps/align_si.sh --nj 8 --cmd "$train_cmd" \
data/train data/lang exp/mono exp/mono_ali

# train tri1 [first triphone pass]
steps/train_deltas.sh --cmd "$train_cmd" \
1800 9000 data/train data/lang exp/mono_ali exp/tri1

# decode tri1
utils/mkgraph.sh data/lang exp/tri1 exp/tri1/graph
steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri1/graph data/test exp/tri1/decode

#draw-tree data/lang/phones.txt exp/tri1/tree | dot -Tps -Gsize=8,10.5 | ps2pdf - tree.pdf

# align tri1
steps/align_si.sh --nj 8 --cmd "$train_cmd" \
--use-graphs true data/train data/lang exp/tri1 exp/tri1_ali

# train tri2a [delta+delta-deltas]
steps/train_deltas.sh --cmd "$train_cmd" 1800 9000 \
@@ -87,71 +87,71 @@ steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
# train and decode tri2b [LDA+MLLT]
steps/train_lda_mllt.sh --cmd "$train_cmd" \
--splice-opts "--left-context=3 --right-context=3" \
1800 9000 data/train data/lang exp/tri1_ali exp/tri2b
utils/mkgraph.sh data/lang exp/tri2b exp/tri2b/graph

steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph data/test exp/tri2b/decode

# Align all data with LDA+MLLT system (tri2b)
steps/align_si.sh --nj 8 --cmd "$train_cmd" --use-graphs true \
data/train data/lang exp/tri2b exp/tri2b_ali

# Do MMI on top of LDA+MLLT.
steps/make_denlats.sh --nj 8 --cmd "$train_cmd" \
data/train data/lang exp/tri2b exp/tri2b_denlats
steps/train_mmi.sh data/train data/lang exp/tri2b_ali exp/tri2b_denlats exp/tri2b_mmi
steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph data/test exp/tri2b_mmi/decode_it4
steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph data/test exp/tri2b_mmi/decode_it3

# Do the same with boosting.
steps/train_mmi.sh --boost 0.05 data/train data/lang \
exp/tri2b_ali exp/tri2b_denlats exp/tri2b_mmi_b0.05
steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph data/test exp/tri2b_mmi_b0.05/decode_it4
steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph data/test exp/tri2b_mmi_b0.05/decode_it3
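For reference, the `--boost 0.05` option above corresponds to boosted MMI. The objective (as given in the boosted-MMI literature, not stated in this diff) down-weights denominator paths in proportion to their accuracy against the reference:

\mathcal{F}_{\mathrm{bMMI}}(\lambda) = \sum_r \log \frac{p_\lambda(x_r \mid s_r)^{\kappa}\, P(s_r)}{\sum_s p_\lambda(x_r \mid s)^{\kappa}\, P(s)\, e^{-b\, A(s,\, s_r)}}

where A(s, s_r) is a (phone-level) accuracy of hypothesis s against reference s_r, \kappa is the acoustic scale, and b is the boosting factor (0.05 here; b = 0 recovers plain MMI).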

# Do MPE.
steps/train_mpe.sh data/train data/lang exp/tri2b_ali exp/tri2b_denlats exp/tri2b_mpe
steps/decode.sh --config conf/decode.config --iter 4 --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph data/test exp/tri2b_mpe/decode_it4
steps/decode.sh --config conf/decode.config --iter 3 --nj 20 --cmd "$decode_cmd" \
exp/tri2b/graph data/test exp/tri2b_mpe/decode_it3


## Do LDA+MLLT+SAT, and decode.
steps/train_sat.sh 1800 9000 data/train data/lang exp/tri2b_ali exp/tri3b
utils/mkgraph.sh data/lang exp/tri3b exp/tri3b/graph
steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3b/graph data/test exp/tri3b/decode

(
utils/mkgraph.sh data/lang_ug exp/tri3b exp/tri3b/graph_ug
steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
exp/tri3b/graph_ug data/test exp/tri3b/decode_ug
)


# Align all data with LDA+MLLT+SAT system (tri3b)
steps/align_fmllr.sh --nj 8 --cmd "$train_cmd" --use-graphs true \
data/train data/lang exp/tri3b exp/tri3b_ali

## MMI on top of tri3b (i.e. LDA+MLLT+SAT+MMI)
steps/make_denlats.sh --config conf/decode.config \
--nj 8 --cmd "$train_cmd" --transform-dir exp/tri3b_ali \
data/train data/lang exp/tri3b exp/tri3b_denlats
steps/train_mmi.sh data/train data/lang exp/tri3b_ali exp/tri3b_denlats exp/tri3b_mmi

steps/decode_fmllr.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
--alignment-model exp/tri3b/final.alimdl --adapt-model exp/tri3b/final.mdl \
exp/tri3b/graph data/test exp/tri3b_mmi/decode

# Do a decoding that uses the exp/tri3b/decode directory to get transforms from.
steps/decode.sh --config conf/decode.config --nj 20 --cmd "$decode_cmd" \
--transform-dir exp/tri3b/decode exp/tri3b/graph data/test exp/tri3b_mmi/decode2


#first, train UBM for fMMI experiments.
@@ -161,7 +161,7 @@ steps/train_diag_ubm.sh --silence-weight 0.5 --nj 8 --cmd "$train_cmd" \
# Next, various fMMI+MMI configurations.
steps/train_mmi_fmmi.sh --learning-rate 0.0025 \
--boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri3b_ali exp/dubm3b exp/tri3b_denlats \
exp/tri3b_fmmi_b

for iter in 3 4 5 6 7 8; do
steps/decode_fmmi.sh --nj 20 --config conf/decode.config --cmd "$decode_cmd" --iter $iter \
@@ -170,7 +170,7 @@ done

steps/train_mmi_fmmi.sh --learning-rate 0.001 \
--boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri3b_ali exp/dubm3b exp/tri3b_denlats \
exp/tri3b_fmmi_c

for iter in 3 4 5 6 7 8; do
steps/decode_fmmi.sh --nj 20 --config conf/decode.config --cmd "$decode_cmd" --iter $iter \
@@ -180,7 +180,7 @@ done
# for indirect one, use twice the learning rate.
steps/train_mmi_fmmi_indirect.sh --learning-rate 0.01 --schedule "fmmi fmmi fmmi fmmi mmi mmi mmi mmi" \
--boost 0.1 --cmd "$train_cmd" data/train data/lang exp/tri3b_ali exp/dubm3b exp/tri3b_denlats \
exp/tri3b_fmmi_d

for iter in 3 4 5 6 7 8; do
steps/decode_fmmi.sh --nj 20 --config conf/decode.config --cmd "$decode_cmd" --iter $iter \
3 changes: 3 additions & 0 deletions egs/vystadial_cz/s5/local/download_cs_data.sh
@@ -4,6 +4,9 @@
DATA_ROOT=$1

url=https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11858/00-097C-0000-0023-4670-6/data_voip_cs.tgz
# This might be faster:
#url=http://www.openslr.org/resources/6/data_voip_cs.tgz

name=data_voip_cs
extract_file=106277
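A guess at what `extract_file` is for: it looks like an expected file count used to sanity-check the download and extraction (an assumption; the check itself is outside this hunk). The check amounts to counting regular files under the extracted directory, e.g.:

```python
import os
import tempfile

def count_files(root):
    """Count regular files under root, like `find "$root" -type f | wc -l`.
    (Illustrative sketch, not code from this repository.)"""
    return sum(len(files) for _, _, files in os.walk(root))

# Self-contained demo on a throwaway directory:
with tempfile.TemporaryDirectory() as d:
    for name in ("a.wav", "b.wav", "c.trs"):
        open(os.path.join(d, name), "w").close()
    print(count_files(d))  # 3
```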

2 changes: 2 additions & 0 deletions egs/vystadial_en/s5/local/download_en_data.sh
@@ -4,6 +4,8 @@
DATA_ROOT=$1

url=https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11858/00-097C-0000-0023-4671-4/data_voip_en.tgz
# This might be faster:
#url=http://www.openslr.org/resources/6/data_voip_en.tgz
name=data_voip_en
extract_file=205859

