(trunk/egs/chime1) added a kaldi recipe for the 1st CHiME challenge (…

…GRID corpus) git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5124 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
vimalmanohar · Jun 8, 2015 · 15c5c3f · 15c5c3f
1 parent ce9a7a1
commit 15c5c3f
Show file tree

Hide file tree

Showing 19 changed files with 780 additions and 0 deletions.
diff --git a/egs/chime1/README.txt b/egs/chime1/README.txt
@@ -0,0 +1,50 @@
+
+This is a Kaldi setup for the 1st CHiME challenge. See
+http://spandh.dcs.shef.ac.uk/projects/chime/challenge.html
+for more detailed information.
+
+The setup should also work for the GRID corpus and the 2nd CHiME challenge track 1:
+http://spandh.dcs.shef.ac.uk/gridcorpus/
+http://spandh.dcs.shef.ac.uk/chime_challenge/chime2013/
+
+
+Quick instructions:
+
+1) download CHiME1 data
+
+Check the download page http://spandh.dcs.shef.ac.uk/projects/chime/PCC/datasets.html
+Train set
+http://spandh.dcs.shef.ac.uk/projects/chime/PCC/data/PCCdata16kHz_train_reverberated.tar.gz
+Devel set
+http://spandh.dcs.shef.ac.uk/projects/chime/PCC/data/PCCdata16kHz_devel_isolated.tar.gz
+Test set
+http://spandh.dcs.shef.ac.uk/projects/chime/PCC/data/PCCdata16kHz_test_isolated.tar.gz
+
+2) move to Kaldi CHiME1 directory, e.g.,
+
+cd kaldi-trunk/egs/chime1/s5
+
+3a) specify Kaldi directory in path.sh,
+
+export KALDI_ROOT="<your kaldi directory>/kaldi-trunk"
+
+3b) specify CHiME1 signal directory and CHiME1 recogniser directory for your
+username ($USER) in config.sh.
+
+By default, the recipe creates the directories data/, exp/ and mfcc/ in the
+Kaldi CHiME1 recogniser directory. You can link these to directories on a
+different disk, or specify a different directory in config.sh:
+
+export WAV_ROOT="<your CHiME1 directory>/PCCdata16kHz"
+export REC_ROOT="."
+
+4) execute run.sh
+
+./run.sh
+
+4*) we suggest using the following command to save the main log file
+
+nohup ./run.sh > run.log
+
+5) You can find the results in exp/tri2b/decode_*/keyword_scores.txt
+
diff --git a/egs/chime1/s5/RESULTS b/egs/chime1/s5/RESULTS
@@ -0,0 +1,22 @@
+==== Devel set
+Keyword (letter+digit) recognition accuracy (%)
+-----------------------------------------------------------------
+SNR       -6dB    -3dB    0dB     3dB     6dB     9dB     Average 
+-----------------------------------------------------------------
+Overall   38.00   42.75   54.08   64.50   75.17   83.92   59.74   
+-----------------------------------------------------------------
+Letter    30.17   34.17   44.67   52.17   64.50   74.00   49.94   
+Digit     45.83   51.33   63.50   76.83   85.83   93.83   69.53   
+-----------------------------------------------------------------
+
+==== Test set
+Keyword (letter+digit) recognition accuracy (%)
+-----------------------------------------------------------------
+SNR       -6dB    -3dB    0dB     3dB     6dB     9dB     Average 
+-----------------------------------------------------------------
+Overall   36.25   40.92   53.33   64.58   74.92   83.92   58.99   
+-----------------------------------------------------------------
+Letter    30.50   32.00   44.33   56.00   63.33   75.33   50.25   
+Digit     42.00   49.83   62.33   73.17   86.50   92.50   67.72   
+-----------------------------------------------------------------
+
diff --git a/egs/chime1/s5/cmd.sh b/egs/chime1/s5/cmd.sh
@@ -0,0 +1,39 @@
+# "queue.pl" uses qsub.  The options to it are
+# options to qsub.  If you have GridEngine installed,
+# change this to a queue you have access to.
+# Otherwise, use "run.pl", which will run jobs locally
+# (make sure your --num-jobs options are no more than
+# the number of cpus on your machine).
+
+#a) JHU cluster options
+#export train_cmd="queue.pl -l arch=*64"
+#export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
+#export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
+
+#export cuda_cmd="..."
+
+
+#b) BUT cluster options
+#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
+#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
+#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"
+
+#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
+#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
+#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"
+
+
+#c) USFD cluster options
+#config="conf/queue_usfd.conf"
+#export train_cmd="queue.pl  --config $config --mem 8G --rmem 4G"
+#export decode_cmd="queue.pl  --config $config --mem 8G --rmem 4G"
+#export mkgraph_cmd="queue.pl  --config $config --mem 8G --rmem 4G"
+#export cuda_cmd="queue.pl  --config $config --mem 24G --rmem 20G --gpu 1 --time 24:00:00"
+
+
+#d) run it locally...
+# Every stage is launched through run.pl, which runs the jobs on this machine.
+export train_cmd=run.pl decode_cmd=run.pl cuda_cmd=run.pl mkgraph_cmd=run.pl
+
diff --git a/egs/chime1/s5/conf/mfcc.conf b/egs/chime1/s5/conf/mfcc.conf
@@ -0,0 +1,2 @@
+--use-energy=false   # only non-default option.
+--sample-frequency=16000 #  sampled at 16kHz
diff --git a/egs/chime1/s5/conf/queue_usfd.conf b/egs/chime1/s5/conf/queue_usfd.conf
@@ -0,0 +1,13 @@
+command qsub -v PATH -j y
+option mem=* -l mem=$0,rmem=$0
+option mem=0          # Do not add anything to qsub_opts
+option rmem=* -l rmem=$0
+option rmem=0          # Do not add anything to qsub_opts
+option num_threads=* -pe openmp $0
+option num_threads=1  # Do not add anything to qsub_opts
+option max_jobs_run=* -tc $0
+option time=* -l h_rt=$0
+default gpu=0
+option gpu=0
+option gpu=* -l gpu=$0 -P gpu 
+
diff --git a/egs/chime1/s5/conf/topo.proto b/egs/chime1/s5/conf/topo.proto
@@ -0,0 +1,22 @@
+<Topology> 
+<TopologyEntry> 
+<ForPhones>
+NONSILENCEPHONES
+</ForPhones> 
+<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State> 
+<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State> 
+<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State> 
+<State> 3 </State>
+</TopologyEntry> 
+<TopologyEntry> 
+<ForPhones>
+SILENCEPHONES
+</ForPhones> 
+<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State> 
+<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State> 
+<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State> 
+<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State> 
+<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State> 
+<State> 5 </State>
+</TopologyEntry> 
+</Topology> 
diff --git a/egs/chime1/s5/config.sh b/egs/chime1/s5/config.sh
@@ -0,0 +1,13 @@
+case "$USER" in
+"ac1nmx")
+  # CHiME Challenge wav root (after unzipping)...
+  export WAV_ROOT="/data/ac1nmx/data/PCCdata16kHz" 
+
+  # Used by the recogniser for storing data/ exp/ mfcc/ etc
+  export REC_ROOT="." 
+  ;;
+*)
+  echo "Please define WAV_ROOT and REC_ROOT for user $USER"
+  ;;
+esac
+
diff --git a/egs/chime1/s5/input/lexicon.txt b/egs/chime1/s5/input/lexicon.txt
@@ -0,0 +1,51 @@
+BIN B IH N
+LAY L EY
+PLACE P L EY S
+SET S EH T
+RED R EH D
+GREEN G R IY N
+BLUE B L UW
+WHITE W AY T
+AT AE T
+BY B AY
+IN IH N
+WITH W IH DH
+A EY
+B B IY
+C S IY
+D D IY
+E IY
+F EH F
+G JH IY
+H EY CH
+I AY
+J JH AX IY
+K K EY
+L EH L
+M EH M
+N EH N
+O OW
+P P IY
+Q K Y UW
+R AA
+S EH S
+T T IY
+U Y UW
+V V IY
+X EH K S
+Y W AY
+Z Z EH D
+ONE W AH N
+TWO T UW
+THREE TH R IY
+FOUR F AO
+FIVE F AY V
+SIX S IH K S
+SEVEN S EH V N
+EIGHT EY T
+NINE N AY N
+ZERO Z IA R OW
+AGAIN AX G EH N
+NOW N AW
+PLEASE P L IY Z
+SOON S UW N
diff --git a/egs/chime1/s5/local/chime1_prepare_data.sh b/egs/chime1/s5/local/chime1_prepare_data.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+# Copyright 2015  University of Sheffield (Author: Ning Ma)
+# Apache 2.0.
+#
+# This script prepares the data/ directory for the CHiME/GRID corpus.
+#
+# For the train set, and for the devel and test sets when their wav
+# directories exist, it writes under $REC_ROOT/data/<set>/:
+#   wav.scp  "<utt-id><TAB><wav path>", one line per wav file
+#   text     transcriptions produced by local/create_chime1_trans.pl
+#   utt2spk  each utterance mapped to itself (no speaker ID)
+#   spk2utt  derived from utt2spk by utils/utt2spk_to_spk2utt.pl
+# Run it from the recipe directory: it uses ./config.sh, local/ and utils/.
+
+. ./config.sh || exit 1 # Needed for REC_ROOT and WAV_ROOT
+
+# Without these, every path below would silently be rooted at "/".
+if [ -z "$WAV_ROOT" ] || [ -z "$REC_ROOT" ]; then
+  echo "$0: WAV_ROOT and REC_ROOT must be defined (see config.sh)" >&2
+  exit 1
+fi
+
+# Setup relevant folders
+data="$REC_ROOT/data"
+locdata="$data/local"
+mkdir -p "$locdata" || exit 1
+utils="utils"
+
+# Speaker ids run from 1 to num_speakers
+num_speakers=34
+
+# Setup wav folders
+wav_train="$WAV_ROOT/train/reverberated"
+wav_devel="$WAV_ROOT/devel/isolated"
+wav_test="$WAV_ROOT/test/isolated"
+if [ ! -d "$wav_train" ]; then
+  echo "Cannot find wav directory $wav_train"
+  echo "Please download the CHiME Challenge Data from"
+  echo "  train set  http://spandh.dcs.shef.ac.uk/projects/chime/PCC/data/PCCdata16kHz_train_reverberated.tar.gz"
+  exit 1
+fi
+set_list="train"
+mkdir -p "$data/train" || exit 1
+if [ -d "$wav_devel" ]; then
+  set_list="$set_list devel"
+  mkdir -p "$data/devel" || exit 1
+fi
+if [ -d "$wav_test" ]; then
+  set_list="$set_list test"
+  mkdir -p "$data/test" || exit 1
+fi
+echo "Preparing data sets: $set_list"
+
+# Create scp files
+# Train layout: <wav_train>/id<N>/<utt>.wav  ->  utt-id sNN_<utt>
+scp="$data/train/wav.scp"
+rm -f "$scp"
+for ((sid = 1; sid <= num_speakers; sid++)); do
+  sid2=$(printf "s%02d" "$sid")
+  for wav in "$wav_train/id$sid"/*.wav; do
+    [ -e "$wav" ] || continue # unmatched glob: no wavs for this speaker
+    utt=${wav##*/}
+    printf '%s_%s\t%s\n' "$sid2" "${utt%.wav}" "$wav"
+  done | LC_ALL=C sort >> "$scp" # C-locale order, as Kaldi expects
+done
+if [ ! -s "$scp" ]; then
+  echo "$0: no wav files found under $wav_train" >&2
+  exit 1
+fi
+
+# Devel/test layout: <wav_dir>/<snr>/s<N>_<utt>.wav  ->  utt-id sNN_<utt>_<snr>
+for x in "devel" "test"; do
+  if [ -d "$data/$x" ]; then
+    scp="$data/$x/wav.scp"
+    rm -f "$scp"
+    wav_var="wav_$x"
+    wav_dir="${!wav_var}" # indirect lookup of wav_devel / wav_test
+    for ((sid = 1; sid <= num_speakers; sid++)); do
+      sid2=$(printf "s%02d" "$sid")
+      for wav in "$wav_dir"/*/"s${sid}_"*.wav; do
+        [ -e "$wav" ] || continue # unmatched glob: no wavs for this speaker
+        utt=${wav##*/}
+        utt=${utt%.wav}
+        utt=${utt#"s${sid}_"}
+        snr=${wav%/*}
+        snr=${snr##*/}
+        printf '%s_%s_%s\t%s\n' "$sid2" "$utt" "$snr" "$wav"
+      done | LC_ALL=C sort >> "$scp"
+    done
+    if [ ! -s "$scp" ]; then
+      echo "$0: no wav files found under $wav_dir" >&2
+      exit 1
+    fi
+  fi
+done
+
+# Prepare other files in data/setname/
+# $set_list is deliberately unquoted: it is a space-separated list of names.
+for x in $set_list; do
+  scp="$data/$x/wav.scp"
+  if [ -f "$scp" ]; then
+    # Create transcription files
+    cut -f1 "$scp" | local/create_chime1_trans.pl - > "$data/$x/text" || exit 1
+
+    # Create utt2spk files
+    # No speaker ID
+    awk -F'\t' '{ print $1 "\t" $1 }' "$scp" > "$data/$x/utt2spk" || exit 1
+    # Use speaker ID
+    #sed "s/\(s..\)\(.*\)\t.*/\1\2\t\1/" < "$scp" > "$data/$x/utt2spk"
+
+    # Create spk2utt files
+    "$utils/utt2spk_to_spk2utt.pl" < "$data/$x/utt2spk" > "$data/$x/spk2utt" || exit 1
+  fi
+done
+
+echo "--> Data preparation succeeded"
+exit 0
diff --git a/egs/chime1/s5/local/chime1_prepare_dict.sh b/egs/chime1/s5/local/chime1_prepare_dict.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Copyright 2015  University of Sheffield (Author: Ning Ma)
+# Apache 2.0.
+#
+# Kaldi scripts for preparing dictionary for the GRID corpus (or CHiME 1)
+#
+# Writes into $REC_ROOT/data/local/dict:
+#   lexicon.txt             copy of input/lexicon.txt
+#   phone.list              every phone used in the lexicon, plus SIL
+#   nonsilence_phones.txt   phone.list without SIL
+#   silence_phones.txt      SIL
+#   optional_silence.txt    SIL
+#   extra_questions.txt     empty
+# Run it from the recipe directory: it uses ./config.sh and input/.
+
+echo "Preparing dictionary"
+
+. ./config.sh || exit 1 # Needed for REC_ROOT and WAV_ROOT
+
+# Without REC_ROOT the dictionary would be written under "/data".
+if [ -z "$REC_ROOT" ]; then
+  echo "$0: REC_ROOT must be defined (see config.sh)" >&2
+  exit 1
+fi
+
+# Prepare relevant folders
+dict="$REC_ROOT/data/local/dict"
+mkdir -p "$dict" || exit 1
+
+# Copy lexicon
+lexicon="input/lexicon.txt" # phone models
+if [ ! -f "$lexicon" ]; then
+  echo "$0: cannot find lexicon $lexicon" >&2
+  exit 1
+fi
+cp "$lexicon" "$dict/lexicon.txt" || exit 1
+
+# Generate phone list
+# Field 1 of each lexicon line is the word; the remaining fields are phones.
+sil="SIL"
+phone_list="$dict/phone.list"
+awk '{for (n=2;n<=NF;n++)print $n;}' "$lexicon" \
+  | LC_ALL=C sort -u > "$phone_list" || exit 1
+echo "$sil" >> "$phone_list"
+
+# Create phone lists
+# grep exits non-zero when nothing is left, i.e. the lexicon had no phones.
+grep -v -w "$sil" "$phone_list" > "$dict/nonsilence_phones.txt" || exit 1
+echo "$sil" > "$dict/silence_phones.txt"
+echo "$sil" > "$dict/optional_silence.txt"
+
+# list of "extra questions"-- empty; we don't have things like tone or
+# word-positions or stress markings.
+touch "$dict/extra_questions.txt"
+
+echo "-->Dictionary preparation succeeded"
+exit 0
diff --git a/egs/chime1/s5/local/chime1_prepare_grammar.sh b/egs/chime1/s5/local/chime1_prepare_grammar.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Copyright 2015  University of Sheffield (Author: Ning Ma)
+# Apache 2.0.
+#
+# Scripts for preparing grammar for the GRID corpus (or CHiME 1)
+#
+# Pipes the output of local/create_chime1_grammar.pl through fstcompile
+# (using $REC_ROOT/data/lang/words.txt as the symbol table) and fstarcsort,
+# and stores the result in $REC_ROOT/data/lang/G.fst.
+# Run it from the recipe directory: it uses ./config.sh and local/.
+
+# Make the pipeline below fail when ANY stage fails, not only the last one.
+set -o pipefail
+
+echo "Preparing grammar for test"
+
+. ./config.sh || exit 1 # Needed for REC_ROOT and WAV_ROOT
+
+# Without REC_ROOT the grammar would be written under "/data".
+if [ -z "$REC_ROOT" ]; then
+  echo "$0: REC_ROOT must be defined (see config.sh)" >&2
+  exit 1
+fi
+
+# Setup relevant folders
+lang="$REC_ROOT/data/lang"
+if [ ! -f "$lang/words.txt" ]; then
+  echo "$0: cannot find word symbol table $lang/words.txt" >&2
+  exit 1
+fi
+
+# Create FST grammar for the GRID
+grammar_cmd="local/create_chime1_grammar.pl"
+
+if ! "$grammar_cmd" \
+  | fstcompile --isymbols="$lang/words.txt" --osymbols="$lang/words.txt" \
+    --keep_isymbols=false --keep_osymbols=false \
+  | fstarcsort --sort_type=ilabel > "$lang/G.fst"; then
+  echo "$0: failed to build $lang/G.fst" >&2
+  # Do not leave a truncated grammar behind for later stages to pick up.
+  rm -f "$lang/G.fst"
+  exit 1
+fi
+
+# Draw the FST
+#echo "fstdraw --isymbols=$lang/words.txt --osymbols=$lang/words.txt $lang/G.fst | dot -Tps > local/G.ps"
+
+echo "--> Grammar preparation succeeded"
+exit 0
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		--use-energy=false # only non-default option.
		--sample-frequency=16000 # sampled at 16kHz