Adding tests

secti6n · Jun 13, 2016 · d3c4fc2 · d3c4fc2
1 parent 9eaaba1
commit d3c4fc2
Show file tree

Hide file tree

Showing 5 changed files with 80 additions and 17 deletions.
diff --git a/Scripts/pytest.ini b/Scripts/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+python_files = *.py
diff --git a/...e/Readers/CNTKTextFormatReader/txt2ctf.py → Scripts/txt2ctf.py b/...e/Readers/CNTKTextFormatReader/txt2ctf.py → Scripts/txt2ctf.py
@@ -22,39 +22,39 @@ class Txt2CftConverter:
        Each token for a stream should be inside the corresponding dictionary file, a token per line, so the line number of the token becomes
        the numeric index written into the cntk text format output file"""
 
-    def __init__(self, dictionaryFiles, inputFiles, output, streamSeparator, comment):
-        self.dictionaryFiles = dictionaryFiles
-        self.inputFiles = inputFiles
+    def __init__(self, dictionaries, inputs, output, streamSeparator, comment):
+        self.dictionaries = dictionaries
+        self.inputs = inputs
         self.streamSeparator = streamSeparator
         self.output = output
         self.comment = comment
 
     def convert(self):
         dictionaries = self._createDictionaries()
-        self._convertInputFiles(dictionaries)
+        self._convertInputs(dictionaries)
 
     def _createDictionaries(self):
         dictionaries = []
-        for dic in self.dictionaryFiles:
+        for dic in self.dictionaries:
             dictionaries.append(self._createDictionary(dic))
         return dictionaries
 
-    def _createDictionary(self, dictionaryFile):
+    def _createDictionary(self, dictionary):
         result = {}
         counter = 0
-        for line in open(dictionaryFile):
+        for line in dictionary:
             line = line.rstrip('\r\n').strip('\t ')
             result[line] = counter
             counter += 1
         return result
 
-    def _convertInputFiles(self, dictionaries):
-        if len(self.inputFiles) == 0:
-            return self._convertInputFile(dictionaries, sys.stdin)
-        for inputFile in self.inputFiles:
-            self._convertInputFile(dictionaries, open(inputFile))
+    def _convertInputs(self, dictionaries):
+        if len(self.inputs) == 0:
+            return self._convertInput(dictionaries, sys.stdin)
+        for input in self.inputs:
+            self._convertInput(dictionaries, input)
 
-    def _convertInputFile(self, dictionaries, input):
+    def _convertInput(self, dictionaries, input):
         sequenceId = 0
         for line in input:
             line = line.rstrip('\r\n')
@@ -83,10 +83,10 @@ def _convertStreams(self, dictionaries, streams, sequenceId):
                     continue
                 token = tokenizedStreams[streamIndex][sampleIndex]
                 value = dictionaries[streamIndex][token]
+                self.output.write(self.streamSeparator)
                 self.output.write("|S" + str(streamIndex) + " "+ str(value) + ":1")
                 if self.comment:
                     self.output.write("|# " + token)
-                self.output.write(self.streamSeparator)
             self.output.write("\n")
 
 if __name__ == "__main__":
@@ -98,15 +98,45 @@ def _convertStreams(self, dictionaries, streams, sequenceId):
     parser.add_argument('--input', help='Name of the inputs files, stdin if not given', default="", required=False)
     args = parser.parse_args()
 
-    # cleaning dictionaryFiles from commas
+    # creating dictionaries
     dictionaryFiles = "".join(str(x) for x in args.map).split(",")    
-    inputFiles = []
+    dictionaries = open(d) for d in dictionaryFiles
+
+    # creating inputs
+    inputs = [sys.stdin]
     if args.input != "":
         inputFiles = "".join(str(x) for x in args.input).split(",")
+        inputs = open(i) for i in inputFiles
 
+    # creating outputs
     output = sys.stdout
     if args.output != "":
         output = open(args.output, "w")
 
-    converter = Txt2CftConverter(dictionaryFiles, inputFiles, output, args.sep, args.comment == "True")
+    converter = Txt2CftConverter(dictionaries, inputs, output, args.sep, args.comment == "True")
     converter.convert()
+
+# Test
+import StringIO
+
+def test_sanityCheck():
+    dictionary1 = StringIO.StringIO()
+    dictionary1.write("hello\nmy\nworld\nof\nnothing\n")
+
+    dictionary2 = StringIO.StringIO()
+    dictionary2.write("let\nme\nbe\nclear\nabout\nit\n")
+
+    input = StringIO.StringIO()
+    input.write("hello my\tclear about\nworld of\tit let clear\n")
+
+    output = StringIO.StringIO()
+    converter = Txt2CftConverter([dictionary1, dictionary2], [input], output, "\t", False)
+
+    expectedOutput = StringIO.StringIO()
+    expectedOutput.write("0\t|S0 0:1\t|S1 3:1\n")
+    expectedOutput.write("0\t|S0 1:1\t|S1 4:1\n")
+    expectedOutput.write("1\t|S0 2:1\t|S1 5:1\n")
+    expectedOutput.write("1\t|S0 3:1\t|S1 0:1\n")
+    expectedOutput.write("1\t\t|S1 3:1")
+
+    assert expectedOutput.content() == output.content()
diff --git a/Tests/EndToEndTests/ScriptTest/baseline.txt b/Tests/EndToEndTests/ScriptTest/baseline.txt
@@ -0,0 +1 @@
+__COMPLETED__
diff --git a/Tests/EndToEndTests/ScriptTest/run-test b/Tests/EndToEndTests/ScriptTest/run-test
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+. $TEST_ROOT_DIR/run-test-common
+
+SCRIPTSDIR=$TEST_ROOT_DIR/../../Scripts
+
+pushd $SCRIPTSDIR
+echo Current dir: $SCRIPTSDIR
+
+py.test
+
+if [ "$?" -eq "0" ]; then
+  echo "__COMPLETED__"
+fi
+
+popd
+
diff --git a/Tests/EndToEndTests/ScriptTest/testcases.yml b/Tests/EndToEndTests/ScriptTest/testcases.yml
@@ -0,0 +1,13 @@
+dataDir: .
+
+tags:
+    # running on linux BVT job in 'E' (Examples) leg
+    - bvt-e (build_sku == 'gpu')
+    # running on linux Nightly job in 'E' (Examples) leg
+    - nightly-e (build_sku == 'gpu')
+
+testCases:
+  PyTest run must finish with error code 0 (outputs __COMPLETED__ in that case):
+    patterns:
+      - __COMPLETED__
+