Skip to content

Commit

Permalink
make Model (renamed from Module) a subclass of Layer
Browse files Browse the repository at this point in the history
compile / do_init: separate the initialization and forward propagation for Linear, create name of each layer
save_states/load_states/set_states: recursive logic
  • Loading branch information
dcslin authored and XJDKC committed May 31, 2020
1 parent ad32bf9 commit 4b7ec13
Show file tree
Hide file tree
Showing 4 changed files with 258 additions and 108 deletions.
38 changes: 30 additions & 8 deletions new_api.py → proof_of_concept_to_remove.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,32 @@
from singa import model
from singa import opt

class DoubleLinear(layer.Layer):
    """A composite layer: two ``layer.Linear`` layers applied in sequence.

    Maps ``a`` input features to ``b`` hidden features, then ``b`` to
    ``c`` output features.
    """

    def __init__(self, a, b, c):
        super(DoubleLinear, self).__init__()
        # First projection: a -> b features.
        self.l1 = layer.Linear(a, b)
        # Second projection: b -> c features.
        self.l2 = layer.Linear(b, c)

    def __call__(self, x):
        # Chain the two linear layers directly; equivalent to
        # assigning the intermediate result to a temporary first.
        return self.l2(self.l1(x))

class MyModel(model.Model):

    def __init__(self):
        """Build the sub-layers and the optimizer for this model."""
        super(MyModel, self).__init__()
        self.l1 = layer.Linear(2)
        # NOTE(review): the next line re-binds self.l1, discarding the
        # Linear(2) above — both lines appear to come from a diff view
        # (one removed, one added); confirm which one is intended.
        self.l1 = layer.Linear(4,2)
        self.bn1 = layer.BatchNorm2d(2)
        # Composite sub-layer defined above: 2 -> 4 -> 2 features.
        self.dl1 = DoubleLinear(2,4,2)
        # Optimizer stored on the model; presumably used by
        # train_one_batch (not fully visible here) — TODO confirm.
        self.optimizer = opt.SGD()

def forward(self, x):
y = self.l1(x)
y = autograd.reshape(y, (y.shape[0], y.shape[1], 1, 1))
y = self.bn1(y)
y = autograd.reshape(y, (y.shape[0], y.shape[1]))
y = self.dl1(y)
return y

def train_one_batch(self, x, y):
Expand All @@ -32,24 +48,30 @@ def optim(self, loss):


if __name__ == "__main__":
    # NOTE(review): both device-creation lines survive from the diff view
    # (one removed, one added); the second re-binds ``dev``. Confirm
    # whether GPU 7 is really intended.
    dev = device.create_cuda_gpu()
    dev = device.create_cuda_gpu_on(7)
    # Placeholder input describing the batch shape (2 samples, 4 features).
    x = tensor.PlaceHolder((2, 4), device=dev)

    m = MyModel()
    m.on_device(dev)

    print("compile")
    # Alternative compile configurations kept for reference.
    # m.compile([x], is_train=True, use_graph=True, sequential=True)
    m.compile([x], is_train=True, use_graph=True, sequential=False)
    # m.compile([x], is_train=True, use_graph=False, sequential=False)

    print("compile done")
    # NOTE(review): the get_params/set_params round-trip below and the
    # get_states/set_states round-trip further down both appear in this
    # diff view — the params pair is likely the removed (old) API.
    # get params
    _ = m.get_params()
    print(_)
    # set params
    m.set_params(_)

    _ = m.l1.get_params()
    # get states
    print("states")
    _ = m.get_states()
    print(_)
    print("get params done")
    m.set_states(_)

    print("training")
    # Random input/target pair for a single smoke-test training step.
    cx = tensor.PlaceHolder((2, 4), device=dev).gaussian(1, 1)
    cy = tensor.PlaceHolder((2, 2), device=dev).gaussian(1, 1)

    print("start training")
    m.train_one_batch(cx, cy)
    print("train done")
Loading

0 comments on commit 4b7ec13

Please sign in to comment.