# Class definition of the Non-Negative Sparse Autoencoder (NNSAE).
# The class defines fields that store the model parameters and implements the
# methods of the NNSAE. The NNSAE uses tied (shared) encoder and decoder weights
# and is designed for non-negative data distributions.
#
###########################################################################
### Copyright (c) 2012 A. Lemme, F. R. Reinhart, CoR-Lab                ###
### University Bielefeld, Germany, http://cor-lab.de                    ###
###########################################################################
#
# The program is free for non-commercial and academic use. Please contact the
# authors if you are interested in using the software for commercial purposes.
# The software must not be modified or distributed without prior permission
# of the authors. Please acknowledge the authors in any academic publications
# that have made use of this code or parts of it. Please use this BibTeX entry
# for reference:
#
# A. Lemme, R. F. Reinhart and J. J. Steil.
# "Online learning and generalization of parts-based image representations
# by Non-Negative Sparse Autoencoders". Neural Networks, vol. 33, pp. 194-203, 2012.
# doi: https://doi.org/10.1016/j.neunet.2012.05.003
# OR
# A. Lemme, R. F. Reinhart and J. J. Steil. "Efficient online learning of
# a non-negative sparse autoencoder". In Proc. ESANN, 2010.
from __future__ import print_function

import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.optim.optimizer import Optimizer


class Nnsae(nn.Module):
    # Constructor of the NNSAE class.
    # input:
    #   - inpDim: dimension of a data sample (number of input and output neurons)
    #   - hidDim: size of the hidden layer
    # output:
    #   - the created Non-Negative Sparse Autoencoder module
    __constants__ = ['inpDim', 'hidDim']

    def __init__(self, inpDim, hidDim, batch_size=1):
        # anomaly detection is a debugging aid and slows down execution
        torch.autograd.set_detect_anomaly(True)
        super(Nnsae, self).__init__()
        self.inpDim = inpDim  # number of input neurons (and output neurons)
        self.hidDim = hidDim  # number of hidden neurons
        self.nonlin = torch.sigmoid

        self.inp = torch.zeros(self.inpDim, 1)  # vector holding the current input
        self.out = torch.zeros(self.inpDim, 1)  # vector holding the current reconstruction
        # neural activity after and before the non-linearity
        self.h = torch.zeros(self.hidDim, batch_size)  # hidden neuron activation
        self.g = torch.zeros(self.hidDim, batch_size)  # hidden neuron pre-activation

        # slope a and bias b of the parametrized sigmoid, adapted by intrinsic plasticity
        self.a = Parameter(torch.ones(self.hidDim, 1))
        self.b = Parameter(torch.ones(self.hidDim, 1) * (-3.0))

        # tied encoder/decoder weight matrix, initialized with small non-negative values
        self.weights = Parameter(torch.zeros(inpDim, hidDim))
        self.scale = 0.025
        self.weights.data = self.scale * (2 * torch.rand(inpDim, hidDim) -
                                          0.5 * torch.ones(inpDim, hidDim)) + self.scale

        # learning rate for synaptic plasticity of the read-out layer (RO)
        self.lrateRO = 0.01
        self.regRO = 0.0002   # numerical regularization constant
        self.lrateIP = 0.001  # learning rate for intrinsic plasticity (IP)
        self.meanIP = 0.2     # desired mean activity, a parameter of IP

        self._cuda = False

    def __setstate__(self, state):
        super(Nnsae, self).__setstate__(state)

    @property
    def cuda(self):
        return self._cuda

    def to(self, device):
        # nn.Module.to() moves the registered parameters (a, b, weights) in place;
        # plain tensor buffers must be reassigned because Tensor.to() is not in-place
        device = torch.device(device)
        super().to(device)
        self._cuda = device.type != 'cpu'
        self.h = self.h.to(device)
        self.g = self.g.to(device)
        self.out = self.out.to(device)
        self.inp = self.inp.to(device)
        return self

    def ip(self):
        # intrinsic plasticity: adapt slope a and bias b of the sigmoid so that the
        # mean hidden activation approaches meanIP
        h = self.h
        tmp = self.lrateIP * (1.0 - (2.0 + 1.0 / self.meanIP) * h + (h ** 2) / self.meanIP)
        # update the parameter tensors via .data to avoid in-place operations on
        # leaf Parameters that require grad
        self.b.data += tmp.sum(1, keepdim=True)
        a_tmp = self.lrateIP / self.a.data + self.g * tmp
        self.a.data += a_tmp.sum(1, keepdim=True)

    def bpdc(self, error):
        # read-out learning with a per-sample adaptive learning rate, shape (1, batch)
        lrate = self.lrateRO / (self.regRO + (self.h ** 2).sum(0, keepdim=True))
        # scale each column of h by its learning rate before the outer-product update
        self.weights.data += error.mm((lrate * self.h).t())

    def fit(self, inp):
        # forward pass
        out = self.forward(inp)
        # the reconstruction error drives the read-out update
        error = inp - out
        self.bpdc(error)
        # enforce the non-negativity constraint on the tied weights
        self.weights.data.clamp_(min=0)
        # intrinsic plasticity
        self.ip()
        return out, error

    def forward(self, x):
        # encode with the tied weights, apply the parametrized sigmoid, decode with
        # the same weights: g = W^T x, h = sigmoid(a * g + b), out = W h
        g = self.weights.t().mm(x)
        h = self.nonlin(self.a * g + self.b)
        out = self.weights.mm(h)
        # keep detached copies of the activations for the plasticity updates
        self.g[:, :] = g.detach()
        self.h[:, :] = h.detach()
        return out

    def save_state_dict(self, fileName):
        torch.save(self.state_dict(), fileName)

    def extra_repr(self):
        s = '({inpDim} x {hidDim})'
        s += ', Intrinsic plasticity: mean={meanIP}, learning rate={lrateIP}'
        s += '; Synaptic plasticity: learning rate={lrateRO}, epsilon={regRO}'
        return s.format(**self.__dict__)
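

# Usage sketch (illustrative only, not part of the original code): the loop below
# shows how the online fit() interface could be driven with non-negative data.
# The random data, the dimensions and the function name are assumptions made for
# demonstration.
def _example_nnsae_usage(n_steps=100, inp_dim=64, hid_dim=16):
    model = Nnsae(inp_dim, hid_dim)
    for _ in range(n_steps):
        x = torch.rand(inp_dim, 1)  # hypothetical non-negative sample, shape (inpDim, batch)
        out, error = model.fit(x)   # one online update: BPDC read-out + IP, weights clipped to >= 0
    return model, (error ** 2).mean().item()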


class BackpropagationDecoralation(Optimizer):
    # Optimizer implementing the backpropagation-decorrelation (BPDC) update of the
    # read-out weights with a per-sample adaptive learning rate.
    def __init__(self, params, hidden_activations, lrateRO=0.01, regRO=0.0002):
        # learning rate for synaptic plasticity of the read-out layer (RO)
        self.lrateRO = lrateRO
        self.regRO = regRO  # numerical regularization constant
        defaults = dict(lrateRO=self.lrateRO,
                        regRO=self.regRO,
                        hidden_activations=hidden_activations)
        super(BackpropagationDecoralation, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(BackpropagationDecoralation, self).__setstate__(state)

    def step(self, closure=None):
        r"""Performs a single optimization step (parameter update).

        Arguments:
            closure (callable): A closure that reevaluates the model and
                returns the loss. Optional for most optimizers.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p, h in zip(group['params'], group['hidden_activations']):
                # skip parameters without a gradient before touching .data
                if p.grad is None:
                    continue
                grad = p.grad.data
                # per-sample adaptive learning rate
                lrate = self.lrateRO / (self.regRO + (h ** 2).sum(0, keepdim=True))
                d_p = -lrate * grad
                p.data.add_(d_p)
        return loss
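

# Usage sketch for the optimizer (illustrative only): a single linear read-out
# trained with an MSE loss; the hidden activations handed to the optimizer are the
# inputs of that read-out. The function name, shapes and random data below are
# assumptions made for demonstration.
def _example_bpdc_optimizer_usage(n_steps=100, hid_dim=16, out_dim=64):
    readout = Parameter(torch.zeros(out_dim, hid_dim))
    h = torch.rand(hid_dim, 1)       # hypothetical hidden activations, shape (hidDim, batch)
    target = torch.rand(out_dim, 1)  # hypothetical target, shape (outDim, batch)
    optim = BackpropagationDecoralation([readout], [h])
    for _ in range(n_steps):
        optim.zero_grad()
        pred = readout.mm(h)
        loss = ((pred - target) ** 2).sum()
        loss.backward()
        optim.step()  # BPDC step with the per-sample adaptive learning rate
    return loss.item()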