"""
Minimal example of using theano and neural network and gradient decent
"""
import theano
import theano.tensor as T
import numpy as np
# import theano.printing as tprint


def shared_dataset(data_xy):
    """
    Wrap a (data, labels) pair in theano.shared variables so Theano can keep
    the data in device (e.g. GPU) memory and reuse it across function calls.
    """
    data_x, data_y = data_xy
    shared_x = theano.shared(np.asarray(data_x, dtype=theano.config.floatX))
    shared_y = theano.shared(np.asarray(data_y, dtype=theano.config.floatX))
    return shared_x, shared_y
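
# Note: for classification targets the shared labels would usually be cast to
# integers, e.g. T.cast(shared_y, 'int32'); here the targets are real-valued,
# so floatX is appropriate.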


class HiddenLayer:
    """
    Implements a hidden layer of the network: here just an affine
    transformation of the input, with no non-linearity.
    """
    def __init__(self, input, n_in, n_nodes):
        self.input = input
        #: Weight matrix (n_in x n_nodes)
        W_values = np.asarray(np.ones((n_in, n_nodes)) * 2, dtype=theano.config.floatX)
        self.W = theano.shared(value=W_values, name='W', borrow=True)
        #: Bias term
        b_values = np.zeros((n_nodes,), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='b', borrow=True)
        #: Output is just the weighted sum of activations
        self.output = T.dot(input, self.W) + self.b
        #: All the variables that can change during learning
        self.params = [self.W, self.b]
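
        # Note: the constant all-twos weight initialisation above keeps this
        # example deterministic; in practice weights are usually initialised
        # randomly, e.g. np.random.uniform(-1.0, 1.0, (n_in, n_nodes)), so
        # that different nodes can learn different features.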


class OutputLayer:
    """
    Implements the last layer of the network. The output values of this layer
    are the results of the computation.
    """
    def __init__(self, input_from_previous_layer, n_in, n_nodes):
        #: Weight matrix (n_in x n_nodes)
        W_values = np.asarray(np.ones((n_in, n_nodes)) * 2, dtype=theano.config.floatX)
        self.W = theano.shared(value=W_values, name='W', borrow=True)
        #: Bias term
        b_values = np.zeros((n_nodes,), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, name='b', borrow=True)
        #: Output uses a (shifted) linear rectifier
        self.threshold = 1
        lin_output = T.dot(input_from_previous_layer, self.W) + self.b
        above_threshold = lin_output > self.threshold
        self.output = above_threshold * (lin_output - self.threshold)
        #: All the variables that can change during learning
        self.params = [self.W, self.b]
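
        # Note: above_threshold * (lin_output - self.threshold) is equivalent
        # to T.maximum(lin_output - self.threshold, 0), i.e. a rectifier
        # applied to the shifted activation.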
    def errors(self, y):
        """Return the error made in predicting the output value.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """
        # Check that y has the same dimensionality as the output
        if y.ndim != self.output.ndim:
            raise TypeError('y should have the same shape as self.output', ('y', y.type, 'output', self.output.type))
        # Use the symbolic abs_ op, not np.abs, so the result stays a Theano
        # expression that T.grad can differentiate
        return T.abs_(T.mean(self.output - y))
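
    # The cost above can be zero even when individual predictions are wrong,
    # because positive and negative errors cancel inside the mean. A sketch of
    # a common alternative (not part of the original example and unused below):
    def mse(self, y):
        """Mean squared error over all examples."""
        return T.mean((self.output - y) ** 2)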


class MLP:
    """
    Class which implements the classification algorithm (a neural network in
    our case).
    """
    def __init__(self, input, n_in, n_hidden, n_out):
        #: Hidden layer implements summation
        self.hidden_layer = HiddenLayer(input, n_in, n_hidden)
        #: Output layer implements summation and a rectifier non-linearity
        self.output_layer = OutputLayer(self.hidden_layer.output, n_hidden, n_out)
        # L1 norm; one regularisation option is to force the L1 norm to be small
        self.L1 = abs(self.hidden_layer.W).sum() \
            + abs(self.output_layer.W).sum()
        # Square of the L2 norm; another regularisation option is to force the
        # squared L2 norm to be small
        self.L2_sqr = (self.hidden_layer.W ** 2).sum() \
            + (self.output_layer.W ** 2).sum()
        self.params = self.hidden_layer.params + self.output_layer.params


def main():
    #: Define the data sets
    # train_set = (np.array([[1, 1], [1, 0], [0, 1], [0, 0]]), np.array([1, 0, 0, 0]))
    train_set = (np.array([[[0, 0], [0, 1], [1, 1], [1, 0]], [[0, 0], [0, 1], [1, 1], [1, 0]]]),
                 np.array([[[0], [0], [1], [0]], [[0], [0], [1], [0]]]))
    test_set = (np.array([[0, 0], [1, 0]]), np.array([0, 0]))
    # Transform them into theano.shared variables
    train_set_x, train_set_y = shared_dataset(train_set)
    test_set_x, test_set_y = shared_dataset(test_set)
    # This is how you can inspect the values behind symbolic Theano variables
    print(train_set_x.eval())
    print(train_set_y.eval())
    # Symbolic variables for the training data, the labels and a data-set index
    x = T.matrix('x')
    y = T.matrix('y')
    index = T.lscalar()
    # Define the classification algorithm
    classifier = MLP(input=x, n_in=2, n_hidden=1, n_out=1)
    # Define the cost function, with L1 and L2 regularisation terms
    cost = classifier.output_layer.errors(y) \
        + 0.0 * classifier.L1 \
        + 0.0 * classifier.L2_sqr
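    # With coefficients of 0.0 the regularisation terms have no effect on the
    # cost; small positive values (for example 0.001) would penalise large
    # weights.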
    # print(type(cost))
    # Compute the derivative of the cost with respect to each parameter
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    # Define how much each parameter value needs to change
    learning_rate = 0.02
    updates = []
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - learning_rate * gparam))
    print(updates)
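    # The update rule above is plain gradient descent,
    #     param <- param - learning_rate * d(cost)/d(param);
    # a common extension (not used here) would add a momentum term that mixes
    # in the previous update direction.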
    # train_model is a theano.function object that computes the cost for one
    # training point and, as a side effect, applies the updates to the parameters
    train_model = theano.function(inputs=[index], outputs=cost,
                                  updates=updates,
                                  givens={
                                      x: train_set_x[index],
                                      y: train_set_y[index]})
    # test_model evaluates the network on one training point without changing
    # any parameters, so it can be called freely to inspect intermediate values
    test_model = theano.function(
        inputs=[index],
        outputs=[classifier.hidden_layer.input, classifier.output_layer.output, cost, classifier.hidden_layer.W,
                 classifier.hidden_layer.b, classifier.output_layer.W, classifier.output_layer.b],
        givens={
            x: train_set_x[index],
            y: train_set_y[index]})
    n_train_points = train_set_x.get_value(borrow=True).shape[0]
    print("nr of training points is", n_train_points)
    for i in range(n_train_points):
        result = test_model(i)
        print("we calculated something:", result)
    # Let's train for some iterations:
    for iteration in range(1000):
        cost = train_model(0)
    for i in range(n_train_points):
        result = test_model(i)
        print("we calculated something:", result)


if __name__ == '__main__':
    main()