A humble NLP rookie

Fully Connected Neural Network (FNN)

import numpy as np
class Node:
    # Base class for every other node type in the computation graph.
    def __init__(self, inputs=[]):
        # Record this node's input nodes and output nodes.
        self.inputs = inputs
        self.outputs = []

        # Register this node as an output of each of its input nodes.
        for n in self.inputs:
            n.outputs.append(self)

        self.value = None

        self.gradients = {}
        # Keys are the inputs to this node, and their values are the
        # partials of this node with respect to that input:
        # \partial{node} / \partial{input_i}

    def forward(self):
        """
        Forward propagation.
        Compute the output value based on the input nodes and store the
        result in self.value. Subclasses override this method.
        """
        raise NotImplementedError

    def backward(self):
        # Backward propagation. Subclasses override this method.
        raise NotImplementedError


class Input(Node):
    # Input node: holds network inputs, weight matrices, or bias vectors.
    def __init__(self):
        """
        An Input node has no inbound nodes,
        so there is no need to pass anything to the Node constructor.
        """
        Node.__init__(self)

    def forward(self, value=None):
        """
        Input is the only node type whose value may be passed as an
        argument to forward().
        All other node implementations should get the value of the
        previous node from self.inputs.

        Example:
            val0 = self.inputs[0].value
        """
        if value is not None:
            self.value = value
        # An Input node simply holds a value, such as a data feature or
        # a model parameter (weight/bias).

    def backward(self):
        # An Input node has no inputs of its own, so its gradient is the sum
        # of the cost gradients flowing back from its output nodes.
        self.gradients = {self: 0}  # initialization
        for n in self.outputs:
            grad_cost = n.gradients[self]
            self.gradients[self] += grad_cost * 1  # accumulate over all consumers

        # input --> N1, N2
        # \partial L / \partial input
        #   = \partial L / \partial N1 * \partial N1 / \partial input + ...
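In equation form, the accumulation above is just the chain rule summed over every node that consumes this input:

$$\frac{\partial L}{\partial N} = \sum_{j} \frac{\partial L}{\partial N_j}\,\frac{\partial N_j}{\partial N}$$

where each consumer $N_j$ has already stored its term $\frac{\partial L}{\partial N_j}\frac{\partial N_j}{\partial N}$ in its own gradients dict under the key of this Input node, so the Input only needs to sum them.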

class Add(Node):
    def __init__(self, *nodes):
        Node.__init__(self, nodes)

    def forward(self):
        # When forward() runs, this node's value is the sum of its inputs' values.
        self.value = sum(map(lambda n: n.value, self.inputs))


class Linear(Node):
    # Fully connected (linear) layer.
    def __init__(self, nodes, weights, bias):
        Node.__init__(self, [nodes, weights, bias])

    def forward(self):
        # Forward pass: y = X * W + b
        inputs = self.inputs[0].value
        weights = self.inputs[1].value
        bias = self.inputs[2].value

        self.value = np.dot(inputs, weights) + bias

    def backward(self):
        # Backward pass: initialize a partial for each of the input nodes.
        self.gradients = {n: np.zeros_like(n.value) for n in self.inputs}

        for n in self.outputs:
            # Get the partial of the cost w.r.t. this node.
            grad_cost = n.gradients[self]

            self.gradients[self.inputs[0]] += np.dot(grad_cost, self.inputs[1].value.T)
            self.gradients[self.inputs[1]] += np.dot(self.inputs[0].value.T, grad_cost)
            self.gradients[self.inputs[2]] += np.sum(grad_cost, axis=0, keepdims=False)
        # d(XW + b)/dW ==> X
        # d(XW + b)/dX ==> W
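For reference, these lines implement the standard matrix-calculus gradients for a linear layer $Z = XW + b$ with incoming gradient $\partial L/\partial Z$:

$$\frac{\partial L}{\partial X} = \frac{\partial L}{\partial Z}\,W^{\top},\qquad \frac{\partial L}{\partial W} = X^{\top}\,\frac{\partial L}{\partial Z},\qquad \frac{\partial L}{\partial b} = \sum_{i=1}^{m}\left(\frac{\partial L}{\partial Z}\right)_{i,:}$$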

class Sigmoid(Node):
    # Sigmoid activation node.
    def __init__(self, node):
        Node.__init__(self, [node])

    def _sigmoid(self, x):
        return 1. / (1 + np.exp(-1 * x))

    def forward(self):
        # Forward pass: apply the sigmoid element-wise.
        self.x = self.inputs[0].value  # self.inputs is a list; [0] is the only input
        self.value = self._sigmoid(self.x)

    def backward(self):
        # Backward pass: compute the gradient of the cost w.r.t. the input.
        # y  = 1 / (1 + e^-x)
        # y' = y * (1 - y)
        self.partial = self._sigmoid(self.x) * (1 - self._sigmoid(self.x))

        self.gradients = {n: np.zeros_like(n.value) for n in self.inputs}

        for n in self.outputs:
            grad_cost = n.gradients[self]  # partial of the cost w.r.t. this node
            self.gradients[self.inputs[0]] += grad_cost * self.partial
            # element-wise * keeps all the dimensions the same
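The partial used in backward() follows directly from differentiating the sigmoid:

$$\sigma(x) = \frac{1}{1 + e^{-x}},\qquad \sigma'(x) = \sigma(x)\bigl(1 - \sigma(x)\bigr)$$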

class MSE(Node):
    # Mean squared error cost node.
    def __init__(self, y, a):
        Node.__init__(self, [y, a])

    def forward(self):
        # Forward pass: compute the mean squared error over the batch.
        y = self.inputs[0].value.reshape(-1, 1)
        a = self.inputs[1].value.reshape(-1, 1)
        assert(y.shape == a.shape)

        self.m = self.inputs[0].value.shape[0]
        self.diff = y - a

        self.value = np.mean(self.diff**2)

    def backward(self):
        # Backward pass: gradients of the cost w.r.t. y and a.
        self.gradients[self.inputs[0]] = (2 / self.m) * self.diff
        self.gradients[self.inputs[1]] = (-2 / self.m) * self.diff
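The two gradients in backward() come from differentiating the cost, computed here over a batch of $m$ examples:

$$C = \frac{1}{m}\sum_{i=1}^{m}(y_i - a_i)^2,\qquad \frac{\partial C}{\partial y} = \frac{2}{m}(y - a),\qquad \frac{\partial C}{\partial a} = -\frac{2}{m}(y - a)$$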

def forward_and_backward(outputnode, graph):
    # Execute the forward method of every node in topological order, then the
    # backward method of every node in reverse order.
    # In practice it is common to feed multiple examples in each forward pass
    # rather than just one, because the examples can be processed in parallel.
    # The number of examples is called the batch size.
    for n in graph:
        n.forward()
        # Each node computes self.value based on the topological sort result.
    for n in graph[::-1]:
        n.backward()
    # return outputnode.value

# Example dependency chains through a graph ending in the cost C:
# v -> a -> C
# b -> C
# b -> v -> a -> C

def topological_sort(feed_dict):
    """
    Sort generic nodes in topological order using Kahn's algorithm.
    'feed_dict': a dictionary where each key is an Input node and the value
    is the respective value to feed to that node.
    Returns a list of sorted nodes.
    """
    input_nodes = [n for n in feed_dict.keys()]

    # Build the graph: record incoming and outgoing edges for every node.
    G = {}
    nodes = [n for n in input_nodes]
    while len(nodes) > 0:
        n = nodes.pop(0)
        if n not in G:
            G[n] = {'in': set(), 'out': set()}
        for m in n.outputs:
            if m not in G:
                G[m] = {'in': set(), 'out': set()}
            G[n]['out'].add(m)
            G[m]['in'].add(n)
            nodes.append(m)

    L = []
    S = set(input_nodes)
    while len(S) > 0:
        n = S.pop()

        if isinstance(n, Input):
            n.value = feed_dict[n]
            # If n is an Input node, set its value from feed_dict[n];
            # otherwise its value is computed from its input nodes.

        L.append(n)
        for m in n.outputs:
            G[n]['out'].remove(m)
            G[m]['in'].remove(n)
            # If m has no other incoming edges, add it to S.
            if len(G[m]['in']) == 0:
                S.add(m)
    return L
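As a quick sanity check (a sketch of my own, not part of the original notebook), Kahn's algorithm should always place every Input node before the node that consumes it:

# Hypothetical example: three Inputs feeding a single Linear node
x, w, b = Input(), Input(), Input()
f = Linear(x, w, b)
order = topological_sort({x: np.array([[1., 2.]]),
                          w: np.array([[3.], [4.]]),
                          b: np.array([5.])})
assert order.index(f) == len(order) - 1  # the Linear node comes last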

def sgd_update(trainables, learning_rate=1e-2):
    # Vanilla stochastic gradient descent. There are many other update /
    # optimization methods, such as Adam and momentum.
    for t in trainables:
        t.value += -1 * learning_rate * t.gradients[t]
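To see every piece working together before moving to the Boston housing data, here is a minimal end-to-end sketch (my own example, assuming the classes above are in scope); it builds a tiny graph, runs one forward/backward pass, and applies a single SGD step:

# Hypothetical mini example: fit sigmoid(x*w + b) to a single target value
x_in, y_in = Input(), Input()
w_in, b_in = Input(), Input()

pred = Sigmoid(Linear(x_in, w_in, b_in))
mse_node = MSE(y_in, pred)

feed = {
    x_in: np.array([[0.5, -1.0]]),   # one example with two features
    y_in: np.array([1.0]),           # one target value
    w_in: np.array([[0.1], [0.2]]),  # 2x1 weight matrix
    b_in: np.array([0.0]),
}

g = topological_sort(feed)
forward_and_backward(None, g)        # fills in every node's value and gradients
sgd_update([w_in, b_in], learning_rate=1e-2)
print(mse_node.value)                # the MSE after the first forward pass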
from sklearn.datasets import load_boston
data = load_boston()
losses = []
"""
Check out the new network architecture and dataset!
Notice that the weights and biases are
generated randomly.
No need to change anything,but feel free to tweak
to test your network, play around with the epoches, batch size,etc!
"""

import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample
# from minflow import *

# Load data
# Note: load_boston is deprecated in recent scikit-learn versions (removed in
# 1.2+); this notebook was written against an older release.
data = load_boston()
X_ = data['data']
y_ = data['target']

# Normalize data: standardize each feature to zero mean and unit variance
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

n_features = X_.shape[1]
n_hidden = 10
W1_ = np.random.randn(n_features, n_hidden)
b1_ = np.zeros(n_hidden)
W2_ = np.random.randn(n_hidden, 1)
b2_ = np.zeros(1)

# Neural network
X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = MSE(y, l2)

feed_dict = {
    X: X_,
    y: y_,
    W1: W1_,
    b1: b1_,
    W2: W2_,
    b2: b2_
}

epochs = 5000
# Total number of examples
m = X_.shape[0]
batch_size = 16
steps_per_epoch = m // batch_size

graph = topological_sort(feed_dict)
trainables = [W1, b1, W2, b2]

print("Total number of examples = {}".format(m))

# Step 4: repeat steps 1-3 for the chosen number of epochs
for i in range(epochs):
    loss = 0
    for j in range(steps_per_epoch):
        # Step 1: randomly sample a batch of examples
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        # Reset the values of the X and y Input nodes
        X.value = X_batch
        y.value = y_batch

        # Step 2: run the forward and backward pass over the whole graph
        _ = None
        forward_and_backward(_, graph)  # the output-node argument is unused here

        # Step 3: update the trainable parameters with SGD
        rate = 1e-2
        sgd_update(trainables, rate)

        loss += graph[-1].value

    if i % 100 == 0:
        print("Epoch:{},Loss:{:.3f}".format(i + 1, loss / steps_per_epoch))
        losses.append(loss)
Total number of examples = 506
Epoch:1,Loss:171.205
Epoch:101,Loss:8.215
Epoch:201,Loss:7.937
Epoch:301,Loss:7.301
Epoch:401,Loss:6.069
Epoch:501,Loss:5.735
Epoch:601,Loss:4.524
Epoch:701,Loss:4.418
Epoch:801,Loss:4.473
Epoch:901,Loss:4.510
Epoch:1001,Loss:3.484
Epoch:1101,Loss:4.627
Epoch:1201,Loss:4.473
Epoch:1301,Loss:4.152
Epoch:1401,Loss:4.831
Epoch:1501,Loss:4.992
Epoch:1601,Loss:4.500
Epoch:1701,Loss:4.706
Epoch:1801,Loss:3.927
Epoch:1901,Loss:4.712
Epoch:2001,Loss:4.262
Epoch:2101,Loss:3.968
Epoch:2201,Loss:4.792
Epoch:2301,Loss:4.106
Epoch:2401,Loss:3.815
Epoch:2501,Loss:4.089
Epoch:2601,Loss:4.376
Epoch:2701,Loss:3.923
Epoch:2801,Loss:5.195
Epoch:2901,Loss:4.273
Epoch:3001,Loss:3.618
Epoch:3101,Loss:3.368
Epoch:3201,Loss:3.754
Epoch:3301,Loss:4.027
Epoch:3401,Loss:3.442
Epoch:3501,Loss:4.029
Epoch:3601,Loss:3.475
Epoch:3701,Loss:3.861
Epoch:3801,Loss:4.248
Epoch:3901,Loss:3.697
Epoch:4001,Loss:4.466
Epoch:4101,Loss:4.746
Epoch:4201,Loss:4.145
Epoch:4301,Loss:3.594
Epoch:4401,Loss:4.501
Epoch:4501,Loss:3.719
Epoch:4601,Loss:4.005
Epoch:4701,Loss:4.331
Epoch:4801,Loss:4.092
Epoch:4901,Loss:3.657
def forward(outputNode, graph):
    # Run only the forward pass and return the value of the given output node.
    for n in graph:
        n.forward()
    return outputNode.value
forward(l2,graph)
array([[18.41675144],
       [29.47806568],
       [15.32327987],
       [48.89022023],
       [49.92947466],
       [45.52560889],
       [30.32152378],
       [13.889439  ],
       [20.52297098],
       [38.29474339],
       [19.54218589],
       [10.72223761],
       [10.13966276],
       [19.20474889],
       [ 9.715787  ],
       [26.87621257]])
import matplotlib.pyplot as plt
plt.plot(range(len(losses)),losses)
[<matplotlib.lines.Line2D at 0x1fd125539c8>]

[Figure: plot of the recorded training losses]

W2.value
array([[3.78502026],
       [5.48520715],
       [6.6856144 ],
       [5.89625598],
       [5.81552044],
       [3.22412022],
       [8.54504219],
       [2.76550267],
       [9.40747634],
       [5.60113079]])
X_ = data['data']
X_[0]
array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
       6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
       4.980e+00])
import keras
Using TensorFlow backend.
from keras.layers import Dense
from keras.models import Sequential

# For comparison, the same regression with Keras. Note that X_ was reset above
# to the raw (unnormalized) features, which is a plausible reason the loss
# plateaus around 85 below.
model = Sequential()

model.add(Dense(units=64, activation='sigmoid', input_dim=13))
model.add(Dense(units=30, activation='sigmoid'))  # input_dim is only needed on the first layer
model.add(Dense(units=1))

model.compile(loss='mse', optimizer='sgd', metrics=['mse'])
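Optionally, the layer shapes and parameter counts can be checked before training with the standard Keras call:

model.summary()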
model.fit(X_,y_,epochs=5000,batch_size=32)
Epoch 1/5000
506/506 [==============================] - 1s 3ms/step - loss: 158.0243 - mse: 158.0243
Epoch 2/5000
506/506 [==============================] - 0s 91us/step - loss: 81.3887 - mse: 81.3887
Epoch 3/5000
506/506 [==============================] - 0s 87us/step - loss: 79.5072 - mse: 79.5072
Epoch 4/5000
506/506 [==============================] - 0s
...
Epoch 2700/5000
506/506 [==============================] - 0s 99us/step - loss: 85.0554 - mse: 85.0554
Epoch 2701/5000
506/506 [==============================] - 0s 142us/step - loss: 84.8442 - mse: 84.8442
Epoch 2702/5000
506/506 [==============================] - 0s 103us/step - loss: 85.1862 - mse: 85.1862
Epoch 2703/5000
506/506 [==============================] - 0s 97us/step - loss: 84.6198 - mse: 84.6198
Epoch 2704/5000
506/506 [==============================] - 0s 89us/step - loss: 85.3531 - mse: 85.3531
Epoch 2705/5000
506/506 [==============================] - 0s 101us/step - loss: 84.8638 - mse: 84.8638
Epoch 2706/5000
506/506 [==============================] - 0s 87us/step - loss: 84.6398 - mse: 84.6398
Epoch 2707/5000
506/506 [==============================] - 0s 154us/step - loss: 84.8038 - mse: 84.8038
Epoch 2708/5000
506/506 [==============================] - 0s 99us/step - loss: 84.6299 - mse: 84.6299
Epoch 2709/5000
506/506 [==============================] - 0s 101us/step - loss: 84.8330 - mse: 84.8330
Epoch 2710/5000
506/506 [==============================] - 0s 130us/step - loss: 85.0682 - mse: 85.0682
Epoch 2711/5000
506/506 [==============================] - 0s 111us/step - loss: 84.6372 - mse: 84.6372
Epoch 2712/5000
506/506 [==============================] - 0s 101us/step - loss: 84.6715 - mse: 84.6715
Epoch 2713/5000
506/506 [==============================] - 0s 128us/step - loss: 85.0228 - mse: 85.0228
Epoch 2714/5000
506/506 [==============================] - 0s 93us/step - loss: 84.8349 - mse: 84.8349
Epoch 2715/5000
506/506 [==============================] - 0s 109us/step - loss: 84.7732 - mse: 84.7732
Epoch 2716/5000
506/506 [==============================] - 0s 113us/step - loss: 84.7862 - mse: 84.7862
Epoch 2717/5000
506/506 [==============================] - 0s 113us/step - loss: 84.5297 - mse: 84.5297
Epoch 2718/5000
506/506 [==============================] - 0s 107us/step - loss: 84.7425 - mse: 84.7425
Epoch 2719/5000
506/506 [==============================] - 0s 103us/step - loss: 85.2987 - mse: 85.2987
Epoch 2720/5000
506/506 [==============================] - 0s 107us/step - loss: 84.6666 - mse: 84.6666
Epoch 2721/5000
506/506 [==============================] - 0s 103us/step - loss: ...
...
506/506 [==============================] - 0s 105us/step - loss: 85.1947 - mse: 85.1947
Epoch 3139/5000
506/506 [==============================] - 0s 97us/step - loss: 84.9491 - mse: 84.9491
Epoch 3140/5000
506/506 [==============================] - 0s 99us/step - loss: 85.3199 - mse: 85.3198
Epoch 3141/5000
 32/506 [>.............................] - ETA: 0s - loss: 114.7459 - mse: 114.7459
O(∩_∩)O haha~