class Node:
    # Base class for every other node type in the graph
    def __init__(self, inputs=[]):
        # Each node keeps track of its input nodes and output nodes
        self.inputs = inputs
        self.outputs = []
        # Every node registers itself as an output of each of its input nodes
        for n in self.inputs:
            n.outputs.append(self)  # set 'self' node as the inbound node's outbound node
        self.value = None
        self.gradients = {}
        # keys are the inputs to this node, and their values are the partials
        # of this node with respect to that input: \partial{node} / \partial{input_i}

    def forward(self):
        # Forward-propagation method; subclasses override it
        """
        Forward propagation.
        Compute the output value based on the inbound nodes and
        store the result in self.value.
        """
        raise NotImplementedError

    def backward(self):
        # Backward-propagation method; subclasses override it
        raise NotImplementedError


class Input(Node):
    # Input node: network input nodes as well as weight and bias nodes
    def __init__(self):
        """
        An Input node has no inbound nodes,
        so there is no need to pass anything to the Node instantiator.
        """
        Node.__init__(self)

    def forward(self, value=None):
        """
        Input is the only node type where the value may be passed as an
        argument to forward(). All other node implementations should get
        the value of the previous node from self.inputs.

        Example: val0 = self.inputs[0].value
        """
        # Set the node's value
        if value is not None:
            self.value = value
        # Since this is an input node, forward() simply initialises its own value.
        # The Input subclass just holds a value, such as a data feature or a
        # model parameter (weight/bias).

    def backward(self):
        # Compute this node's gradient
        self.gradients = {self: 0}  # initialization
        for n in self.outputs:
            # Accumulate the gradient of the cost w.r.t. this node from each output node
            grad_cost = n.gradients[self]
            self.gradients[self] += grad_cost * 1
        # input --> N1, N2
        # \partial L / \partial N
        #   ==> \partial L / \partial N1 * \partial N1 / \partial N
class Add(Node):
    def __init__(self, *nodes):
        Node.__init__(self, nodes)

    def forward(self):
        # When forward is executed, this node computes its value as the sum of its inputs
        self.value = sum(map(lambda n: n.value, self.inputs))
class Linear(Node):
    # Computation of a fully connected layer
    def __init__(self, nodes, weights, bias):
        Node.__init__(self, [nodes, weights, bias])

    def forward(self):
        # Forward pass: y = X·W + b
        inputs = self.inputs[0].value
        weights = self.inputs[1].value
        bias = self.inputs[2].value
        self.value = np.dot(inputs, weights) + bias

    def backward(self):
        # Backward pass
        # Initialize a partial for each of the inbound nodes
        self.gradients = {n: np.zeros_like(n.value) for n in self.inputs}
        for n in self.outputs:
            # Get the partial of the cost w.r.t. this node
            grad_cost = n.gradients[self]
            self.gradients[self.inputs[0]] += np.dot(grad_cost, self.inputs[1].value.T)
            self.gradients[self.inputs[1]] += np.dot(self.inputs[0].value.T, grad_cost)
            self.gradients[self.inputs[2]] += np.sum(grad_cost, axis=0, keepdims=False)
        # d(XW + b) / dW ==> X
        # d(XW + b) / dX ==> W


class Sigmoid(Node):
    # Sigmoid activation node
    def __init__(self, node):
        Node.__init__(self, [node])

    def _sigmoid(self, x):
        return 1. / (1 + np.exp(-1 * x))

    def forward(self):
        # Forward pass: apply the sigmoid function
        self.x = self.inputs[0].value  # self.inputs is a list; [0] is the only input
        self.value = self._sigmoid(self.x)

    def backward(self):
        # Backward pass: compute the gradient
        self.partial = self._sigmoid(self.x) * (1 - self._sigmoid(self.x))
        # y  = 1 / (1 + e^-x)
        # y' = 1 / (1 + e^-x) * (1 - 1 / (1 + e^-x))
        self.gradients = {n: np.zeros_like(n.value) for n in self.inputs}
        for n in self.outputs:
            # Get the partial of the cost with respect to this node
            grad_cost = n.gradients[self]
            # Elementwise * keeps all the dimensions the same
            self.gradients[self.inputs[0]] += grad_cost * self.partial
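# A small sanity sketch (an assumption added for illustration, not part of the
# original post) of the shapes used by Linear.backward above: with a batch of m
# examples, X is (m, n_in), W is (n_in, n_out) and grad_cost is (m, n_out), so
#   dCost/dX = grad_cost · W^T                       -> (m, n_in)
#   dCost/dW = X^T · grad_cost                       -> (n_in, n_out)
#   dCost/db = sum of grad_cost over the batch axis  -> (n_out,)
import numpy as np

m, n_in, n_out = 4, 3, 2
X_demo = np.random.randn(m, n_in)
W_demo = np.random.randn(n_in, n_out)
grad_cost_demo = np.random.randn(m, n_out)

assert np.dot(grad_cost_demo, W_demo.T).shape == (m, n_in)
assert np.dot(X_demo.T, grad_cost_demo).shape == (n_in, n_out)
assert np.sum(grad_cost_demo, axis=0).shape == (n_out,)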
def forward_and_backward(outputnode, graph):
    # Execute the forward method of every node in the topologically sorted graph.
    # In practice it is common to feed in multiple data examples in each forward
    # pass rather than just one, because the examples can be processed in
    # parallel. The number of examples is called the batch size.
    for n in graph:
        # Each node executes forward and computes self.value,
        # following the topological sort order
        n.forward()

    for n in graph[::-1]:
        n.backward()

    # return outputnode.value
### v -> a -> C
##  b -> C
##  b -> v -> a -> C
##  v -> v -> a -> C
def topological_sort(feed_dict):
    """
    Sort generic nodes in topological order using Kahn's Algorithm.

    'feed_dict': A dictionary where the key is an 'Input' node and the value is
    the respective value to feed to that node.

    Returns a list of sorted nodes.
    """
    input_nodes = [n for n in feed_dict.keys()]

    G = {}
    nodes = [n for n in input_nodes]
    while len(nodes) > 0:
        n = nodes.pop(0)
        if n not in G:
            G[n] = {'in': set(), 'out': set()}
        for m in n.outputs:
            if m not in G:
                G[m] = {'in': set(), 'out': set()}
            G[n]['out'].add(m)
            G[m]['in'].add(n)
            nodes.append(m)

    L = []
    S = set(input_nodes)
    while len(S) > 0:
        n = S.pop()
        if isinstance(n, Input):
            # If n is an Input node, set n's value from feed_dict[n];
            # otherwise n's value is computed from its inbound nodes
            n.value = feed_dict[n]
        L.append(n)
        for m in n.outputs:
            G[n]['out'].remove(m)
            G[m]['in'].remove(n)
            # If m has no other incoming edges, add it to S
            if len(G[m]['in']) == 0:
                S.add(m)
    return L
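# A minimal usage sketch (assumed for illustration, not from the original post):
# two Input nodes feeding an Add node. Note that Add does not override
# backward(), so only the forward pass is run here.
x1, x2 = Input(), Input()
total = Add(x1, x2)

toy_graph = topological_sort({x1: 4, x2: 5})  # also assigns 4 and 5 to x1 and x2
for node in toy_graph:
    node.forward()
print(total.value)  # 9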
def sgd_update(trainables, learning_rate=1e-2):
    # There are many other update / optimization methods,
    # such as Adam, Momentum, ...
    for t in trainables:
        t.value += -1 * learning_rate * t.gradients[t]
""" Check out the new network architecture and dataset! Notice that the weights and biases are generated randomly. No need to change anything,but feel free to tweak to test your network, play around with the epoches, batch size,etc! """
import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample
# from minflow import *
# Load data
data = load_boston()
X_ = data['data']
y_ = data['target']
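# The training loop below (Step 4) relies on a graph, a trainables list and a few
# hyperparameters whose setup does not appear in this section. The following is a
# minimal, assumed sketch of such a setup so the loop can be read end to end; the
# MSE cost node, the hidden-layer size and all of these variable names are
# illustrative assumptions, not the post's original code.

class MSE(Node):
    # Hypothetical mean-squared-error cost node, modeled on the subclasses above
    def __init__(self, y, a):
        Node.__init__(self, [y, a])

    def forward(self):
        y = self.inputs[0].value.reshape(-1, 1)
        a = self.inputs[1].value.reshape(-1, 1)
        self.m = self.inputs[0].value.shape[0]
        self.diff = y - a
        self.value = np.mean(self.diff ** 2)

    def backward(self):
        self.gradients[self.inputs[0]] = (2 / self.m) * self.diff
        self.gradients[self.inputs[1]] = (-2 / self.m) * self.diff

n_features = X_.shape[1]
n_hidden = 10
W1_, b1_ = np.random.randn(n_features, n_hidden), np.zeros(n_hidden)
W2_, b2_ = np.random.randn(n_hidden, 1), np.zeros(1)

X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = MSE(y, l2)

feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}

epochs = 1000
m = X_.shape[0]
batch_size = 16
steps_per_epoch = m // batch_size

graph = topological_sort(feed_dict)  # the cost node ends up last in the sort
trainables = [W1, b1, W2, b2]
losses = []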
# Step 4: train the network
for i in range(epochs):
    loss = 0
    for j in range(steps_per_epoch):
        # Step 1: randomly sample a batch of examples
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        # Reset the values of the X and y Input nodes
        X.value = X_batch
        y.value = y_batch

        # Step 2: run the forward and backward passes over the whole graph
        _ = None
        forward_and_backward(_, graph)  # the output-node argument is not used here

        # Step 3: update the trainable parameters with SGD
        rate = 1e-2
        sgd_update(trainables, rate)

        loss += graph[-1].value

    if i % 100 == 0:
        print("Epoch: {}, Loss: {:.3f}".format(i + 1, loss / steps_per_epoch))
    losses.append(loss)