class Node:
    # Base class for every other node type in the graph
    def __init__(self, inputs=[]):
        # Each node keeps track of its input nodes and output nodes
        self.inputs = inputs
        self.outputs = []
        # Every node registers itself as an output of each of its input nodes
        for n in self.inputs:
            n.outputs.append(self)  # set 'self' node as the inbound node's outbound node
        self.value = None
        self.gradients = {}
        # keys are the inputs to this node, and their values are the partials
        # of this node with respect to that input: \partial{node} / \partial{input_i}

    def forward(self):
        # Forward-propagation method; subclasses override it
        """
        Forward propagation.
        Compute the output value based on the inbound nodes and
        store the result in self.value.
        """
        raise NotImplementedError

    def backward(self):
        # Backward-propagation method; subclasses override it
        raise NotImplementedError


class Input(Node):
    # Input node: network input nodes as well as weight and bias nodes
    def __init__(self):
        """
        An Input node has no inbound nodes,
        so there is no need to pass anything to the Node instantiator.
        """
        Node.__init__(self)

    def forward(self, value=None):
        """
        Input is the only node type where the value may be passed as an
        argument to forward(). All other node implementations should get
        the value of the previous node from self.inputs.

        Example: val0 = self.inputs[0].value
        """
        # Set the node's value
        if value is not None:
            self.value = value
        # Since this is an input node, forward() simply initialises its own value.
        # The Input subclass just holds a value, such as a data feature or a
        # model parameter (weight/bias).

    def backward(self):
        # Compute this node's gradient
        self.gradients = {self: 0}  # initialization
        for n in self.outputs:
            # Accumulate the gradient of the cost w.r.t. this node from each output node
            grad_cost = n.gradients[self]
            self.gradients[self] += grad_cost * 1
        # input --> N1, N2
        # \partial L / \partial N
        #   ==> \partial L / \partial N1 * \partial N1 / \partial N
class Add(Node):
    def __init__(self, *nodes):
        Node.__init__(self, nodes)

    def forward(self):
        # When forward is executed, this node computes its value as the sum of its inputs
        self.value = sum(map(lambda n: n.value, self.inputs))
class Linear(Node):
    # Computation of a fully connected layer
    def __init__(self, nodes, weights, bias):
        Node.__init__(self, [nodes, weights, bias])

    def forward(self):
        # Forward pass: y = X·W + b
        inputs = self.inputs[0].value
        weights = self.inputs[1].value
        bias = self.inputs[2].value
        self.value = np.dot(inputs, weights) + bias

    def backward(self):
        # Backward pass
        # Initialize a partial for each of the inbound nodes
        self.gradients = {n: np.zeros_like(n.value) for n in self.inputs}
        for n in self.outputs:
            # Get the partial of the cost w.r.t. this node
            grad_cost = n.gradients[self]
            self.gradients[self.inputs[0]] += np.dot(grad_cost, self.inputs[1].value.T)
            self.gradients[self.inputs[1]] += np.dot(self.inputs[0].value.T, grad_cost)
            self.gradients[self.inputs[2]] += np.sum(grad_cost, axis=0, keepdims=False)
        # d(XW + b) / dW ==> X
        # d(XW + b) / dX ==> W


class Sigmoid(Node):
    # Sigmoid activation node
    def __init__(self, node):
        Node.__init__(self, [node])

    def _sigmoid(self, x):
        return 1. / (1 + np.exp(-1 * x))

    def forward(self):
        # Forward pass: apply the sigmoid function
        self.x = self.inputs[0].value  # self.inputs is a list; [0] is the only input
        self.value = self._sigmoid(self.x)

    def backward(self):
        # Backward pass: compute the gradient
        self.partial = self._sigmoid(self.x) * (1 - self._sigmoid(self.x))
        # y  = 1 / (1 + e^-x)
        # y' = 1 / (1 + e^-x) * (1 - 1 / (1 + e^-x))
        self.gradients = {n: np.zeros_like(n.value) for n in self.inputs}
        for n in self.outputs:
            # Get the partial of the cost with respect to this node
            grad_cost = n.gradients[self]
            # Elementwise * keeps all the dimensions the same
            self.gradients[self.inputs[0]] += grad_cost * self.partial
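# A small sanity sketch (an assumption added for illustration, not part of the
# original post) of the shapes used by Linear.backward above: with a batch of m
# examples, X is (m, n_in), W is (n_in, n_out) and grad_cost is (m, n_out), so
#   dCost/dX = grad_cost · W^T                       -> (m, n_in)
#   dCost/dW = X^T · grad_cost                       -> (n_in, n_out)
#   dCost/db = sum of grad_cost over the batch axis  -> (n_out,)
import numpy as np

m, n_in, n_out = 4, 3, 2
X_demo = np.random.randn(m, n_in)
W_demo = np.random.randn(n_in, n_out)
grad_cost_demo = np.random.randn(m, n_out)

assert np.dot(grad_cost_demo, W_demo.T).shape == (m, n_in)
assert np.dot(X_demo.T, grad_cost_demo).shape == (n_in, n_out)
assert np.sum(grad_cost_demo, axis=0).shape == (n_out,)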
def forward_and_backward(outputnode, graph):
    # Execute the forward method of every node in the topologically sorted graph.
    # In practice it is common to feed in multiple data examples in each forward
    # pass rather than just one, because the examples can be processed in
    # parallel. The number of examples is called the batch size.
    for n in graph:
        # Each node executes forward and computes self.value,
        # following the topological sort order
        n.forward()

    for n in graph[::-1]:
        n.backward()

    # return outputnode.value
### v -> a -> C
##  b -> C
##  b -> v -> a -> C
##  v -> v -> a -> C
def topological_sort(feed_dict):
    """
    Sort generic nodes in topological order using Kahn's Algorithm.

    'feed_dict': A dictionary where the key is an 'Input' node and the value is
    the respective value to feed to that node.

    Returns a list of sorted nodes.
    """
    input_nodes = [n for n in feed_dict.keys()]

    G = {}
    nodes = [n for n in input_nodes]
    while len(nodes) > 0:
        n = nodes.pop(0)
        if n not in G:
            G[n] = {'in': set(), 'out': set()}
        for m in n.outputs:
            if m not in G:
                G[m] = {'in': set(), 'out': set()}
            G[n]['out'].add(m)
            G[m]['in'].add(n)
            nodes.append(m)

    L = []
    S = set(input_nodes)
    while len(S) > 0:
        n = S.pop()
        if isinstance(n, Input):
            # If n is an Input node, set n's value from feed_dict[n];
            # otherwise n's value is computed from its inbound nodes
            n.value = feed_dict[n]
        L.append(n)
        for m in n.outputs:
            G[n]['out'].remove(m)
            G[m]['in'].remove(n)
            # If m has no other incoming edges, add it to S
            if len(G[m]['in']) == 0:
                S.add(m)
    return L
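# A minimal usage sketch (assumed for illustration, not from the original post):
# two Input nodes feeding an Add node. Note that Add does not override
# backward(), so only the forward pass is run here.
x1, x2 = Input(), Input()
total = Add(x1, x2)

toy_graph = topological_sort({x1: 4, x2: 5})  # also assigns 4 and 5 to x1 and x2
for node in toy_graph:
    node.forward()
print(total.value)  # 9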
def sgd_update(trainables, learning_rate=1e-2):
    # There are many other update / optimization methods,
    # such as Adam, Momentum, ...
    for t in trainables:
        t.value += -1 * learning_rate * t.gradients[t]
""" Check out the new network architecture and dataset! Notice that the weights and biases are generated randomly. No need to change anything,but feel free to tweak to test your network, play around with the epoches, batch size,etc! """
import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample
# from minflow import *
# Load data
data = load_boston()
X_ = data['data']
y_ = data['target']
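# The training loop below (Step 4) relies on a graph, a trainables list and a few
# hyperparameters whose setup does not appear in this section. The following is a
# minimal, assumed sketch of such a setup so the loop can be read end to end; the
# MSE cost node, the hidden-layer size and all of these variable names are
# illustrative assumptions, not the post's original code.

class MSE(Node):
    # Hypothetical mean-squared-error cost node, modeled on the subclasses above
    def __init__(self, y, a):
        Node.__init__(self, [y, a])

    def forward(self):
        y = self.inputs[0].value.reshape(-1, 1)
        a = self.inputs[1].value.reshape(-1, 1)
        self.m = self.inputs[0].value.shape[0]
        self.diff = y - a
        self.value = np.mean(self.diff ** 2)

    def backward(self):
        self.gradients[self.inputs[0]] = (2 / self.m) * self.diff
        self.gradients[self.inputs[1]] = (-2 / self.m) * self.diff

n_features = X_.shape[1]
n_hidden = 10
W1_, b1_ = np.random.randn(n_features, n_hidden), np.zeros(n_hidden)
W2_, b2_ = np.random.randn(n_hidden, 1), np.zeros(1)

X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()

l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
cost = MSE(y, l2)

feed_dict = {X: X_, y: y_, W1: W1_, b1: b1_, W2: W2_, b2: b2_}

epochs = 1000
m = X_.shape[0]
batch_size = 16
steps_per_epoch = m // batch_size

graph = topological_sort(feed_dict)  # the cost node ends up last in the sort
trainables = [W1, b1, W2, b2]
losses = []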
# Step 4: train the network
for i in range(epochs):
    loss = 0
    for j in range(steps_per_epoch):
        # Step 1: randomly sample a batch of examples
        X_batch, y_batch = resample(X_, y_, n_samples=batch_size)

        # Reset the values of the X and y Input nodes
        X.value = X_batch
        y.value = y_batch

        # Step 2: run the forward and backward passes over the whole graph
        _ = None
        forward_and_backward(_, graph)  # the output-node argument is not used here

        # Step 3: update the trainable parameters with SGD
        rate = 1e-2
        sgd_update(trainables, rate)

        loss += graph[-1].value

    if i % 100 == 0:
        print("Epoch: {}, Loss: {:.3f}".format(i + 1, loss / steps_per_epoch))
    losses.append(loss)