Backprop not exploding
This commit is contained in:
19
functions.py
19
functions.py
@@ -212,8 +212,13 @@ class Network:
|
|||||||
|
|
||||||
# Determine partial derivative and delta for the output layer.
|
# Determine partial derivative and delta for the output layer.
|
||||||
# delta output layer
|
# delta output layer
|
||||||
delta = self.loss.delta(a[self.n_layers], y_true)
|
delta = self.loss.delta(y_true, a[self.n_layers])
|
||||||
dw = np.dot(a[self.n_layers - 1].T, delta)
|
dw = np.dot(a[self.n_layers - 1].T, delta)
|
||||||
|
|
||||||
|
update_params = {
|
||||||
|
self.n_layers - 1: (dw, delta)
|
||||||
|
}
|
||||||
|
|
||||||
# update weights and biases
|
# update weights and biases
|
||||||
self.update_w_b(self.n_layers - 1, dw, delta)
|
self.update_w_b(self.n_layers - 1, dw, delta)
|
||||||
|
|
||||||
@@ -223,7 +228,10 @@ class Network:
|
|||||||
for i in reversed(range(2, self.n_layers)):
|
for i in reversed(range(2, self.n_layers)):
|
||||||
delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
|
delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
|
||||||
dw = np.dot(a[i - 1].T, delta)
|
dw = np.dot(a[i - 1].T, delta)
|
||||||
self.update_w_b(i - 1, dw, delta)
|
update_params[i - 1] = (dw, delta)
|
||||||
|
|
||||||
|
for k, v in update_params.items():
|
||||||
|
self.update_w_b(k, v[0], v[1])
|
||||||
|
|
||||||
def update_w_b(self, index, dw, delta):
|
def update_w_b(self, index, dw, delta):
|
||||||
"""
|
"""
|
||||||
@@ -259,13 +267,13 @@ class Network:
|
|||||||
z, a = self.feed_forward(x_[k:l])
|
z, a = self.feed_forward(x_[k:l])
|
||||||
self.back_prop(z, a, y_[k:l])
|
self.back_prop(z, a, y_[k:l])
|
||||||
|
|
||||||
if (i + 1) % epochs // 10 == 0:
|
if (i + 1) % 100 == 0:
|
||||||
print("Loss:", self.loss.loss(y_true, z[self.n_layers]))
|
print("Loss:", self.loss.loss(y_true, z[self.n_layers]))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
from sklearn import datasets
|
from sklearn import datasets
|
||||||
#import sklearn.metrics
|
#import sklearn.metrics
|
||||||
|
np.random.seed(1)
|
||||||
# Load data
|
# Load data
|
||||||
data = datasets.load_iris()
|
data = datasets.load_iris()
|
||||||
x = data["data"]
|
x = data["data"]
|
||||||
@@ -277,4 +285,5 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
nn = Network((4, 8, 3), (Relu, Sigmoid))
|
nn = Network((4, 8, 3), (Relu, Sigmoid))
|
||||||
|
|
||||||
nn.fit(x, y, MSE, 1000, batch_size=16)
|
nn.fit(x[:2], y[:2], MSE, 1, batch_size=2)
|
||||||
|
#nn.fit(x, y, MSE, 10000, 16)
|
||||||
@@ -79,7 +79,7 @@ def cost_mse(a, y):
|
|||||||
:param y: (array) Ground truth labels
|
:param y: (array) Ground truth labels
|
||||||
:return: (flt) Loss
|
:return: (flt) Loss
|
||||||
"""
|
"""
|
||||||
return 0.5 * np.sum((a - y)**2)
|
return np.mean((a - y)**2)
|
||||||
|
|
||||||
|
|
||||||
def diff_cost_mse(a, y):
|
def diff_cost_mse(a, y):
|
||||||
@@ -135,7 +135,7 @@ class NeuralNetwork:
|
|||||||
|
|
||||||
# partial derivative with respect to layer 1
|
# partial derivative with respect to layer 1
|
||||||
delta2 = np.dot(delta3, self.w[2].T) * diff_relu(self.z[2])
|
delta2 = np.dot(delta3, self.w[2].T) * diff_relu(self.z[2])
|
||||||
|
print(self.w[2].T)
|
||||||
# dc_db1 = delta2
|
# dc_db1 = delta2
|
||||||
dc_dw1 = np.dot(self.x.T, delta2)
|
dc_dw1 = np.dot(self.x.T, delta2)
|
||||||
|
|
||||||
@@ -175,7 +175,7 @@ class NeuralNetwork:
|
|||||||
|
|
||||||
_, y = feed_forward(x, self.w, self.b)
|
_, y = feed_forward(x, self.w, self.b)
|
||||||
|
|
||||||
if i % epochs // 10 == 0:
|
if i % 100:
|
||||||
print("Loss:", cost_mse(y[3], labels))
|
print("Loss:", cost_mse(y[3], labels))
|
||||||
|
|
||||||
|
|
||||||
@@ -194,15 +194,15 @@ if __name__ == "__main__":
|
|||||||
y = np.eye(3)[y]
|
y = np.eye(3)[y]
|
||||||
|
|
||||||
nn = NeuralNetwork(4, 8, 3, 2e-2)
|
nn = NeuralNetwork(4, 8, 3, 2e-2)
|
||||||
nn.fit(x, y, 10, int(1e3))
|
nn.fit(x[:2], y[:2], 2, 1)
|
||||||
|
|
||||||
# result
|
|
||||||
_, y_ = feed_forward(x, nn.w, nn.b)
|
|
||||||
y_true = []
|
|
||||||
y_pred = []
|
|
||||||
for i in range(len(y)):
|
|
||||||
y_pred.append(np.argmax(y_[3][i]))
|
|
||||||
y_true.append(np.argmax(y[i]))
|
|
||||||
|
|
||||||
print(sklearn.metrics.classification_report(y_true, y_pred))
|
|
||||||
|
|
||||||
|
# # result
|
||||||
|
# _, y_ = feed_forward(x, nn.w, nn.b)
|
||||||
|
# y_true = []
|
||||||
|
# y_pred = []
|
||||||
|
# for i in range(len(y)):
|
||||||
|
# y_pred.append(np.argmax(y_[3][i]))
|
||||||
|
# y_true.append(np.argmax(y[i]))
|
||||||
|
#
|
||||||
|
# print(sklearn.metrics.classification_report(y_true, y_pred))
|
||||||
|
#
|
||||||
|
|||||||
Reference in New Issue
Block a user