refactor
This commit is contained in:
0
neural_networks/__init__.py
Normal file
0
neural_networks/__init__.py
Normal file
340
neural_networks/functions.py
Normal file
340
neural_networks/functions.py
Normal file
@@ -0,0 +1,340 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Relu:
    """
    Rectified linear unit: f(z) = max(z, 0).
    """

    @staticmethod
    def activation(z):
        """
        Apply the rectifier element-wise.

        A new array is returned; the input is not modified, so the caller keeps its pre-activation values.

        :param z: (array) w(x) + b
        :return: (array) z with every negative entry replaced by 0.
        """
        return np.maximum(z, 0)

    @staticmethod
    def prime(z):
        """
        Derivative of the rectifier.

        A new array is returned; the input is not modified.

        :param z: (array) w(x) + b
        :return: (array) 1 where z > 0 and 0 elsewhere, in the dtype of z.
        """
        z = np.asarray(z)
        return (z > 0).astype(z.dtype)
|
||||
|
||||
|
||||
class Sigmoid:
    """
    Logistic sigmoid: f(z) = 1 / (1 + exp(-z)).
    """

    @staticmethod
    def activation(z):
        """
        :param z: (array) w(x) + b
        :return: (array) Element-wise sigmoid of z.
        """
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def prime(z):
        """
        Derivative of the sigmoid, f(z) * (1 - f(z)).

        :param z: (array) w(x) + b
        :return: (array) Element-wise derivative at z.
        """
        # Evaluate the sigmoid once and reuse it for both factors.
        s = Sigmoid.activation(z)
        return s * (1 - s)
|
||||
|
||||
|
||||
class Softmax:
    """
    Softmax activation, normalised along the last axis.
    """

    @staticmethod
    def activation(z):
        """
        https://stackoverflow.com/questions/34968722/softmax-function-python

        Numerically stable version: the maximum is subtracted before exponentiation.

        For a 1D vector the whole vector is normalised. For a 2D batch every row is
        normalised on its own, so each row sums to 1.

        :param z: (array) w(x) + b
        :return: (array) Softmax probabilities with the same shape as z.
        """
        z = np.asarray(z)
        # A 0-d input has no axis to reduce over; fall back to a full reduction.
        axis = -1 if z.ndim else None
        e_x = np.exp(z - np.max(z, axis=axis, keepdims=True))
        return e_x / e_x.sum(axis=axis, keepdims=True)

    # https://stackoverflow.com/questions/33541930/how-to-implement-the-softmax-derivative-independently-from-any-loss-function
    # http://cs231n.github.io/neural-networks-case-study/#loss
|
||||
|
||||
|
||||
class CrossEntropy:
    """
    Used with Softmax activation in final layer
    """

    def __init__(self, activation_fn=None):
        """
        Accept the same constructor argument as the other loss classes, so the loss can be
        built with ``loss(final_activation)`` the way Network.fit does.

        :param activation_fn: Class object of the final activation function. It is only stored;
            activation() always delegates to Softmax.
        """
        self.activation_fn = activation_fn

    @staticmethod
    def activation(z):
        """
        :param z: (array) w(x) + b
        :return: (array) Softmax probabilities.
        """
        return Softmax.activation(z)

    @staticmethod
    def delta(y_true, y):
        """
        http://cs231n.github.io/linear-classify/#softmax
        https://stackoverflow.com/questions/27089932/cross-entropy-softmax-and-the-derivative-term-in-backpropagation
        :param y_true: (array) One hot encoded truth vector.
        :param y: (array) Prediction vector.
        :return: (array) Delta vector.

        y are softmax probabilities
        y_true is truth vector one hot encoded

        y           y_true
        [0.8]       [1]
        [0.1]       [0]
        [0.1]       [0]

        result:

        [-0.2]
        [0.1]
        [0.1]

        """
        return y - y_true

    @staticmethod
    def loss(y_true, y):
        """
        https://datascience.stackexchange.com/questions/9302/the-cross-entropy-error-function-in-neural-networks

        Works for a single vector and for a batch. For a batch the per-sample losses
        (summed over the last axis) are averaged.

        :param y_true: (array) One hot encoded truth vector.
        :param y: (array) Prediction vector
        :return: (flt)
        """
        y_true = np.asarray(y_true)
        # Floor the probabilities at the smallest positive float so log() never sees 0.
        log_y = np.log(np.clip(y, np.finfo(float).tiny, None))
        per_sample = -np.sum(y_true * log_y, axis=-1)
        return np.mean(per_sample)
|
||||
|
||||
|
||||
class MSE:
    """
    Mean squared error loss paired with the activation of the final layer.
    """

    def __init__(self, activation_fn=None):
        """
        :param activation_fn: Class object of the activation function. NoActivation is used
            when nothing (or a falsy value) is given.
        """
        self.activation_fn = activation_fn or NoActivation

    def activation(self, z):
        """
        :param z: (array) w(x) + b
        :return: (array) Output of the configured activation function.
        """
        activate = self.activation_fn.activation
        return activate(z)

    @staticmethod
    def loss(y_true, y_pred):
        """
        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector
        :return: (flt) Mean of the squared differences.
        """
        residual = y_pred - y_true
        return np.mean(residual ** 2)

    @staticmethod
    def prime(y_true, y_pred):
        """
        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector
        :return: (array) Difference between prediction and truth.
        """
        error = y_pred - y_true
        return error

    def delta(self, y_true, y_pred):
        """
        Back propagation error delta

        NOTE(review): the activation derivative is evaluated at y_pred rather than at a
        pre-activation value - confirm this is intended.

        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector
        :return: (array)
        """
        error = self.prime(y_true, y_pred)
        slope = self.activation_fn.prime(y_pred)
        return error * slope
|
||||
|
||||
|
||||
class NoActivation:
    """
    Identity stand-in for layers that should not apply an activation.

    f(x) = x * 1
    """

    @staticmethod
    def activation(z):
        """
        Pass the input through unchanged.

        :param z: (array) w(x) + b
        :return: z (array) The very same object that was passed in.
        """
        unchanged = z
        return unchanged

    @staticmethod
    def prime(z):
        """
        Derivative of the identity, which is 1 everywhere.

        :param z: (array)
        :return: z': (array) Ones with the shape and dtype of z.
        """
        slope = np.ones_like(z)
        return slope
|
||||
|
||||
|
||||
class Network:
    """
    Fully connected feed-forward network trained with mini-batch gradient descent.

    Layers are numbered from 1 (the input layer). w[i] and b[i] connect layer i to layer i + 1,
    and activations[i + 1] is applied to the output of that connection.
    """

    def __init__(self, dimensions, activations):
        """
        :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output)
        :param activations: (tpl/ list) Activations functions, one for every layer after the input.
        :raises ValueError: If the number of activations is not len(dimensions) - 1.

        Example of one hidden layer with
        - 2 inputs
        - 3 hidden nodes
        - 3 outputs


        layers -->    [1,        2,          3]
        ----------------------------------------

        dimensions =  (2,     3,          3)
        activations = (      Relu,      Sigmoid)
        """
        if len(activations) != len(dimensions) - 1:
            raise ValueError("Expected one activation for every layer after the input layer")

        self.n_layers = len(dimensions)
        self.loss = None
        self.learning_rate = None

        # Weights and biases are initiated by index. For a one hidden layer net you will have a w[1] and w[2]
        self.w = {}
        self.b = {}

        # Activations are also initiated by index. For the example we will have activations[2] and activations[3]
        self.activations = {}
        for i in range(len(dimensions) - 1):
            # Scale the random weights by 1 / sqrt(number of inputs of the layer).
            self.w[i + 1] = np.random.randn(dimensions[i], dimensions[i + 1]) / np.sqrt(dimensions[i])
            self.b[i + 1] = np.zeros(dimensions[i + 1])
            self.activations[i + 2] = activations[i]

    def _feed_forward(self, x):
        """
        Execute a forward feed through the network.

        :param x: (array) Batch of input data vectors.
        :return: (tpl) Node outputs and activations per layer. The numbering of the output is equivalent to the layer numbers.
        """

        # w(x) + b
        z = {}

        # activations: f(z)
        a = {1: x}  # First layer has no activations as input. The input x is the input.

        for i in range(1, self.n_layers):
            # current layer = i
            # activation layer = i + 1
            z[i + 1] = np.dot(a[i], self.w[i]) + self.b[i]
            a[i + 1] = self.activations[i + 1].activation(z[i + 1])

        return z, a

    def _back_prop(self, z, a, y_true):
        """
        Propagate the error backwards and update all weights and biases.

        The input dicts keys represent the layers of the net.

        a = { 1: x,
              2: f(w1(x) + b1)
              3: f(w2(a2) + b2)
              }

        :param z: (dict) w(x) + b
        :param a: (dict) f(z)
        :param y_true: (array) One hot encoded truth vector.
        :return: None
        """

        # Determine partial derivative and delta for the output layer.
        # delta output layer
        delta = self.loss.delta(y_true, a[self.n_layers])
        dw = np.dot(a[self.n_layers - 1].T, delta)

        # Updates are collected first and applied at the end, so every delta below is
        # computed with the weights that were used in the forward pass.
        update_params = {
            self.n_layers - 1: (dw, delta)
        }

        # In case of three layer net will iterate over i = 2 and i = 1
        # Determine partial derivative and delta for the rest of the layers.
        # Each iteration requires the delta from the previous layer, propagating backwards.
        for i in reversed(range(2, self.n_layers)):
            delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
            dw = np.dot(a[i - 1].T, delta)
            update_params[i - 1] = (dw, delta)

        for index, (dw, delta) in update_params.items():
            self._update_w_b(index, dw, delta)

    def _update_w_b(self, index, dw, delta):
        """
        Update weights and biases.

        NOTE(review): dw comes from a dot product over the batch axis while the bias step uses
        the mean over axis 0, so the two steps are scaled differently - confirm this is intended.

        :param index: (int) Number of the layer
        :param dw: (array) Partial derivatives
        :param delta: (array) Delta error.
        """

        self.w[index] -= self.learning_rate * dw
        self.b[index] -= self.learning_rate * np.mean(delta, 0)

    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=2e-2):
        """
        Train the network. Every 10th epoch the loss over the full data set is printed.

        Only full batches are used; samples left over after the last full batch of an
        epoch are skipped for that epoch.

        :param x: (array) Containing parameters
        :param y_true: (array) Containing one hot encoded labels.
        :param loss: Loss class (MSE, CrossEntropy etc.)
        :param epochs: (int) Number of epochs.
        :param batch_size: (int)
        :param learning_rate: (flt)
        :raises ValueError: If x and y_true differ in length or batch_size is smaller than 1.
        """
        if not x.shape[0] == y_true.shape[0]:
            raise ValueError("Length of x and y arrays don't match")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        # Initiate the loss object with the final activation function
        self.loss = loss(self.activations[self.n_layers])
        self.learning_rate = learning_rate

        n_samples = x.shape[0]
        for epoch in range(epochs):
            # Shuffle the data
            order = np.arange(n_samples)
            np.random.shuffle(order)
            x_ = x[order]
            y_ = y_true[order]

            for j in range(n_samples // batch_size):
                start = j * batch_size
                stop = start + batch_size
                z, a = self._feed_forward(x_[start:stop])
                self._back_prop(z, a, y_[start:stop])

            if (epoch + 1) % 10 == 0:
                _, a = self._feed_forward(x)
                print("Loss:", self.loss.loss(y_true, a[self.n_layers]))

    def predict(self, x):
        """
        :param x: (array) Containing parameters
        :return: (array) Activations of the final layer for x.
        """
        _, a = self._feed_forward(x)
        return a[self.n_layers]
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run: build a small net and print its initial parameters.
    from sklearn import datasets
    import sklearn.metrics
    np.random.seed(1)
    # Load data
    data = datasets.load_iris()
    x = data["data"]
    x = (x - x.mean()) / x.std()
    y = data["target"]
    #y = np.expand_dims(data["target"], 1)

    # one hot encoding
    y = np.eye(3)[y]

    from pprint import pprint

    nn = Network((2, 3, 1), (Relu, Sigmoid))

    print("Weights:")
    pprint(nn.w)

    print("Biases:")
    pprint(nn.b)

    pprint(nn.activations)

    # pprint() needs an object to print; calling it without one raises TypeError.
    # Emit a blank separator line instead.
    print()

    #nn.fit(x[:2], y[:2], MSE, 1, batch_size=2)
    # nn.fit(x, y, MSE, 1000, 16)

    # data = datasets.load_digits()
    #
    # x = data["data"]
    # y = data["target"]
    # y = np.eye(10)[y]
    #
    # nn = Network((64, 32, 10), (Relu, Sigmoid))
    # nn.fit(x, y, MSE, 100, 2)
    #
    # y_ = nn.predict(x)
    # a = np.argmax(y_, 1)
    #
    # for i in range(a.size):
    #     print(a[i], y[i], "\t", np.round(y_[i], 3))
    #
    # y_true = []
    # y_pred = []
    # for i in range(len(y)):
    #     y_pred.append(np.argmax(y_[i]))
    #     y_true.append(np.argmax(y[i]))
    #
    # print(sklearn.metrics.classification_report(y_true, y_pred))
|
||||
211
neural_networks/simple_mlp.py
Normal file
211
neural_networks/simple_mlp.py
Normal file
@@ -0,0 +1,211 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def new_model(in_dim, h_dim, out_dim):
    """
    Create the parameters of a net with one hidden layer.

    Weights are drawn with np.random.randn and divided by the square root of the number of
    inputs of their layer; biases start at zero.

    :param in_dim: (int) Size of the input vector.
    :param h_dim: (int) No. of hidden nodes.
    :param out_dim: (int) No. of output nodes.
    :return: (tpl) Two dicts, weights and biases, both keyed by 1 and 2.
    """
    layer_shapes = ((in_dim, h_dim), (h_dim, out_dim))

    w = {}
    b = {}
    for layer, (fan_in, fan_out) in enumerate(layer_shapes, start=1):
        w[layer] = np.random.randn(fan_in, fan_out) / np.sqrt(fan_in)
        b[layer] = np.zeros(fan_out)

    return w, b
|
||||
|
||||
|
||||
def relu(a):
    """
    Rectifier unit

    A new array is returned; the input is not modified.

    :param a: (array) activation vector.
    :return: (array) Relu activation: a with every negative entry replaced by 0.
    """
    return np.maximum(a, 0)
|
||||
|
||||
|
||||
def sigmoid(a):
    """
    Logistic function 1 / (1 + exp(-a)), applied element-wise.

    :param a: (array) activation vector.
    :return: (array) Sigmoid activation.
    """
    denominator = 1 + np.exp(-a)
    return 1 / denominator
|
||||
|
||||
|
||||
def diff_sigmoid(a):
    """
    Derivative of the sigmoid function, sigmoid(a) * (1 - sigmoid(a)).

    :param a: (array) activation vector.
    :return: (array) Element-wise derivative at a.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(a)
    return s * (1 - s)
|
||||
|
||||
|
||||
def diff_relu(a):
    """
    Derivative of the relu function.

    A new array is returned; the input is not modified.

    :param a: (array) activation vector.
    :return: (array) 1 where a > 0 and 0 elsewhere, in the dtype of a.
    """
    a = np.asarray(a)
    return (a > 0).astype(a.dtype)
|
||||
|
||||
|
||||
def feed_forward(p, w, b):
    """
    Run one forward pass through the two layers.

    :param p: (array) Parameters.
    :param w: (dict) Weights.
    :param b: (dict) Biases
    :return: (tpl) Two dicts (a, z). a[2] is the relu output of the hidden layer, z[2] is the
             value that was handed to relu, and z[3] is the sigmoid output of the final layer.
    """
    hidden_in = np.dot(p, w[1]) + b[1]
    hidden_out = relu(hidden_in)
    output = sigmoid(np.dot(hidden_out, w[2]) + b[2])

    a = {2: hidden_out}
    z = {2: hidden_in, 3: output}

    return a, z
|
||||
|
||||
|
||||
def cost_mse(a, y):
    """
    Mean squared error between predictions and labels.

    :param a: (array) Predictions
    :param y: (array) Ground truth labels
    :return: (flt) Loss
    """
    residual = a - y
    return np.mean(residual ** 2)
|
||||
|
||||
|
||||
def diff_cost_mse(a, y):
    """
    Difference between predictions and labels, used as the derivative of the cost.

    :param a: (array) Predictions
    :param y: (array) Ground truth labels
    :return: (array) a - y
    """
    error = a - y
    return error
|
||||
|
||||
|
||||
def bpe_delta(a, y):
    """
    Back propagating error delta

    a holds the predictions, i.e. values that already went through the sigmoid. The sigmoid
    derivative is therefore written in terms of its output, a * (1 - a), instead of
    applying the sigmoid a second time.

    :param a: (array) Predictions
    :param y: (array) Ground truth labels
    :return: (array) (a - y) * a * (1 - a)
    """
    cost_slope = a - y
    sigmoid_slope = a * (1 - a)
    return cost_slope * sigmoid_slope
|
||||
|
||||
|
||||
class NeuralNetwork:
    def __init__(self, in_dim, h_dim, out_dim, learning_rate=1e-4):
        """
        Simple one hidden layer net with relu activation in the hidden layer and sigmoid activation at the output
        layer.

        :param in_dim: (int) Size of the input vector.
        :param h_dim: (int) No. of hidden nodes.
        :param out_dim: (int) No. of output nodes.
        :param learning_rate: (flt)
        """
        self.w, self.b = new_model(in_dim, h_dim, out_dim)
        self.x = None  # input of the most recent forward pass
        self.a = None  # activations
        self.z = None  # xi * wi + bi
        self.learning_rate = learning_rate

    def feed_forward(self, p):
        """
        Compute the activations and z's. z = w(x) + b

        The input and both result dicts are stored on the instance for backprop().

        :param p: (array)
        """
        self.x = p
        self.a, self.z = feed_forward(p, self.w, self.b)

    def backprop(self, labels):
        """
        Backpropagate the error and update the weights and biases

        Uses the values stored by the last call to feed_forward().

        :param labels: (array) Ground truth vector.
        """
        # partial derivative with respect to layer 2
        delta3 = bpe_delta(self.z[3], labels)

        # dc_db2 = delta3
        # Keep this before the diff_relu call below, so the hidden activations are read
        # before any helper gets the chance to touch the stored arrays.
        dc_dw2 = np.dot(self.a[2].T, delta3)

        # partial derivative with respect to layer 1
        delta2 = np.dot(delta3, self.w[2].T) * diff_relu(self.z[2])

        # dc_db1 = delta2
        dc_dw1 = np.dot(self.x.T, delta2)

        # update weights and biases
        self.w[2] -= self.learning_rate * dc_dw2
        self.b[2] -= self.learning_rate * np.mean(delta3, 0)
        self.w[1] -= self.learning_rate * dc_dw1
        self.b[1] -= self.learning_rate * np.mean(delta2, 0)

    def stats(self):
        """
        Prints the weights and biases of both layers
        """
        for i in range(1, 3):
            print("Weights layer {}:\n".format(i), self.w[i], "\nBiases layer {}:\n".format(i), self.b[i], "\n")

    def fit(self, x, labels, batch_size, epochs):
        """
        Train the net. The loss over the full data set is printed every 100th epoch.

        Only full batches are used; samples left over after the last full batch of an
        epoch are skipped for that epoch.

        :param x: (array) Input vector.
        :param labels: (array) Ground truth vector.
        :param batch_size: (int) Size of mini batch
        :param epochs: (int) No. of epochs to train.
        """

        for i in range(epochs):
            # Shuffle the data
            seed = np.arange(x.shape[0])
            np.random.shuffle(seed)
            x_ = x[seed]
            labels_ = labels[seed]

            for j in range(x.shape[0] // batch_size):
                start = j * batch_size
                stop = start + batch_size
                self.feed_forward(x_[start:stop])
                self.backprop(labels_[start:stop])

            # Report on epochs 0, 100, 200, ... and only run the full forward pass
            # when its result is actually printed.
            if i % 100 == 0:
                _, y = feed_forward(x, self.w, self.b)
                print("Loss:", cost_mse(y[3], labels))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Demo: train the one hidden layer net on the iris data and print a classification report.
    from sklearn import datasets
    import sklearn.metrics
    np.random.seed(1)

    # Load data and normalise it with the mean and standard deviation of the whole array
    data = datasets.load_iris()
    x = data["data"]
    x = (x - x.mean()) / x.std()

    # one hot encoding of the three classes
    y = np.eye(3)[data["target"]]

    nn = NeuralNetwork(4, 4, 3, 1e-2)
    # Quick check on two samples: nn.fit(x[:2], y[:2], 2, 1)
    nn.fit(x, y, 8, 1000)

    # result
    _, y_ = feed_forward(x, nn.w, nn.b)
    print(y_[3])

    # Turn the one hot rows and the network outputs into class indices
    y_pred = [np.argmax(row) for row in y_[3]]
    y_true = [np.argmax(row) for row in y]

    print(sklearn.metrics.classification_report(y_true, y_pred))
|
||||
|
||||
247
neural_networks/vanilla_mlp.py
Normal file
247
neural_networks/vanilla_mlp.py
Normal file
@@ -0,0 +1,247 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class Relu:
    """
    Rectified linear unit: f(z) = max(z, 0).
    """

    @staticmethod
    def activation(z):
        """
        Apply the rectifier element-wise.

        A new array is returned; the input is not modified, so the caller keeps its pre-activation values.

        :param z: (array) w(x) + b
        :return: (array) z with every negative entry replaced by 0.
        """
        return np.maximum(z, 0)

    @staticmethod
    def prime(z):
        """
        Derivative of the rectifier.

        A new array is returned; the input is not modified.

        :param z: (array) w(x) + b
        :return: (array) 1 where z > 0 and 0 elsewhere, in the dtype of z.
        """
        z = np.asarray(z)
        return (z > 0).astype(z.dtype)
|
||||
|
||||
|
||||
class Sigmoid:
    """
    Logistic sigmoid: f(z) = 1 / (1 + exp(-z)).
    """

    @staticmethod
    def activation(z):
        """
        :param z: (array) w(x) + b
        :return: (array) Element-wise sigmoid of z.
        """
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def prime(z):
        """
        Derivative of the sigmoid, f(z) * (1 - f(z)).

        :param z: (array) w(x) + b
        :return: (array) Element-wise derivative at z.
        """
        # Evaluate the sigmoid once and reuse it for both factors.
        s = Sigmoid.activation(z)
        return s * (1 - s)
|
||||
|
||||
|
||||
class MSE:
    """
    Mean squared error loss paired with the activation of the final layer.
    """

    def __init__(self, activation_fn=None):
        """
        :param activation_fn: Class object of the activation function. NoActivation is used
            when nothing (or a falsy value) is given.
        """
        self.activation_fn = activation_fn or NoActivation

    def activation(self, z):
        """
        :param z: (array) w(x) + b
        :return: (array) Output of the configured activation function.
        """
        activate = self.activation_fn.activation
        return activate(z)

    @staticmethod
    def loss(y_true, y_pred):
        """
        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector
        :return: (flt) Mean of the squared differences.
        """
        residual = y_pred - y_true
        return np.mean(residual ** 2)

    @staticmethod
    def prime(y_true, y_pred):
        """
        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector
        :return: (array) Difference between prediction and truth.
        """
        error = y_pred - y_true
        return error

    def delta(self, y_true, y_pred):
        """
        Back propagation error delta

        NOTE(review): the activation derivative is evaluated at y_pred rather than at a
        pre-activation value - confirm this is intended.

        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector
        :return: (array)
        """
        error = self.prime(y_true, y_pred)
        slope = self.activation_fn.prime(y_pred)
        return error * slope
|
||||
|
||||
|
||||
class NoActivation:
    """
    Identity stand-in for layers that should not apply an activation.

    f(x) = x * 1
    """

    @staticmethod
    def activation(z):
        """
        Pass the input through unchanged.

        :param z: (array) w(x) + b
        :return: z (array) The very same object that was passed in.
        """
        unchanged = z
        return unchanged

    @staticmethod
    def prime(z):
        """
        Derivative of the identity, which is 1 everywhere.

        :param z: (array)
        :return: z': (array) Ones with the shape and dtype of z.
        """
        slope = np.ones_like(z)
        return slope
|
||||
|
||||
|
||||
class Network:
    """
    Fully connected feed-forward network trained with mini-batch gradient descent.

    Layers are numbered from 1 (the input layer). w[i] and b[i] connect layer i to layer i + 1,
    and activations[i + 1] is applied to the output of that connection.
    """

    def __init__(self, dimensions, activations):
        """
        :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output)
        :param activations: (tpl/ list) Activations functions, one for every layer after the input.
        :raises ValueError: If the number of activations is not len(dimensions) - 1.

        Example of one hidden layer with
        - 2 inputs
        - 3 hidden nodes
        - 3 outputs


        layers -->    [1,        2,          3]
        ----------------------------------------

        dimensions =  (2,     3,          3)
        activations = (      Relu,      Sigmoid)
        """
        if len(activations) != len(dimensions) - 1:
            raise ValueError("Expected one activation for every layer after the input layer")

        self.n_layers = len(dimensions)
        self.loss = None
        self.learning_rate = None

        # Weights and biases are initiated by index. For a one hidden layer net you will have a w[1] and w[2]
        self.w = {}
        self.b = {}

        # Activations are also initiated by index. For the example we will have activations[2] and activations[3]
        self.activations = {}
        for i in range(len(dimensions) - 1):
            # Scale the random weights by 1 / sqrt(number of inputs of the layer).
            self.w[i + 1] = np.random.randn(dimensions[i], dimensions[i + 1]) / np.sqrt(dimensions[i])
            self.b[i + 1] = np.zeros(dimensions[i + 1])
            self.activations[i + 2] = activations[i]

    def _feed_forward(self, x):
        """
        Execute a forward feed through the network.

        :param x: (array) Batch of input data vectors.
        :return: (tpl) Node outputs and activations per layer. The numbering of the output is equivalent to the layer numbers.
        """

        # w(x) + b
        z = {}

        # activations: f(z)
        a = {1: x}  # First layer has no activations as input. The input x is the input.

        for i in range(1, self.n_layers):
            # current layer = i
            # activation layer = i + 1
            z[i + 1] = np.dot(a[i], self.w[i]) + self.b[i]
            a[i + 1] = self.activations[i + 1].activation(z[i + 1])

        return z, a

    def _back_prop(self, z, a, y_true):
        """
        Propagate the error backwards and update all weights and biases.

        The input dicts keys represent the layers of the net.

        a = { 1: x,
              2: f(w1(x) + b1)
              3: f(w2(a2) + b2)
              }

        :param z: (dict) w(x) + b
        :param a: (dict) f(z)
        :param y_true: (array) One hot encoded truth vector.
        :return: None
        """

        # Determine partial derivative and delta for the output layer.
        # delta output layer
        delta = self.loss.delta(y_true, a[self.n_layers])
        dw = np.dot(a[self.n_layers - 1].T, delta)

        # Updates are collected first and applied at the end, so every delta below is
        # computed with the weights that were used in the forward pass.
        update_params = {
            self.n_layers - 1: (dw, delta)
        }

        # In case of three layer net will iterate over i = 2 and i = 1
        # Determine partial derivative and delta for the rest of the layers.
        # Each iteration requires the delta from the previous layer, propagating backwards.
        for i in reversed(range(2, self.n_layers)):
            delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
            dw = np.dot(a[i - 1].T, delta)
            update_params[i - 1] = (dw, delta)

        for index, (dw, delta) in update_params.items():
            self._update_w_b(index, dw, delta)

    def _update_w_b(self, index, dw, delta):
        """
        Update weights and biases.

        NOTE(review): dw comes from a dot product over the batch axis while the bias step uses
        the mean over axis 0, so the two steps are scaled differently - confirm this is intended.

        :param index: (int) Number of the layer
        :param dw: (array) Partial derivatives
        :param delta: (array) Delta error.
        """

        self.w[index] -= self.learning_rate * dw
        self.b[index] -= self.learning_rate * np.mean(delta, 0)

    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=1e-3):
        """
        Train the network. Every 10th epoch the loss over the full data set is printed.

        Only full batches are used; samples left over after the last full batch of an
        epoch are skipped for that epoch.

        :param x: (array) Containing parameters
        :param y_true: (array) Containing one hot encoded labels.
        :param loss: Loss class (MSE, CrossEntropy etc.)
        :param epochs: (int) Number of epochs.
        :param batch_size: (int)
        :param learning_rate: (flt)
        :raises ValueError: If x and y_true differ in length or batch_size is smaller than 1.
        """
        if not x.shape[0] == y_true.shape[0]:
            raise ValueError("Length of x and y arrays don't match")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        # Initiate the loss object with the final activation function
        self.loss = loss(self.activations[self.n_layers])
        self.learning_rate = learning_rate

        n_samples = x.shape[0]
        for epoch in range(epochs):
            # Shuffle the data
            order = np.arange(n_samples)
            np.random.shuffle(order)
            x_ = x[order]
            y_ = y_true[order]

            for j in range(n_samples // batch_size):
                start = j * batch_size
                stop = start + batch_size
                z, a = self._feed_forward(x_[start:stop])
                self._back_prop(z, a, y_[start:stop])

            if (epoch + 1) % 10 == 0:
                _, a = self._feed_forward(x)
                print("Loss:", self.loss.loss(y_true, a[self.n_layers]))

    def predict(self, x):
        """
        :param x: (array) Containing parameters
        :return: (array) Activations of the final layer for x.
        """
        _, a = self._feed_forward(x)
        return a[self.n_layers]
|
||||
|
||||
if __name__ == "__main__":
    # Demo: train on the sklearn digits data and print a classification report.
    from sklearn import datasets
    import sklearn.metrics
    np.random.seed(1)
    data = datasets.load_digits()

    x = data["data"]
    # one hot encoding of the ten digit classes
    y = np.eye(10)[data["target"]]

    nn = Network((64, 15, 10), (Relu, Sigmoid))
    nn.fit(x, y, loss=MSE, epochs=50, batch_size=15, learning_rate=1e-3)

    prediction = nn.predict(x)

    # Turn the one hot rows and the network outputs into class indices
    y_pred = [np.argmax(row) for row in prediction]
    y_true = [np.argmax(row) for row in y]

    print(sklearn.metrics.classification_report(y_true, y_pred))
|
||||
Reference in New Issue
Block a user