import numpy as np


class Relu:

    @staticmethod
    def activation(z):
        # f(z) = max(0, z). np.maximum returns a new array rather than mutating
        # the caller's z in place.
        return np.maximum(z, 0)

    @staticmethod
    def prime(z):
        # f'(z) = 1 where z > 0, else 0.
        return (z > 0).astype(z.dtype)


class Sigmoid:

    @staticmethod
    def activation(z):
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def prime(z):
        return Sigmoid.activation(z) * (1 - Sigmoid.activation(z))


class MSE:
    def __init__(self, activation_fn=None):
        """
        :param activation_fn: Class object of the activation function.
        """
        if activation_fn:
            self.activation_fn = activation_fn
        else:
            self.activation_fn = NoActivation

    def activation(self, z):
        return self.activation_fn.activation(z)

    @staticmethod
    def loss(y_true, y_pred):
        """
        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector.
        :return: (flt)
        """
        return np.mean((y_pred - y_true)**2)

    @staticmethod
    def prime(y_true, y_pred):
        # Gradient of the squared error w.r.t. y_pred; the constant factor from the
        # mean is left out, as it is absorbed by the learning rate.
        return y_pred - y_true

    def delta(self, y_true, y_pred):
        """
        Back propagation error delta.

        :return: (array)
        """
        return self.prime(y_true, y_pred) * self.activation_fn.prime(y_pred)


class NoActivation:
    """
    This is a plugin function for no activation.

    f(x) = x * 1
    """

    @staticmethod
    def activation(z):
        """
        :param z: (array) w(x) + b
        :return: z (array)
        """
        return z

    @staticmethod
    def prime(z):
        """
        The derivative of z * 1 is 1.

        :param z: (array)
        :return: z' (array)
        """
        return np.ones_like(z)


class Network:
    def __init__(self, dimensions, activations):
        """
        :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output)
        :param activations: (tpl/ list) Activation functions.

        Example of one hidden layer with
        - 2 inputs
        - 3 hidden nodes
        - 3 outputs

        layers -->    [1,     2,     3]
        ----------------------------------------
        dimensions =  (2,     3,     3)
        activations = (       Relu,  Sigmoid)
        """
        self.n_layers = len(dimensions)
        self.loss = None
        self.learning_rate = None

        # Weights and biases are initialized by index. For a one hidden layer net you will have a w[1] and w[2]
        self.w = {}
        self.b = {}

        # Activations are also indexed by layer. For the example above we will have activations[2] and activations[3]
        self.activations = {}
        for i in range(len(dimensions) - 1):
            self.w[i + 1] = np.random.randn(dimensions[i], dimensions[i + 1]) / np.sqrt(dimensions[i])
            self.b[i + 1] = np.zeros(dimensions[i + 1])
            self.activations[i + 2] = activations[i]
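
        # For the docstring example Network((2, 3, 3), (Relu, Sigmoid)) the loop above
        # produces:
        #     self.w           = {1: array of shape (2, 3), 2: array of shape (3, 3)}
        #     self.b           = {1: array of shape (3,),   2: array of shape (3,)}
        #     self.activations = {2: Relu, 3: Sigmoid}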

    def _feed_forward(self, x):
        """
        Execute a forward feed through the network.

        :param x: (array) Batch of input data vectors.
        :return: (tpl) Dicts of node inputs z and activations a per layer. The dict
            keys correspond to the layer numbers.
        """
        # w(x) + b
        z = {}

        # activations: f(z)
        a = {1: x}  # The first layer has no activation; the input x serves as a[1].

        for i in range(1, self.n_layers):
            # current layer = i
            # activation layer = i + 1
            z[i + 1] = np.dot(a[i], self.w[i]) + self.b[i]
            a[i + 1] = self.activations[i + 1].activation(z[i + 1])

        # For a batch x of shape (n_samples, dimensions[0]), z[i + 1] and a[i + 1]
        # have shape (n_samples, dimensions[i]).
        return z, a

    def _back_prop(self, z, a, y_true):
        """
        The keys of the input dicts represent the layers of the net.

        a = {1: x,
             2: f(w1(x) + b1),
             3: f(w2(a2) + b2)}

        :param z: (dict) w(x) + b
        :param a: (dict) f(z)
        :param y_true: (array) One hot encoded truth vector.
        :return:
        """
        # Determine partial derivative and delta for the output layer.
        delta = self.loss.delta(y_true, a[self.n_layers])
        dw = np.dot(a[self.n_layers - 1].T, delta)

        update_params = {
            self.n_layers - 1: (dw, delta)
        }

        # Determine partial derivative and delta for the remaining layers.
        # Each iteration requires the delta from the layer above, propagating backwards.
        # In case of a three layer net this loop only runs for i = 2.
        for i in reversed(range(2, self.n_layers)):
            delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
            dw = np.dot(a[i - 1].T, delta)
            update_params[i - 1] = (dw, delta)

        for k, v in update_params.items():
            self._update_w_b(k, v[0], v[1])
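
        # The recursion above is standard back propagation: the delta of layer i is the
        # delta of the layer above, propagated back through w[i] and scaled by the local
        # activation derivative. The weight gradient is the dot product of the incoming
        # activations with that delta (which also sums over the batch); the bias gradient
        # uses the delta itself. Both are applied to w[i - 1] and b[i - 1] in _update_w_b.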

    def _update_w_b(self, index, dw, delta):
        """
        Update weights and biases.

        :param index: (int) Number of the layer
        :param dw: (array) Partial derivatives
        :param delta: (array) Delta error.
        """
        self.w[index] -= self.learning_rate * dw
        self.b[index] -= self.learning_rate * np.mean(delta, 0)

    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=1e-3):
        """
        :param x: (array) Input data.
        :param y_true: (array) Containing one hot encoded labels.
        :param loss: Loss class (MSE, CrossEntropy etc.)
        :param epochs: (int) Number of epochs.
        :param batch_size: (int)
        :param learning_rate: (flt)
        """
        if not x.shape[0] == y_true.shape[0]:
            raise ValueError("Length of x and y arrays don't match")
        # Initiate the loss object with the final activation function
        self.loss = loss(self.activations[self.n_layers])
        self.learning_rate = learning_rate

        for i in range(epochs):
            # Shuffle the data
            seed = np.arange(x.shape[0])
            np.random.shuffle(seed)
            x_ = x[seed]
            y_ = y_true[seed]

            for j in range(x.shape[0] // batch_size):
                k = j * batch_size
                l = (j + 1) * batch_size
                z, a = self._feed_forward(x_[k:l])
                self._back_prop(z, a, y_[k:l])

            if (i + 1) % 10 == 0:
                _, a = self._feed_forward(x)
                print("Loss:", self.loss.loss(y_true, a[self.n_layers]))

    def predict(self, x):
        """
        :param x: (array) Input data.
        :return: (array) A 2D array of shape (n_cases, n_classes).
        """
        _, a = self._feed_forward(x)
        return a[self.n_layers]


if __name__ == "__main__":
    from sklearn import datasets
    import sklearn.metrics

    np.random.seed(1)
    data = datasets.load_digits()

    x = data["data"]
    y = data["target"]
    y = np.eye(10)[y]  # one hot encode the labels

    nn = Network((64, 15, 10), (Relu, Sigmoid))
    nn.fit(x, y, loss=MSE, epochs=50, batch_size=15, learning_rate=1e-3)

    prediction = nn.predict(x)

    y_true = []
    y_pred = []
    for i in range(len(y)):
        y_pred.append(np.argmax(prediction[i]))
        y_true.append(np.argmax(y[i]))

    print(sklearn.metrics.classification_report(y_true, y_pred))
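
    # A minimal sketch of measuring generalisation on a held-out split instead of the
    # training data used above (assumes scikit-learn's train_test_split; not executed here):
    #
    #     from sklearn.model_selection import train_test_split
    #     x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)
    #     nn = Network((64, 15, 10), (Relu, Sigmoid))
    #     nn.fit(x_train, y_train, loss=MSE, epochs=50, batch_size=15, learning_rate=1e-3)
    #     print(sklearn.metrics.classification_report(
    #         np.argmax(y_test, 1), np.argmax(nn.predict(x_test), 1)))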