# vanilla-machine-learning/neural_networks/functions.py

import numpy as np


class Relu:
    @staticmethod
    def activation(z):
        # Element-wise max(0, z); returns a new array instead of modifying z in place.
        return np.maximum(z, 0)

    @staticmethod
    def prime(z):
        # Derivative of ReLU: 1 where z > 0, else 0.
        return np.where(z > 0, 1.0, 0.0)


class Sigmoid:
    @staticmethod
    def activation(z):
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def prime(z):
        return Sigmoid.activation(z) * (1 - Sigmoid.activation(z))


class Softmax:
    @staticmethod
    def activation(z):
        """
        https://stackoverflow.com/questions/34968722/softmax-function-python
        Numerically stable version
        """
        # Subtract the row-wise max for numerical stability and normalise over
        # the last axis, so both single vectors and (batch, classes) arrays work.
        e_x = np.exp(z - np.max(z, axis=-1, keepdims=True))
        return e_x / e_x.sum(axis=-1, keepdims=True)
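
# Quick sanity check for Softmax.activation (values rounded):
#     Softmax.activation(np.array([1.0, 2.0, 3.0])) -> approx. [0.090, 0.245, 0.665]
# The outputs are positive and sum to 1, as expected of a probability vector.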
# https://stackoverflow.com/questions/33541930/how-to-implement-the-softmax-derivative-independently-from-any-loss-function
# http://cs231n.github.io/neural-networks-case-study/#loss


class CrossEntropy:
    """
    Used with Softmax activation in final layer
    """

    def __init__(self, activation_fn=None):
        """
        Accept the final activation function so that Network.fit, which
        instantiates its loss as loss(final_activation), can use this class
        the same way it uses MSE.

        :param activation_fn: Class object of the final activation function
                              (expected to be Softmax).
        """
        self.activation_fn = activation_fn if activation_fn else Softmax

    @staticmethod
    def activation(z):
        return Softmax.activation(z)

    @staticmethod
    def delta(y_true, y):
        """
        http://cs231n.github.io/linear-classify/#softmax
        https://stackoverflow.com/questions/27089932/cross-entropy-softmax-and-the-derivative-term-in-backpropagation

        :param y_true: (array) One hot encoded truth vector.
        :param y: (array) Prediction vector.
        :return: (array) Delta vector.

        y are the softmax probabilities, y_true is the one hot encoded truth vector.

        y       y_true
        [0.8]   [1]
        [0.1]   [0]
        [0.1]   [0]

        result:
        [-0.2]
        [ 0.1]
        [ 0.1]
        """
        return y - y_true

    @staticmethod
    def loss(y_true, y):
        """
        https://datascience.stackexchange.com/questions/9302/the-cross-entropy-error-function-in-neural-networks

        :param y_true: (array) One hot encoded truth vector.
        :param y: (array) Prediction vector
        :return: (flt)
        """
        # Sum over the class axis and average over the batch, so this also
        # works when y_true and y are 2D (batch_size, n_classes) arrays.
        return -np.mean(np.sum(y_true * np.log(y), axis=-1))
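
# Worked example using the vectors from the delta() docstring: with
# y = [0.8, 0.1, 0.1] and y_true = [1, 0, 0], loss = -ln(0.8) ~= 0.223
# and delta = y - y_true = [-0.2, 0.1, 0.1].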


class MSE:
    def __init__(self, activation_fn=None):
        """
        :param activation_fn: Class object of the activation function.
        """
        if activation_fn:
            self.activation_fn = activation_fn
        else:
            self.activation_fn = NoActivation

    def activation(self, z):
        return self.activation_fn.activation(z)

    @staticmethod
    def loss(y_true, y_pred):
        """
        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector
        :return: (flt)
        """
        return np.mean((y_pred - y_true)**2)

    @staticmethod
    def prime(y_true, y_pred):
        return y_pred - y_true

    def delta(self, y_true, y_pred):
        """
        Back propagation error delta
        :return: (array)
        """
        # Note: the activation derivative is evaluated at the activated output
        # y_pred, because Network._back_prop only passes the prediction here.
        return self.prime(y_true, y_pred) * self.activation_fn.prime(y_pred)
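
# Example: with the default NoActivation plugin, delta reduces to (y_pred - y_true),
# e.g. MSE().delta(np.array([1.0, 0.0]), np.array([0.8, 0.2])) -> [-0.2, 0.2].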


class NoActivation:
    """
    This is a plugin function for no activation.

    f(x) = x * 1
    """
    @staticmethod
    def activation(z):
        """
        :param z: (array) w(x) + b
        :return: z (array)
        """
        return z

    @staticmethod
    def prime(z):
        """
        The derivative of f(z) = z is 1.
        :param z: (array)
        :return: z': (array)
        """
        return np.ones_like(z)


class Network:
    def __init__(self, dimensions, activations):
        """
        :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output)
        :param activations: (tpl/ list) Activation functions.

        Example of one hidden layer with
        - 2 inputs
        - 3 hidden nodes
        - 3 outputs

        layers -->    [1,     2,       3]
        ----------------------------------------
        dimensions =  (2,     3,       3)
        activations = (       Relu,    Sigmoid)
        """
        self.n_layers = len(dimensions)
        self.loss = None
        self.learning_rate = None

        # Weights and biases are initiated by index. For a one hidden layer net you will have a w[1] and w[2]
        self.w = {}
        self.b = {}

        # Activations are also initiated by index. For the example above we will have activations[2] and activations[3]
        self.activations = {}
        for i in range(len(dimensions) - 1):
            self.w[i + 1] = np.random.randn(dimensions[i], dimensions[i + 1]) / np.sqrt(dimensions[i])
            self.b[i + 1] = np.zeros(dimensions[i + 1])
            self.activations[i + 2] = activations[i]
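
    # For the docstring example, dimensions = (2, 3, 3) produces
    #     w = {1: array of shape (2, 3), 2: array of shape (3, 3)}
    #     b = {1: array of shape (3,),   2: array of shape (3,)}
    #     activations = {2: Relu, 3: Sigmoid}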

    def _feed_forward(self, x):
        """
        Execute a forward feed through the network.

        :param x: (array) Batch of input data vectors.
        :return: (tpl) Node outputs and activations per layer. The numbering of the output is equivalent to the layer numbers.
        """
        # w(x) + b
        z = {}

        # activations: f(z)
        a = {1: x}  # The first layer has no activation; the input x is used directly.

        for i in range(1, self.n_layers):
            # current layer = i
            # activation layer = i + 1
            z[i + 1] = np.dot(a[i], self.w[i]) + self.b[i]
            a[i + 1] = self.activations[i + 1].activation(z[i + 1])

        return z, a
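
    # For dimensions = (2, 3, 3) and a batch x of shape (n, 2) this returns
    #     z = {2: (n, 3), 3: (n, 3)}
    #     a = {1: (n, 2), 2: (n, 3), 3: (n, 3)}
    # where each value is the shape of the array stored under that layer index.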

    def _back_prop(self, z, a, y_true):
        """
        The input dict keys represent the layers of the net.

        a = { 1: x,
              2: f(w1(x) + b1)
              3: f(w2(a2) + b2)
              }

        :param z: (dict) w(x) + b
        :param a: (dict) f(z)
        :param y_true: (array) One hot encoded truth vector.
        :return:
        """
        # Determine the partial derivative and delta for the output layer.
        delta = self.loss.delta(y_true, a[self.n_layers])
        dw = np.dot(a[self.n_layers - 1].T, delta)

        update_params = {
            self.n_layers - 1: (dw, delta)
        }

        # In case of a three layer net, this will iterate over i = 2 and i = 1.
        # Determine the partial derivative and delta for the rest of the layers.
        # Each iteration requires the delta from the previous layer, propagating backwards.
        for i in reversed(range(2, self.n_layers)):
            delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
            dw = np.dot(a[i - 1].T, delta)
            update_params[i - 1] = (dw, delta)

        for k, v in update_params.items():
            self._update_w_b(k, v[0], v[1])
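
    # Written out, the loop above computes per hidden layer i (moving backwards):
    #     delta[i] = (delta[i + 1] . w[i].T) * activations[i].prime(z[i])
    #     dw       = a[i - 1].T . delta[i]        # gradient of w[i - 1]
    # i.e. each layer's delta is propagated through the weights and scaled by
    # the derivative of its own activation.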

    def _update_w_b(self, index, dw, delta):
        """
        Update weights and biases.

        :param index: (int) Number of the layer
        :param dw: (array) Partial derivatives
        :param delta: (array) Delta error.
        """
        self.w[index] -= self.learning_rate * dw
        # The bias gradient is the delta averaged over the batch dimension.
        self.b[index] -= self.learning_rate * np.mean(delta, 0)

    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=2e-2):
        """
        :param x: (array) Input data.
        :param y_true: (array) Containing one hot encoded labels.
        :param loss: Loss class (MSE, CrossEntropy etc.)
        :param epochs: (int) Number of epochs.
        :param batch_size: (int)
        :param learning_rate: (flt)
        """
        if x.shape[0] != y_true.shape[0]:
            raise ValueError("Length of x and y arrays don't match")

        # Initiate the loss object with the final activation function
        self.loss = loss(self.activations[self.n_layers])
        self.learning_rate = learning_rate

        for i in range(epochs):
            # Shuffle the data by permuting the sample indices.
            seed = np.arange(x.shape[0])
            np.random.shuffle(seed)
            x_ = x[seed]
            y_ = y_true[seed]

            for j in range(x.shape[0] // batch_size):
                k = j * batch_size
                l = (j + 1) * batch_size
                z, a = self._feed_forward(x_[k:l])
                self._back_prop(z, a, y_[k:l])

            if (i + 1) % 10 == 0:
                _, a = self._feed_forward(x)
                print("Loss:", self.loss.loss(y_true, a[self.n_layers]))
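
    # Typical call (sketch; the data names below are placeholders, not defined here):
    #     nn = Network((4, 8, 3), (Relu, Sigmoid))
    #     nn.fit(x_train, y_train_one_hot, loss=MSE, epochs=100, batch_size=16)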

    def predict(self, x):
        """
        :param x: (array) Input data.
        :return: (array) A 2D array of shape (n_cases, n_classes).
        """
        _, a = self._feed_forward(x)
        return a[self.n_layers]
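
    # Example (sketch) for a trained 3-class net:
    #     probs = nn.predict(x)            # shape (n_samples, 3)
    #     labels = np.argmax(probs, 1)     # predicted class index per sample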


if __name__ == "__main__":
    from sklearn import datasets
    import sklearn.metrics

    np.random.seed(1)

    # Load data
    data = datasets.load_iris()
    x = data["data"]
    x = (x - x.mean()) / x.std()
    y = data["target"]
    # y = np.expand_dims(data["target"], 1)

    # one hot encoding
    y = np.eye(3)[y]

    from pprint import pprint

    # 4 iris features in, one hidden layer of 3 nodes, 3 classes out.
    nn = Network((4, 3, 3), (Relu, Sigmoid))
    print("Weights:")
    pprint(nn.w)
    print("Biases:")
    pprint(nn.b)
    pprint(nn.activations)

    # nn.fit(x[:2], y[:2], MSE, 1, batch_size=2)
    # nn.fit(x, y, MSE, 1000, 16)

    # data = datasets.load_digits()
    #
    # x = data["data"]
    # y = data["target"]
    # y = np.eye(10)[y]
    #
    # nn = Network((64, 32, 10), (Relu, Sigmoid))
    # nn.fit(x, y, MSE, 100, 2)
    #
    # y_ = nn.predict(x)
    # a = np.argmax(y_, 1)
    #
    # for i in range(a.size):
    #     print(a[i], y[i], "\t", np.round(y_[i], 3))
    #
    # y_true = []
    # y_pred = []
    # for i in range(len(y)):
    #     y_pred.append(np.argmax(y_[i]))
    #     y_true.append(np.argmax(y[i]))
    #
    # print(sklearn.metrics.classification_report(y_true, y_pred))