commit add5381051
Author: Ritchie
Date:   2017-06-24 21:13:41 +02:00

2 changed files with 235 additions and 0 deletions

.gitignore (vendored, new file, 25 lines)

@@ -0,0 +1,25 @@
MANIFEST
build
dist
_build
docs/man/*.gz
docs/source/api/generated
docs/source/config/options
docs/source/interactive/magics-generated.txt
docs/gh-pages
jupyter_notebook/notebook/static/mathjax
jupyter_notebook/static/style/*.map
*.py[co]
__pycache__
*.egg-info
*~
*.bak
.ipynb_checkpoints
.tox
.DS_Store
\#*#
.#*
.coverage
*.swp
ignore
.idea

simple_mlp.py (new file, 210 lines)

@@ -0,0 +1,210 @@
import numpy as np
from sklearn import datasets
import sklearn.metrics
def new_model(in_dim, h_dim, out_dim):
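    """
    Initialise the weights and biases of a one-hidden-layer net.
    Weights are scaled by 1 / sqrt(fan_in) to keep the initial activations small.
    :param in_dim: (int) Size of the input vector.
    :param h_dim: (int) No. of hidden nodes.
    :param out_dim: (int) No. of output nodes.
    :return: (dict, dict) Weights and biases per layer.
    """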
w = {
1: np.random.randn(in_dim, h_dim) / np.sqrt(in_dim),
2: np.random.randn(h_dim, out_dim) / np.sqrt(h_dim)
}
b = {
1: np.zeros(h_dim),
2: np.zeros(out_dim)
}
return w, b
def relu(a):
    """
    Rectifier unit.
    :param a: (array) activation vector.
    :return: (array) Relu activation.
    """
    # return a new array instead of mutating the input in place
    return np.maximum(a, 0)
def sigmoid(a):
"""
Sigmoid activation function.
:param a: (array) activation vector.
:return: (array) Sigmoid activation.
"""
return 1 / (1 + np.exp(-a))
def diff_sigmoid(a):
"""
Derivative of the sigmoid function.
:param a: (array) activation vector.
:return: (array)
"""
return sigmoid(a) * (1 - sigmoid(a))
def diff_relu(a):
    """
    Derivative of the relu function.
    :param a: (array) activation vector.
    :return: (array) 1 where the unit was active, 0 elsewhere.
    """
    # no in-place mutation of the input array
    return (a > 0).astype(a.dtype)
def feed_forward(p, w, b):
    """
    Feed forward propagation.
    :param p: (array) Input batch.
    :param w: (dict) Weights.
    :param b: (dict) Biases.
    :return: (dict, dict) Activations a and pre-activations z per layer.
    """
    a = {}
    z = {}
    # hidden layer
    z[2] = np.dot(p, w[1]) + b[1]
    a[2] = relu(z[2])
    # output layer
    z[3] = np.dot(a[2], w[2]) + b[2]
    a[3] = sigmoid(z[3])
    return a, z
def cost_mse(a, y):
"""
Cost function.
:param a: (array) Predictions
:param y: (array) Ground truth labels
:return: (flt) Loss
"""
return np.mean((a - y)**2)
def diff_cost_mse(a, y):
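    """
    Derivative of the MSE cost w.r.t. the predictions.
    The constant factor of the mean is left out; it is absorbed by the learning rate.
    """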
return (a - y)
def bpe_delta(a, z, y):
    """
    Back propagating error delta at the output layer.
    :param a: (array) Predictions (output activations).
    :param z: (array) Pre-activations of the output layer.
    :param y: (array) Ground truth labels.
    :return: (array)
    """
    return diff_cost_mse(a, y) * diff_sigmoid(z)
class NeuralNetwork:
def __init__(self, in_dim, h_dim, out_dim, learning_rate=1e-4):
"""
Simple one hidden layer net with relu activation in the hidden layer and sigmoid activation at the output
layer.
:param in_dim: (int) Size of the input vector.
:param h_dim: (int) No. of hidden nodes.
:param out_dim: (int) No. of output nodes.
:param learning_rate: (flt)
"""
self.w, self.b = new_model(in_dim, h_dim, out_dim)
self.x = None
self.a = None # activations
        self.z = None  # pre-activations: x.dot(w) + b
self.learning_rate = learning_rate
def feed_forward(self, p):
"""
        Compute the activations a and the pre-activations z = x.dot(w) + b.
:param p: (array)
"""
self.x = p
self.a, self.z = feed_forward(p, self.w, self.b)
    def backprop(self, labels):
        """
        Backpropagate the error and update the weights and biases.
        :param labels: (array) Ground truth vector.
        """
        n = labels.shape[0]
        # error at the output layer (partial derivative with respect to layer 2)
        delta3 = bpe_delta(self.a[3], self.z[3], labels)
        # dc_db2 = delta3
        dc_dw2 = np.dot(self.a[2].T, delta3)
        # error propagated back to the hidden layer (partial derivative with respect to layer 1)
        delta2 = np.dot(delta3, self.w[2].T) * diff_relu(self.z[2])
        # dc_db1 = delta2
        dc_dw1 = np.dot(self.x.T, delta2)
        # update the weights and biases with the mean gradient over the batch
        self.w[2] -= self.learning_rate * dc_dw2 / n
        self.b[2] -= self.learning_rate * np.mean(delta3, 0)
        self.w[1] -= self.learning_rate * dc_dw1 / n
        self.b[1] -= self.learning_rate * np.mean(delta2, 0)
def stats(self):
"""
Prints some weights and biases
"""
for i in range(1, 3):
print("Weights layer {}:\n".format(i), self.w[i], "\nBiases layer {}:\n".format(i), self.b[i], "\n")
def fit(self, x, labels, batch_size, epochs):
"""
Train the net.
:param x: (array) Input vector.
:param labels: (array) Ground truth vector.
:param batch_size: (int) Size of mini batch
:param epochs: (int) No. of epochs to train.
"""
for i in range(epochs):
# Shuffle the data
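            # draw a random permutation of the row indices so that x and labels stay aligned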
seed = np.arange(x.shape[0])
np.random.shuffle(seed)
x_ = x[seed]
labels_ = labels[seed]
for j in range(x.shape[0] // batch_size):
self.feed_forward(x_[j * batch_size: (j + 1) * batch_size])
self.backprop(labels_[j * batch_size: (j + 1) * batch_size])
            # evaluate on the full data set and print the loss about 20 times during training
            y, _ = feed_forward(x, self.w, self.b)
            if i % (epochs // 20) == 0:
                print(cost_mse(y[3], labels))
if __name__ == "__main__":
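    # Demo: train the net on the iris data set and print a classification report.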
np.random.seed(1)
# Load data
data = datasets.load_iris()
x = data["data"]
x = (x - x.mean()) / x.std()
    # one hot encoding of the integer class labels, shape (n_samples, 3)
    y = np.eye(3)[data["target"]]
nn = NeuralNetwork(4, 8, 3, 2e-2)
nn.fit(x, y, 10, int(1e3))
# result
    y_, _ = feed_forward(x, nn.w, nn.b)  # y_[3] holds the output activations
y_true = []
y_pred = []
for i in range(len(y)):
y_pred.append(np.argmax(y_[3][i]))
y_true.append(np.argmax(y[i]))
print(y_pred[-1], y_true[-1])
print(sklearn.metrics.classification_report(y_true, y_pred))