refactor

2019-09-07 14:45:59 +02:00
parent b0306e3231
commit 2abe1dc891
5 changed files with 0 additions and 46 deletions
--- a/neural_networks/init.py
+++ b/neural_networks/init.py
--- a/neural_networks/functions.py
+++ b/neural_networks/functions.py
@@ -0,0 +1,340 @@
+import numpy as np
+
+
+class Relu:
+    """
+    Rectified linear unit: f(z) = max(z, 0).
+
+    Both methods return a new array and leave their argument untouched, so the
+    pre-activations of the caller are still available after the call.
+    """
+
+    @staticmethod
+    def activation(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) Copy of z with the negative entries replaced by 0.
+        """
+        return np.maximum(z, 0)
+
+    @staticmethod
+    def prime(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) 1 where z > 0 and 0 elsewhere, in the dtype of z.
+        """
+        z = np.asarray(z)
+        return (z > 0).astype(z.dtype)
+
+
+class Sigmoid:
+    """
+    Logistic function f(z) = 1 / (1 + e^-z) and its derivative.
+    """
+
+    @staticmethod
+    def activation(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) Element wise sigmoid of z.
+        """
+        denominator = 1 + np.exp(-z)
+        return 1 / denominator
+
+    @staticmethod
+    def prime(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) f(z) * (1 - f(z))
+        """
+        s = Sigmoid.activation(z)
+        return s * (1 - s)
+
+
+class Softmax:
+    @staticmethod
+    def activation(z):
+        """
+        https://stackoverflow.com/questions/34968722/softmax-function-python
+
+        Numerically stable version: the maximum is subtracted before exponentiating.
+        The normalisation runs over the last axis, so for a batch every row sums to 1
+        instead of the whole batch summing to 1.
+
+        :param z: (array) w(x) + b. A single vector or a batch of shape (n_cases, n_classes).
+        :return: (array) Probabilities with the same shape as z.
+        """
+        z = np.asarray(z)
+        e_x = np.exp(z - np.max(z, axis=-1, keepdims=True))
+        return e_x / e_x.sum(axis=-1, keepdims=True)
+
+    # https://stackoverflow.com/questions/33541930/how-to-implement-the-softmax-derivative-independently-from-any-loss-function
+    # http://cs231n.github.io/neural-networks-case-study/#loss
+
+
+class CrossEntropy:
+    """
+    Used with Softmax activation in final layer
+    """
+
+    def __init__(self, activation_fn=None):
+        """
+        Network.fit instantiates the loss as loss(final_activation), so the constructor
+        has to accept that argument. The output activation of this loss is always Softmax;
+        the argument is only stored.
+
+        :param activation_fn: Class object of the activation function of the final layer.
+        """
+        self.activation_fn = activation_fn
+
+    @staticmethod
+    def activation(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) Softmax probabilities.
+        """
+        return Softmax.activation(z)
+
+    @staticmethod
+    def delta(y_true, y):
+        """
+        http://cs231n.github.io/linear-classify/#softmax
+        https://stackoverflow.com/questions/27089932/cross-entropy-softmax-and-the-derivative-term-in-backpropagation
+        :param y_true: (array) One hot encoded truth vector.
+        :param y: (array) Prediction vector.
+        :return: (array) Delta vector.
+
+        y are softmax probabilities
+        y_true is truth vector one hot encoded
+
+        y         y_true
+        [0.8]     [1]
+        [0.1]     [0]
+        [0.1]     [0]
+
+        result:
+
+        [-0.2]
+        [0.1]
+        [0.1]
+
+        """
+        return y - y_true
+
+    @staticmethod
+    def loss(y_true, y):
+        """
+        https://datascience.stackexchange.com/questions/9302/the-cross-entropy-error-function-in-neural-networks
+
+        Works on a single vector as well as on a batch of shape (n_cases, n_classes);
+        for a batch the mean loss over the cases is returned.
+
+        :param y_true: (array) One hot encoded truth vector.
+        :param y: (array) Prediction vector
+        :return: (flt)
+        """
+        # Keep the argument of the log strictly positive so a probability of 0 does not yield -inf / nan.
+        log_y = np.log(np.maximum(y, 1e-12))
+        return np.mean(-np.sum(y_true * log_y, axis=-1))
+
+
+class MSE:
+    """
+    Mean squared error loss, coupled to the activation function of the output layer.
+    """
+
+    def __init__(self, activation_fn=None):
+        """
+        :param activation_fn: Class object of the activation function. NoActivation is used when it is omitted.
+        """
+        self.activation_fn = activation_fn or NoActivation
+
+    def activation(self, z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) Output of the configured activation function.
+        """
+        return self.activation_fn.activation(z)
+
+    @staticmethod
+    def loss(y_true, y_pred):
+        """
+        :param y_true: (array) One hot encoded truth vector.
+        :param y_pred: (array) Prediction vector
+        :return: (flt) Mean of the squared differences.
+        """
+        squared_error = (y_pred - y_true)**2
+        return np.mean(squared_error)
+
+    @staticmethod
+    def prime(y_true, y_pred):
+        """
+        :return: (array) Prediction minus truth.
+        """
+        error = y_pred - y_true
+        return error
+
+    def delta(self, y_true, y_pred):
+        """
+        Back propagation error delta
+
+        NOTE(review): the derivative of the activation is evaluated at y_pred, not at z - confirm this is intended.
+        :return: (array)
+        """
+        error = self.prime(y_true, y_pred)
+        slope = self.activation_fn.prime(y_pred)
+        return error * slope
+
+
+class NoActivation:
+    """
+    Identity activation, plugged in where a layer should hand z through unchanged.
+
+    f(z) = z
+    """
+    @staticmethod
+    def activation(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) The very same z.
+        """
+        return z
+
+    @staticmethod
+    def prime(z):
+        """
+        The derivative of the identity is 1 everywhere.
+        :param z: (array)
+        :return: (array) Ones with the shape and dtype of z.
+        """
+        return np.full_like(z, 1)
+
+
+class Network:
+    """
+    Fully connected feed forward net trained with mini batch gradient descent.
+
+    Layers are numbered from 1 (the input). w[i] and b[i] connect layer i to layer i + 1,
+    activations[i] is the activation class applied at layer i (i >= 2).
+    """
+
+    def __init__(self, dimensions, activations):
+        """
+        :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output)
+        :param activations: (tpl/ list) Activations functions, one per layer after the input layer.
+
+        Example of one hidden layer with
+        - 2 inputs
+        - 3 hidden nodes
+        - 3 outputs
+
+
+        layers -->    [1,        2,          3]
+        ----------------------------------------
+
+        dimensions =  (2,     3,          3)
+        activations = (      Relu,      Sigmoid)
+        """
+        self.n_layers = len(dimensions)
+        # Both are set by fit().
+        self.loss = None
+        self.learning_rate = None
+        # Weights and biases are initiated by index. For a one hidden layer net you will have a w[1] and w[2]
+        self.w = {}
+        self.b = {}
+
+        # Activations are also initiated by index. For the example we will have activations[2] and activations[3]
+        self.activations = {}
+        for i in range(len(dimensions) - 1):
+            # Standard normal weights scaled by 1 / sqrt(number of inputs of the layer); biases start at zero.
+            self.w[i + 1] = np.random.randn(dimensions[i], dimensions[i + 1]) / np.sqrt(dimensions[i])
+            self.b[i + 1] = np.zeros(dimensions[i + 1])
+            self.activations[i + 2] = activations[i]
+
+    def _feed_forward(self, x):
+        """
+        Execute a forward feed through the network.
+        :param x: (array) Batch of input data vectors.
+        :return: (tpl) (z, a): dicts with the node outputs and the activations per layer. The keys are equivalent to
+                 the layer numbers; z starts at key 2, a starts at key 1 (the input itself).
+        """
+
+        # w(x) + b
+        z = {}
+
+        # activations: f(z)
+        a = {1: x}  # First layer has no activations as input. The input x is the input.
+
+        for i in range(1, self.n_layers):
+            # current layer = i
+            # activation layer = i + 1
+            z[i + 1] = np.dot(a[i], self.w[i]) + self.b[i]
+            # NOTE(review): an activation that works in place makes z[i + 1] and a[i + 1] the same array object.
+            a[i + 1] = self.activations[i + 1].activation(z[i + 1])
+
+        return z, a
+
+    def _back_prop(self, z, a, y_true):
+        """
+        Back propagate the error of one batch and update all weights and biases.
+
+        The input dicts keys represent the layers of the net.
+
+        a = { 1: x,
+              2: f(w1(x) + b1)
+              3: f(w2(a2) + b2)
+              }
+
+        :param z: (dict) w(x) + b
+        :param a: (dict) f(z)
+        :param y_true: (array) One hot encoded truth vector.
+        :return: None. self.w and self.b are updated in place.
+        """
+
+        # Determine partial derivative and delta for the output layer.
+        # delta output layer
+        delta = self.loss.delta(y_true, a[self.n_layers])
+        dw = np.dot(a[self.n_layers - 1].T, delta)
+
+        # Gradients are collected first and applied after the loop, so every delta is computed with the
+        # weights that were used in the forward pass.
+        update_params = {
+            self.n_layers - 1: (dw, delta)
+        }
+
+        # In case of a three layer net this iterates over i = 2 only (range(2, n_layers) reversed).
+        # Determine partial derivative and delta for the rest of the layers.
+        # Each iteration requires the delta from the previous layer, propagating backwards.
+        for i in reversed(range(2, self.n_layers)):
+            # NOTE(review): a[i] has already been consumed when prime(z[i]) runs; keep this order if the
+            # activation's prime mutates its argument.
+            delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
+            dw = np.dot(a[i - 1].T, delta)
+            update_params[i - 1] = (dw, delta)
+
+        for k, v in update_params.items():
+            self._update_w_b(k, v[0], v[1])
+
+    def _update_w_b(self, index, dw, delta):
+        """
+        Update weights and biases.
+
+        The weight gradient dw arrives as a sum over the batch (np.dot in _back_prop), the bias gradient is the
+        mean of delta over axis 0.
+
+        :param index: (int) Number of the layer
+        :param dw: (array) Partial derivatives
+        :param delta: (array) Delta error.
+        """
+
+        self.w[index] -= self.learning_rate * dw
+        self.b[index] -= self.learning_rate * np.mean(delta, 0)
+
+    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=2e-2):
+        """
+        Train the net. The loss over the complete data set is printed every 10th epoch.
+
+        :param x: (array) Containing parameters
+        :param y_true: (array) Containing one hot encoded labels.
+        :param loss: Loss class (MSE, CrossEntropy etc.). It is instantiated with the activation of the final layer.
+        :param epochs: (int) Number of epochs.
+        :param batch_size: (int)
+        :param learning_rate: (flt)
+        :raises ValueError: If x and y_true differ in their first dimension.
+        """
+        if not x.shape[0] == y_true.shape[0]:
+            raise ValueError("Length of x and y arrays don't match")
+        # Initiate the loss object with the final activation function
+        self.loss = loss(self.activations[self.n_layers])
+        self.learning_rate = learning_rate
+
+        for i in range(epochs):
+            # Shuffle the data
+            seed = np.arange(x.shape[0])
+            np.random.shuffle(seed)
+            x_ = x[seed]
+            y_ = y_true[seed]
+
+            # Only full batches are used; the x.shape[0] % batch_size trailing cases of the shuffled data are skipped.
+            for j in range(x.shape[0] // batch_size):
+                k = j * batch_size
+                l = (j + 1) * batch_size
+                z, a = self._feed_forward(x_[k:l])
+                self._back_prop(z, a, y_[k:l])
+
+            if (i + 1) % 10 == 0:
+                _, a = self._feed_forward(x)
+                print("Loss:", self.loss.loss(y_true, a[self.n_layers]))
+
+    def predict(self, x):
+        """
+        Run a forward pass and return the activations of the final layer.
+
+        :param x: (array) Containing parameters
+        :return: (array) A 2D array of shape (n_cases, n_classes).
+        """
+        _, a = self._feed_forward(x)
+        return a[self.n_layers]
+
+if __name__ == "__main__":
+    from pprint import pprint
+
+    from sklearn import datasets
+    import sklearn.metrics
+    np.random.seed(1)
+    # Load data
+    data = datasets.load_iris()
+    x = data["data"]
+    x = (x - x.mean()) / x.std()
+    y = data["target"]
+    #y = np.expand_dims(data["target"], 1)
+
+    # one hot encoding
+    y = np.eye(3)[y]
+
+    nn = Network((2, 3, 1), (Relu, Sigmoid))
+
+    print("Weights:")
+    pprint(nn.w)
+
+    print("Biases:")
+    pprint(nn.b)
+
+    pprint(nn.activations)
+
+    # pprint() without an object raises a TypeError; print() emits the blank line instead.
+    print()
+
+    #nn.fit(x[:2], y[:2], MSE, 1, batch_size=2)
+    # nn.fit(x, y, MSE, 1000, 16)
+
+    # data = datasets.load_digits()
+    #
+    # x = data["data"]
+    # y = data["target"]
+    # y = np.eye(10)[y]
+    #
+    # nn = Network((64, 32, 10), (Relu, Sigmoid))
+    # nn.fit(x, y, MSE, 100, 2)
+    #
+    # y_ = nn.predict(x)
+    # a = np.argmax(y_, 1)
+    #
+    # for i in range(a.size):
+    #     print(a[i], y[i], "\t", np.round(y_[i], 3))
+    #
+    # y_true = []
+    # y_pred = []
+    # for i in range(len(y)):
+    #     y_pred.append(np.argmax(y_[i]))
+    #     y_true.append(np.argmax(y[i]))
+    #
+    # print(sklearn.metrics.classification_report(y_true, y_pred))
--- a/neural_networks/simple_mlp.py
+++ b/neural_networks/simple_mlp.py
@@ -0,0 +1,211 @@
+import numpy as np
+
+
+def new_model(in_dim, h_dim, out_dim):
+    """
+    Create the parameters of a net with one hidden layer.
+    :param in_dim: (int) Size of the input vector.
+    :param h_dim: (int) No. of hidden nodes.
+    :param out_dim: (int) No. of output nodes.
+    :return: (tpl) Weights and biases, both dicts with the keys 1 and 2.
+    """
+    # Standard normal weights scaled by 1 / sqrt(number of inputs of the layer).
+    w_hidden = np.random.randn(in_dim, h_dim) / np.sqrt(in_dim)
+    w_out = np.random.randn(h_dim, out_dim) / np.sqrt(h_dim)
+
+    weights = {1: w_hidden, 2: w_out}
+    biases = {1: np.zeros(h_dim), 2: np.zeros(out_dim)}
+
+    return weights, biases
+
+
+def relu(a):
+    """
+    Rectifier unit. Returns a new array, the argument is left untouched.
+    :param a: (array) activation vector.
+    :return: (array) Relu activation.
+    """
+    return np.maximum(a, 0)
+
+
+def sigmoid(a):
+    """
+    Logistic function 1 / (1 + e^-a), applied element wise.
+    :param a: (array) activation vector.
+    :return: (array) Sigmoid activation.
+    """
+    denominator = 1 + np.exp(-a)
+    return 1 / denominator
+
+
+def diff_sigmoid(a):
+    """
+    Derivative of the sigmoid function: sigmoid(a) * (1 - sigmoid(a)).
+    :param a: (array) activation vector.
+    :return: (array)
+    """
+    s = sigmoid(a)
+    return s * (1 - s)
+
+
+def diff_relu(a):
+    """
+    Derivative of the relu function. Returns a new array, the argument is left untouched.
+    :param a: (array) activation vector.
+    :return: (array) 1 where a > 0 and 0 elsewhere, in the dtype of a.
+    """
+    a = np.asarray(a)
+    return (a > 0).astype(a.dtype)
+
+
+def feed_forward(p, w, b):
+    """
+    Feed forward propagation through the hidden (relu) and the output (sigmoid) layer.
+    :param p: (array) Parameters.
+    :param w: (dict) Weights.
+    :param b: (dict) Biases
+    :return: (tpl) (a, z). a[2] is the relu output of the hidden layer, z[2] is the value handed to relu and
+             z[3] is the sigmoid output of the net.
+    """
+    hidden_in = np.dot(p, w[1]) + b[1]
+    hidden_out = relu(hidden_in)
+    output = sigmoid(np.dot(hidden_out, w[2]) + b[2])
+
+    a = {2: hidden_out}
+    z = {2: hidden_in, 3: output}
+
+    return a, z
+
+
+def cost_mse(a, y):
+    """
+    Mean squared error between predictions and labels.
+    :param a: (array) Predictions
+    :param y: (array) Ground truth labels
+    :return: (flt) Loss
+    """
+    squared_error = (a - y)**2
+    return np.mean(squared_error)
+
+
+def diff_cost_mse(a, y):
+    """
+    Error term used as derivative of the cost: predictions minus labels.
+    :param a: (array) Predictions
+    :param y: (array) Ground truth labels
+    :return: (array)
+    """
+    residual = a - y
+    return residual
+
+
+def bpe_delta(a, y):
+    """
+    Back propagating error delta of the sigmoid output layer.
+
+    a already holds sigmoid outputs, so the derivative of the sigmoid is a * (1 - a).
+    Running a through the sigmoid once more would give the wrong slope.
+
+    :param a: (array) Predictions
+    :param y: (array) Ground truth labels
+    :return: (array)
+    """
+    return diff_cost_mse(a, y) * a * (1 - a)
+
+
+class NeuralNetwork:
+    def __init__(self, in_dim, h_dim, out_dim, learning_rate=1e-4):
+        """
+        Simple one hidden layer net with relu activation in the hidden layer and sigmoid activation at the output
+        layer.
+
+        :param in_dim: (int) Size of the input vector.
+        :param h_dim: (int) No. of hidden nodes.
+        :param out_dim: (int) No. of output nodes.
+        :param learning_rate: (flt)
+        """
+        self.w, self.b = new_model(in_dim, h_dim, out_dim)
+        # State of the last forward pass, read by backprop().
+        self.x = None
+        self.a = None  # activations
+        self.z = None  # xi * wi + bi
+        self.learning_rate = learning_rate
+
+    def feed_forward(self, p):
+        """
+        Compute the activations and z's. z = w(x) + b
+        The batch and the results are stored on the instance for the following backprop() call.
+        :param p: (array)
+        """
+        self.x = p
+        self.a, self.z = feed_forward(p, self.w, self.b)
+
+    def backprop(self, labels):
+        """
+        Backpropagate the error and update the weights and biases
+
+        Requires a preceding feed_forward() call on the batch that belongs to the labels.
+
+        :param labels: (array) Ground truth vector.
+        """
+        # partial derivative with respect to layer 2
+        delta3 = bpe_delta(self.z[3], labels)
+
+        # dc_db2 = delta3
+        dc_dw2 = np.dot(self.a[2].T, delta3)
+
+        # partial derivative with respect to layer 1
+        delta2 = np.dot(delta3, self.w[2].T) * diff_relu(self.z[2])
+
+        # dc_db1 = delta2
+        dc_dw1 = np.dot(self.x.T, delta2)
+
+        # update weights and biases
+        # The weight gradients are sums over the batch, the bias gradients are means over the batch.
+        self.w[2] -= self.learning_rate * dc_dw2
+        self.b[2] -= self.learning_rate * np.mean(delta3, 0)
+        self.w[1] -= self.learning_rate * dc_dw1
+        self.b[1] -= self.learning_rate * np.mean(delta2, 0)
+
+    def stats(self):
+        """
+        Prints the weights and biases of both layers.
+        """
+        for i in range(1, 3):
+            print("Weights layer {}:\n".format(i), self.w[i], "\nBiases layer {}:\n".format(i), self.b[i], "\n")
+
+    def fit(self, x, labels, batch_size, epochs):
+        """
+        Train the net. The loss over the complete data set is printed once every 100 epochs.
+
+        :param x: (array) Input vector.
+        :param labels: (array) Ground truth vector.
+        :param batch_size: (int) Size of mini batch
+        :param epochs: (int) No. of epochs to train.
+        """
+
+        for i in range(epochs):
+            # Shuffle the data
+            seed = np.arange(x.shape[0])
+            np.random.shuffle(seed)
+            x_ = x[seed]
+            labels_ = labels[seed]
+
+            # Only full batches are used; trailing cases of the shuffled data are skipped.
+            for j in range(x.shape[0] // batch_size):
+                self.feed_forward(x_[j * batch_size: (j + 1) * batch_size])
+                self.backprop(labels_[j * batch_size: (j + 1) * batch_size])
+
+            # "if i % 100" was true on every epoch except the multiples of 100; report on the multiples only
+            # and skip the extra forward pass on all other epochs.
+            if i % 100 == 0:
+                _, y = feed_forward(x, self.w, self.b)
+                print("Loss:", cost_mse(y[3], labels))
+
+
+if __name__ == "__main__":
+    from sklearn import datasets
+    import sklearn.metrics
+    np.random.seed(1)
+
+    # Load the iris data and standardise it with the overall mean and standard deviation.
+    data = datasets.load_iris()
+    x = data["data"]
+    x = (x - x.mean()) / x.std()
+
+    # one hot encoding
+    y = np.eye(3)[data["target"]]
+
+    nn = NeuralNetwork(4, 4, 3, 1e-2)
+    #nn.fit(x[:2], y[:2], 2, 1)
+    nn.fit(x, y, 8, 1000)
+    _, y_ = feed_forward(x, nn.w, nn.b)
+    print(y_[3])
+
+    # The class of a case is the index of its largest output / its one hot position.
+    y_pred = np.argmax(y_[3], axis=1)
+    y_true = np.argmax(y, axis=1)
+
+    print(sklearn.metrics.classification_report(y_true, y_pred))
+
--- a/neural_networks/vanilla_mlp.py
+++ b/neural_networks/vanilla_mlp.py
@@ -0,0 +1,247 @@
+import numpy as np
+
+
+class Relu:
+    """
+    Rectified linear unit: f(z) = max(z, 0).
+
+    Both methods return a new array and leave their argument untouched, so the
+    pre-activations of the caller are still available after the call.
+    """
+
+    @staticmethod
+    def activation(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) Copy of z with the negative entries replaced by 0.
+        """
+        return np.maximum(z, 0)
+
+    @staticmethod
+    def prime(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) 1 where z > 0 and 0 elsewhere, in the dtype of z.
+        """
+        z = np.asarray(z)
+        return (z > 0).astype(z.dtype)
+
+
+class Sigmoid:
+    """
+    Logistic function f(z) = 1 / (1 + e^-z) and its derivative.
+    """
+
+    @staticmethod
+    def activation(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) Element wise sigmoid of z.
+        """
+        denominator = 1 + np.exp(-z)
+        return 1 / denominator
+
+    @staticmethod
+    def prime(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) f(z) * (1 - f(z))
+        """
+        s = Sigmoid.activation(z)
+        return s * (1 - s)
+
+
+class MSE:
+    """
+    Mean squared error loss, coupled to the activation function of the output layer.
+    """
+
+    def __init__(self, activation_fn=None):
+        """
+        :param activation_fn: Class object of the activation function. NoActivation is used when it is omitted.
+        """
+        self.activation_fn = activation_fn or NoActivation
+
+    def activation(self, z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) Output of the configured activation function.
+        """
+        return self.activation_fn.activation(z)
+
+    @staticmethod
+    def loss(y_true, y_pred):
+        """
+        :param y_true: (array) One hot encoded truth vector.
+        :param y_pred: (array) Prediction vector
+        :return: (flt) Mean of the squared differences.
+        """
+        squared_error = (y_pred - y_true)**2
+        return np.mean(squared_error)
+
+    @staticmethod
+    def prime(y_true, y_pred):
+        """
+        :return: (array) Prediction minus truth.
+        """
+        error = y_pred - y_true
+        return error
+
+    def delta(self, y_true, y_pred):
+        """
+        Back propagation error delta
+
+        NOTE(review): the derivative of the activation is evaluated at y_pred, not at z - confirm this is intended.
+        :return: (array)
+        """
+        error = self.prime(y_true, y_pred)
+        slope = self.activation_fn.prime(y_pred)
+        return error * slope
+
+
+class NoActivation:
+    """
+    Identity activation, plugged in where a layer should hand z through unchanged.
+
+    f(z) = z
+    """
+    @staticmethod
+    def activation(z):
+        """
+        :param z: (array) w(x) + b
+        :return: (array) The very same z.
+        """
+        return z
+
+    @staticmethod
+    def prime(z):
+        """
+        The derivative of the identity is 1 everywhere.
+        :param z: (array)
+        :return: (array) Ones with the shape and dtype of z.
+        """
+        return np.full_like(z, 1)
+
+
+class Network:
+    """
+    Fully connected feed forward net trained with mini batch gradient descent.
+
+    Layers are numbered from 1 (the input). w[i] and b[i] connect layer i to layer i + 1,
+    activations[i] is the activation class applied at layer i (i >= 2).
+    """
+
+    def __init__(self, dimensions, activations):
+        """
+        :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output)
+        :param activations: (tpl/ list) Activations functions, one per layer after the input layer.
+
+        Example of one hidden layer with
+        - 2 inputs
+        - 3 hidden nodes
+        - 3 outputs
+
+
+        layers -->    [1,        2,          3]
+        ----------------------------------------
+
+        dimensions =  (2,     3,          3)
+        activations = (      Relu,      Sigmoid)
+        """
+        self.n_layers = len(dimensions)
+        # Both are set by fit().
+        self.loss = None
+        self.learning_rate = None
+        # Weights and biases are initiated by index. For a one hidden layer net you will have a w[1] and w[2]
+        self.w = {}
+        self.b = {}
+
+        # Activations are also initiated by index. For the example we will have activations[2] and activations[3]
+        self.activations = {}
+        for i in range(len(dimensions) - 1):
+            # Standard normal weights scaled by 1 / sqrt(number of inputs of the layer); biases start at zero.
+            self.w[i + 1] = np.random.randn(dimensions[i], dimensions[i + 1]) / np.sqrt(dimensions[i])
+            self.b[i + 1] = np.zeros(dimensions[i + 1])
+            self.activations[i + 2] = activations[i]
+
+    def _feed_forward(self, x):
+        """
+        Execute a forward feed through the network.
+        :param x: (array) Batch of input data vectors.
+        :return: (tpl) (z, a): dicts with the node outputs and the activations per layer. The keys are equivalent to
+                 the layer numbers; z starts at key 2, a starts at key 1 (the input itself).
+        """
+
+        # w(x) + b
+        z = {}
+
+        # activations: f(z)
+        a = {1: x}  # First layer has no activations as input. The input x is the input.
+
+        for i in range(1, self.n_layers):
+            # current layer = i
+            # activation layer = i + 1
+            z[i + 1] = np.dot(a[i], self.w[i]) + self.b[i]
+            # NOTE(review): an activation that works in place makes z[i + 1] and a[i + 1] the same array object.
+            a[i + 1] = self.activations[i + 1].activation(z[i + 1])
+
+        return z, a
+
+    def _back_prop(self, z, a, y_true):
+        """
+        Back propagate the error of one batch and update all weights and biases.
+
+        The input dicts keys represent the layers of the net.
+
+        a = { 1: x,
+              2: f(w1(x) + b1)
+              3: f(w2(a2) + b2)
+              }
+
+        :param z: (dict) w(x) + b
+        :param a: (dict) f(z)
+        :param y_true: (array) One hot encoded truth vector.
+        :return: None. self.w and self.b are updated in place.
+        """
+
+        # Determine partial derivative and delta for the output layer.
+        # delta output layer
+        delta = self.loss.delta(y_true, a[self.n_layers])
+        dw = np.dot(a[self.n_layers - 1].T, delta)
+
+        # Gradients are collected first and applied after the loop, so every delta is computed with the
+        # weights that were used in the forward pass.
+        update_params = {
+            self.n_layers - 1: (dw, delta)
+        }
+
+        # In case of a three layer net this iterates over i = 2 only (range(2, n_layers) reversed).
+        # Determine partial derivative and delta for the rest of the layers.
+        # Each iteration requires the delta from the previous layer, propagating backwards.
+        for i in reversed(range(2, self.n_layers)):
+            # NOTE(review): a[i] has already been consumed when prime(z[i]) runs; keep this order if the
+            # activation's prime mutates its argument.
+            delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
+            dw = np.dot(a[i - 1].T, delta)
+            update_params[i - 1] = (dw, delta)
+
+        for k, v in update_params.items():
+            self._update_w_b(k, v[0], v[1])
+
+    def _update_w_b(self, index, dw, delta):
+        """
+        Update weights and biases.
+
+        The weight gradient dw arrives as a sum over the batch (np.dot in _back_prop), the bias gradient is the
+        mean of delta over axis 0.
+
+        :param index: (int) Number of the layer
+        :param dw: (array) Partial derivatives
+        :param delta: (array) Delta error.
+        """
+
+        self.w[index] -= self.learning_rate * dw
+        self.b[index] -= self.learning_rate * np.mean(delta, 0)
+
+    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=1e-3):
+        """
+        Train the net. The loss over the complete data set is printed every 10th epoch.
+
+        :param x: (array) Containing parameters
+        :param y_true: (array) Containing one hot encoded labels.
+        :param loss: Loss class (MSE, CrossEntropy etc.). It is instantiated with the activation of the final layer.
+        :param epochs: (int) Number of epochs.
+        :param batch_size: (int)
+        :param learning_rate: (flt)
+        :raises ValueError: If x and y_true differ in their first dimension.
+        """
+        if not x.shape[0] == y_true.shape[0]:
+            raise ValueError("Length of x and y arrays don't match")
+        # Initiate the loss object with the final activation function
+        self.loss = loss(self.activations[self.n_layers])
+        self.learning_rate = learning_rate
+
+        for i in range(epochs):
+            # Shuffle the data
+            seed = np.arange(x.shape[0])
+            np.random.shuffle(seed)
+            x_ = x[seed]
+            y_ = y_true[seed]
+
+            # Only full batches are used; the x.shape[0] % batch_size trailing cases of the shuffled data are skipped.
+            for j in range(x.shape[0] // batch_size):
+                k = j * batch_size
+                l = (j + 1) * batch_size
+                z, a = self._feed_forward(x_[k:l])
+                self._back_prop(z, a, y_[k:l])
+
+            if (i + 1) % 10 == 0:
+                _, a = self._feed_forward(x)
+                print("Loss:", self.loss.loss(y_true, a[self.n_layers]))
+
+    def predict(self, x):
+        """
+        Run a forward pass and return the activations of the final layer.
+
+        :param x: (array) Containing parameters
+        :return: (array) A 2D array of shape (n_cases, n_classes).
+        """
+        _, a = self._feed_forward(x)
+        return a[self.n_layers]
+
+if __name__ == "__main__":
+    from sklearn import datasets
+    import sklearn.metrics
+    np.random.seed(1)
+    data = datasets.load_digits()
+
+    x = data["data"]
+    # one hot encoding of the ten digit classes
+    y = np.eye(10)[data["target"]]
+
+    nn = Network((64, 15, 10), (Relu, Sigmoid))
+    nn.fit(x, y, loss=MSE, epochs=50, batch_size=15, learning_rate=1e-3)
+
+    prediction = nn.predict(x)
+
+    # The class of a case is the index of its largest output / its one hot position.
+    y_pred = np.argmax(prediction, axis=1)
+    y_true = np.argmax(y, axis=1)
+
+    print(sklearn.metrics.classification_report(y_true, y_pred))