vanilla_mlp
This commit is contained in:
parent
36ca828e6b
commit
7dfe24aa76
functions.py (51 changed lines)
@@ -177,11 +177,11 @@ class Network:
            self.b[i + 1] = np.zeros(dimensions[i + 1])
            self.activations[i + 2] = activations[i]

-    def feed_forward(self, x):
+    def _feed_forward(self, x):
        """
        Execute a forward feed through the network.
        :param x: (array) Batch of input data vectors.
-        :return: Node outputs and activations per layer. The numbering of the output is equivalent to the layer numbers.
+        :return: (tpl) Node outputs and activations per layer. The numbering of the output is equivalent to the layer numbers.
        """

        # w(x) + b
@@ -198,7 +198,7 @@ class Network:

        return z, a

-    def back_prop(self, z, a, y_true):
+    def _back_prop(self, z, a, y_true):
        """
        The input dicts keys represent the layers of the net.
@@ -231,9 +231,9 @@ class Network:
            update_params[i - 1] = (dw, delta)

        for k, v in update_params.items():
-            self.update_w_b(k, v[0], v[1])
+            self._update_w_b(k, v[0], v[1])

-    def update_w_b(self, index, dw, delta):
+    def _update_w_b(self, index, dw, delta):
        """
        Update weights and biases.
@@ -245,9 +245,14 @@ class Network:
        self.w[index] -= self.learning_rate * dw
        self.b[index] -= self.learning_rate * np.mean(delta, 0)

-    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=2e-2):
+    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=1e-3):
        """
        :param x: (array) Containing parameters
        :param y_true: (array) Containing one hot encoded labels.
        :param loss: Loss class (MSE, CrossEntropy etc.)
        :param epochs: (int) Number of epochs.
        :param batch_size: (int)
        :param learning_rate: (flt)
        """
        if not x.shape[0] == y_true.shape[0]:
            raise ValueError("Length of x and y arrays don't match")
@@ -265,51 +270,37 @@ class Network:
            for j in range(x.shape[0] // batch_size):
                k = j * batch_size
                l = (j + 1) * batch_size
-                z, a = self.feed_forward(x_[k:l])
-                self.back_prop(z, a, y_[k:l])
+                z, a = self._feed_forward(x_[k:l])
+                self._back_prop(z, a, y_[k:l])

            if (i + 1) % 10 == 0:
-                _, a = self.feed_forward(x)
+                _, a = self._feed_forward(x)
                print("Loss:", self.loss.loss(y_true, a[self.n_layers]))

    def predict(self, x):
-        _, a = self.feed_forward(x)
+        """
+        :param x: (array) Containing parameters
+        :return: (array) A 2D array of shape (n_cases, n_classes).
+        """
+        _, a = self._feed_forward(x)
        return a[self.n_layers]

if __name__ == "__main__":
    from sklearn import datasets
    import sklearn.metrics
    np.random.seed(1)
    # # Load data
    # data = datasets.load_iris()
    # x = data["data"]
    # x = (x - x.mean()) / x.std()
    # y = data["target"]
    # #y = np.expand_dims(data["target"], 1)
    #
    # # one hot encoding
    # y = np.eye(3)[y]
    #
    # nn = Network((4, 8, 3), (Relu, Sigmoid))
    #
    # #nn.fit(x[:2], y[:2], MSE, 1, batch_size=2)
    # nn.fit(x, y, MSE, 1000, 16)

    data = datasets.load_digits()

    x = data["data"]
    y = data["target"]
    y = np.eye(10)[y]

-    nn = Network((64, 32, 10), (Relu, Sigmoid))
-    nn.fit(x, y, MSE, 100, 2)
+    nn = Network((64, 10, 10), (Relu, Sigmoid))
+    nn.fit(x, y, MSE, 100, 15, learning_rate=1e-3)

    y_ = nn.predict(x)
    a = np.argmax(y_, 1)

    for i in range(a.size):
        print(a[i], y[i], "\t", np.round(y_[i], 3))

    y_true = []
    y_pred = []
    for i in range(len(y)):
vanilla_mlp.py (new file, 247 lines)
@@ -0,0 +1,247 @@
import numpy as np


class Relu:
    @staticmethod
    def activation(z):
        z[z < 0] = 0
        return z

    @staticmethod
    def prime(z):
        z[z < 0] = 0
        z[z > 0] = 1
        return z
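# Note: Relu.activation and Relu.prime modify the incoming array in place (z[z < 0] = 0) and return
# that same object, so callers that still need the raw pre-activations afterwards should pass a copy.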
class Sigmoid:
    @staticmethod
    def activation(z):
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def prime(z):
        return Sigmoid.activation(z) * (1 - Sigmoid.activation(z))


class MSE:
    def __init__(self, activation_fn=None):
        """

        :param activation_fn: Class object of the activation function.
        """
        if activation_fn:
            self.activation_fn = activation_fn
        else:
            self.activation_fn = NoActivation

    def activation(self, z):
        return self.activation_fn.activation(z)

    @staticmethod
    def loss(y_true, y_pred):
        """
        :param y_true: (array) One hot encoded truth vector.
        :param y_pred: (array) Prediction vector
        :return: (flt)
        """
        return np.mean((y_pred - y_true)**2)

    @staticmethod
    def prime(y_true, y_pred):
        return y_pred - y_true

    def delta(self, y_true, y_pred):
        """
        Back propagation error delta
        :return: (array)
        """
        return self.prime(y_true, y_pred) * self.activation_fn.prime(y_pred)
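# MSE.delta seeds the backward pass in Network._back_prop: by the chain rule the output-layer error is
# dL/dz = (y_pred - y_true) * f'(.), with f the output activation. Note that prime() receives y_pred
# (the activated output) here rather than the pre-activation z.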
class NoActivation:
    """
    This is a plugin function for no activation.

    f(x) = x * 1
    """
    @staticmethod
    def activation(z):
        """
        :param z: (array) w(x) + b
        :return: z (array)
        """
        return z

    @staticmethod
    def prime(z):
        """
        The prime of z * 1 = 1
        :param z: (array)
        :return: z': (array)
        """
        return np.ones_like(z)


class Network:
    def __init__(self, dimensions, activations):
        """
        :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output)
        :param activations: (tpl/ list) Activations functions.

        Example of one hidden layer with
        - 2 inputs
        - 3 hidden nodes
        - 3 outputs


        layers -->    [1,        2,          3]
        ----------------------------------------

        dimensions =  (2,        3,          3)
        activations = (          Relu,       Sigmoid)
        """
        self.n_layers = len(dimensions)
        self.loss = None
        self.learning_rate = None
        # Weights and biases are initiated by index. For a one hidden layer net you will have a w[1] and w[2]
        self.w = {}
        self.b = {}

        # Activations are also initiated by index. For the example we will have activations[2] and activations[3]
        self.activations = {}
        for i in range(len(dimensions) - 1):
            self.w[i + 1] = np.random.randn(dimensions[i], dimensions[i + 1]) / np.sqrt(dimensions[i])
            self.b[i + 1] = np.zeros(dimensions[i + 1])
            self.activations[i + 2] = activations[i]
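    # For example, Network((2, 3, 3), (Relu, Sigmoid)) ends up with
    #   w[1]: shape (2, 3), b[1]: shape (3,), activations[2] = Relu
    #   w[2]: shape (3, 3), b[2]: shape (3,), activations[3] = Sigmoid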
    def _feed_forward(self, x):
        """
        Execute a forward feed through the network.
        :param x: (array) Batch of input data vectors.
        :return: (tpl) Node outputs and activations per layer. The numbering of the output is equivalent to the layer numbers.
        """

        # w(x) + b
        z = {}

        # activations: f(z)
        a = {1: x}  # First layer has no activations as input. The input x is the input.

        for i in range(1, self.n_layers):
            # current layer = i
            # activation layer = i + 1
            z[i + 1] = np.dot(a[i], self.w[i]) + self.b[i]
            a[i + 1] = self.activations[i + 1].activation(z[i + 1])

        return z, a

    def _back_prop(self, z, a, y_true):
        """
        The input dicts keys represent the layers of the net.

        a = { 1: x,
              2: f(w1(x) + b1)
              3: f(w2(a2) + b2)
              }

        :param z: (dict) w(x) + b
        :param a: (dict) f(z)
        :param y_true: (array) One hot encoded truth vector.
        :return:
        """

        # Determine partial derivative and delta for the output layer.
        # delta output layer
        delta = self.loss.delta(y_true, a[self.n_layers])
        dw = np.dot(a[self.n_layers - 1].T, delta)

        update_params = {
            self.n_layers - 1: (dw, delta)
        }

        # In case of three layer net will iterate over i = 2 and i = 1
        # Determine partial derivative and delta for the rest of the layers.
        # Each iteration requires the delta from the previous layer, propagating backwards.
        for i in reversed(range(2, self.n_layers)):
            delta = np.dot(delta, self.w[i].T) * self.activations[i].prime(z[i])
            dw = np.dot(a[i - 1].T, delta)
            update_params[i - 1] = (dw, delta)

        for k, v in update_params.items():
            self._update_w_b(k, v[0], v[1])
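    # The loop above implements the usual backprop recursion for each hidden layer i:
    #   delta_i = (delta_(i+1) @ w[i].T) * f_i'(z[i])
    #   dw      = a[i - 1].T @ delta_i
    # and each (dw, delta_i) pair is applied to w[i - 1] and b[i - 1], the parameters feeding layer i.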
    def _update_w_b(self, index, dw, delta):
        """
        Update weights and biases.

        :param index: (int) Number of the layer
        :param dw: (array) Partial derivatives
        :param delta: (array) Delta error.
        """

        self.w[index] -= self.learning_rate * dw
        self.b[index] -= self.learning_rate * np.mean(delta, 0)
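    # Plain SGD step. dw (a[index].T @ delta) is summed over the batch, while the bias update takes
    # the batch mean of delta, so weights and biases effectively see different step sizes.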
    def fit(self, x, y_true, loss, epochs, batch_size, learning_rate=1e-3):
        """
        :param x: (array) Containing parameters
        :param y_true: (array) Containing one hot encoded labels.
        :param loss: Loss class (MSE, CrossEntropy etc.)
        :param epochs: (int) Number of epochs.
        :param batch_size: (int)
        :param learning_rate: (flt)
        """
        if not x.shape[0] == y_true.shape[0]:
            raise ValueError("Length of x and y arrays don't match")
        # Initiate the loss object with the final activation function
        self.loss = loss(self.activations[self.n_layers])
        self.learning_rate = learning_rate

        for i in range(epochs):
            # Shuffle the data
            seed = np.arange(x.shape[0])
            np.random.shuffle(seed)
            x_ = x[seed]
            y_ = y_true[seed]

            for j in range(x.shape[0] // batch_size):
                k = j * batch_size
                l = (j + 1) * batch_size
                z, a = self._feed_forward(x_[k:l])
                self._back_prop(z, a, y_[k:l])

            if (i + 1) % 10 == 0:
                _, a = self._feed_forward(x)
                print("Loss:", self.loss.loss(y_true, a[self.n_layers]))

    def predict(self, x):
        """
        :param x: (array) Containing parameters
        :return: (array) A 2D array of shape (n_cases, n_classes).
        """
        _, a = self._feed_forward(x)
        return a[self.n_layers]

if __name__ == "__main__":
    from sklearn import datasets
    import sklearn.metrics
    np.random.seed(1)
    data = datasets.load_digits()

    x = data["data"]
    y = data["target"]
    y = np.eye(10)[y]

    nn = Network((64, 15, 10), (Relu, Sigmoid))
    nn.fit(x, y, loss=MSE, epochs=50, batch_size=15, learning_rate=1e-3)

    prediction = nn.predict(x)

    y_true = []
    y_pred = []
    for i in range(len(y)):
        y_pred.append(np.argmax(prediction[i]))
        y_true.append(np.argmax(y[i]))

    print(sklearn.metrics.classification_report(y_true, y_pred))
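The __main__ block above scores the network on the same samples it was trained on. A minimal sketch of the same run with a held-out test set, assuming vanilla_mlp.py is importable as a module (the train_test_split usage is illustrative and not part of this commit):

import numpy as np
import sklearn.metrics
from sklearn import datasets
from sklearn.model_selection import train_test_split

from vanilla_mlp import MSE, Network, Relu, Sigmoid  # assumes vanilla_mlp.py is on the import path

np.random.seed(1)
digits = datasets.load_digits()
x = digits["data"]
y = np.eye(10)[digits["target"]]  # one hot encode the labels

# Hold out a quarter of the samples so the report reflects generalisation rather than memorisation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

nn = Network((64, 15, 10), (Relu, Sigmoid))
nn.fit(x_train, y_train, loss=MSE, epochs=50, batch_size=15, learning_rate=1e-3)

prediction = nn.predict(x_test)
print(sklearn.metrics.classification_report(np.argmax(y_test, 1), np.argmax(prediction, 1)))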