diff --git a/functions.py b/functions.py
index d77cf95..68d7389 100644
--- a/functions.py
+++ b/functions.py
@@ -51,6 +51,7 @@ class CrossEntropy:
     @staticmethod
     def delta(y_hat, y):
         """
+        http://cs231n.github.io/linear-classify/#softmax
         https://stackoverflow.com/questions/27089932/cross-entropy-softmax-and-the-derivative-term-in-backpropagation
         :param y_hat: (array) One hot encoded truth vector.
         :param y: (array) Prediction vector.
@@ -85,3 +86,121 @@ class CrossEntropy:
         return -np.dot(y_hat, np.log(y))
+
+
+class MSE:
+    def __init__(self, activation_fn=None):
+        """
+        :param activation_fn: Class object of the activation function.
+        """
+        if activation_fn:
+            self.activation_fn = activation_fn
+        else:
+            self.activation_fn = NoActivation
+
+    def activation(self, z):
+        return self.activation_fn.activation(z)
+
+    @staticmethod
+    def loss(y_hat, y):
+        """
+        :param y_hat: (array) One hot encoded truth vector.
+        :param y: (array) Prediction vector.
+        :return: (flt)
+        """
+        return np.mean((y - y_hat)**2)
+
+    @staticmethod
+    def prime(y_hat, y):
+        return y - y_hat
+
+    def delta(self, y_hat, y):
+        return self.prime(y_hat, y) * self.activation_fn.prime(y)
+
+
+class NoActivation:
+    @staticmethod
+    def activation(z):
+        """
+        Identity activation.
+        :param z: (array) w(x) + b
+        :return: z (array)
+        """
+        return z
+
+    @staticmethod
+    def prime(x):
+        """
+        The identity f(x) = x has derivative f'(x) = 1.
+        :param x: (array) Input variable x
+        :return: (array) Ones, with the shape of x
+        """
+        return np.ones_like(x)
+
+
+class Network:
+    def __init__(self, dimensions, activations):
+        """
+        :param dimensions: (tpl/ list) Dimensions of the neural net. (input, hidden layer, output)
+        :param activations: (tpl/ list) Activation functions.
+
+        Example of one hidden layer with
+        - 2 inputs
+        - 3 hidden nodes
+        - 3 outputs
+
+        layers -->        [1,      2,       3]
+        ----------------------------------------
+        dimensions =      (2,      3,       3)
+        activations =     (        Relu,    Sigmoid)
+        """
+        self.n_layers = len(dimensions)
+        # Weights and biases are initialized by index. For a net with one hidden layer you will have w[1] and w[2].
+        self.w = {}
+        self.b = {}
+
+        # Activations are also initialized by index. For the example above we will have activations[2] and activations[3].
+        self.activations = {}
+        for i in range(len(dimensions) - 1):
+            self.w[i + 1] = np.random.randn(dimensions[i], dimensions[i + 1]) / np.sqrt(dimensions[i])
+            self.b[i + 1] = np.zeros(dimensions[i + 1])
+            self.activations[i + 2] = activations[i]
+
+    def feed_forward(self, x):
+        """
+        Execute a forward feed through the network.
+        :param x: (array) Batch of input data vectors.
+        :return: (tpl) Node outputs z and activations a per layer, indexed by layer number.
+        """
+        # w(x) + b
+        z = {}
+
+        # activations: f(z)
+        a = {1: x}  # The input layer has no activation; a[1] is the input x itself.
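+        # Indexing sketch (assuming dimensions (2, 3, 3)): a[1] = x,
+        # z[2] = np.dot(a[1], w[1]) + b[1], a[2] = activations[2].activation(z[2]),
+        # z[3] = np.dot(a[2], w[2]) + b[2], a[3] = activations[3].activation(z[3]).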
+
+        for i in range(1, self.n_layers):
+            # current layer = i
+            # activation layer = i + 1
+            z[i + 1] = np.dot(a[i], self.w[i]) + self.b[i]
+            a[i + 1] = self.activations[i + 1].activation(z[i + 1])
+
+        return z, a
+
+
+if __name__ == "__main__":
+    from sklearn import datasets
+    import sklearn.metrics
+
+    # Load data
+    data = datasets.load_iris()
+    x = data["data"]
+    x = (x - x.mean()) / x.std()
+
+    # One hot encoding of the target labels.
+    y = np.eye(3)[data["target"]]
+
+    nn = Network((4, 2, 2, 1), (Relu, Relu, Sigmoid))
+    nn.feed_forward(x[:1])
\ No newline at end of file
diff --git a/simple_mlp.py b/simple_mlp.py
index dc90e6f..b7e53fd 100644
--- a/simple_mlp.py
+++ b/simple_mlp.py
@@ -1,6 +1,4 @@
 import numpy as np
-from sklearn import datasets
-import sklearn.metrics
 
 
 def new_model(in_dim, h_dim, out_dim):
@@ -81,11 +79,11 @@ def cost_mse(a, y):
     :param y: (array) Ground truth labels
     :return: (flt) Loss
     """
-    return np.mean((a - y)**2)
+    return 0.5 * np.sum((a - y)**2)
 
 
 def diff_cost_mse(a, y):
-    return (a - y)
+    return a - y
 
 
 def bpe_delta(a, y):
@@ -178,11 +176,13 @@ class NeuralNetwork:
 
             _, y = feed_forward(x, self.w, self.b)
 
-            if i % epochs // 20 == 0:
-                print(cost_mse(y[3], labels))
+            if i % (epochs // 10) == 0:
+                print("Loss:", cost_mse(y[3], labels))
 
 
 if __name__ == "__main__":
+    from sklearn import datasets
+    import sklearn.metrics
     np.random.seed(1)
 
     # Load data
@@ -204,7 +204,6 @@ if __name__ == "__main__":
     for i in range(len(y)):
         y_pred.append(np.argmax(y_[3][i]))
         y_true.append(np.argmax(y[i]))
-        print(y_pred[-1], y_true[-1])
 
     print(sklearn.metrics.classification_report(y_true, y_pred))
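
Below is a minimal usage sketch of the classes added in this diff (not part of the patch itself). It assumes functions.py is importable; the Sigmoid class shown here is a hypothetical stand-in with the same static activation/prime interface as NoActivation above, and the output-layer error term is computed by hand from MSE.prime rather than through MSE.delta.

import numpy as np
from functions import MSE, Network

class Sigmoid:
    # Hypothetical activation plugin; prime is evaluated at the pre-activation z.
    @staticmethod
    def activation(z):
        return 1 / (1 + np.exp(-z))

    @staticmethod
    def prime(z):
        s = Sigmoid.activation(z)
        return s * (1 - s)

x = np.random.randn(5, 4)                      # batch of 5 samples, 4 features
y_hat = np.eye(3)[np.random.randint(0, 3, 5)]  # one hot truth vectors

nn = Network((4, 8, 3), (Sigmoid, Sigmoid))
z, a = nn.feed_forward(x)                      # a[1] = x, a[3] = network output

loss = MSE(Sigmoid)
print("MSE:", loss.loss(y_hat, a[3]))

# Output layer error term: dC/dz = (a - y_hat) * sigmoid'(z).
delta = loss.prime(y_hat, a[3]) * Sigmoid.prime(z[3])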