import numpy as np def new_model(in_dim, h_dim, out_dim): w = { 1: np.random.randn(in_dim, h_dim) / np.sqrt(in_dim), 2: np.random.randn(h_dim, out_dim) / np.sqrt(h_dim) } b = { 1: np.zeros(h_dim), 2: np.zeros(out_dim) } return w, b def relu(a): """ Rectifier unit :param a: (array) activation vector. :return: (array) Relu activation. """ a[a < 0] = 0 return a def sigmoid(a): """ Sigmoid activation function. :param a: (array) activation vector. :return: (array) Sigmoid activation. """ return 1 / (1 + np.exp(-a)) def diff_sigmoid(a): """ Derivative of the sigmoid function. :param a: (array) activation vector. :return: (array) """ return sigmoid(a) * (1 - sigmoid(a)) def diff_relu(a): """ Derivative of the relu function. :param a: (array) activation vector. :return: (array) """ a[a < 0] = 0 a[a > 0] = 1 return a def feed_forward(p, w, b): """ Feed forward propagation. :param p: (array) Parameters. :param w: (dict) Weights. :param b: (dict) Biases :return: (array) Output. """ a = {} z = {} z[2] = np.dot(p, w[1]) + b[1] a[2] = relu(z[2]) z[3] = sigmoid(np.dot(a[2], w[2]) + b[2]) return a, z def cost_mse(a, y): """ Cost function. :param a: (array) Predictions :param y: (array) Ground truth labels :return: (flt) Loss """ return np.mean((a - y)**2) def diff_cost_mse(a, y): return a - y def bpe_delta(a, y): """ Back propagating error delta :param a: (array) Predictions :param y: (array) Ground truth labels :return: (array) """ return diff_cost_mse(a, y) * diff_sigmoid(a) class NeuralNetwork: def __init__(self, in_dim, h_dim, out_dim, learning_rate=1e-4): """ Simple one hidden layer net with relu activation in the hidden layer and sigmoid activation at the output layer. :param in_dim: (int) Size of the input vector. :param h_dim: (int) No. of hidden nodes. :param out_dim: (int) No. of output nodes. :param learning_rate: (flt) """ self.w, self.b = new_model(in_dim, h_dim, out_dim) self.x = None self.a = None # activations self.z = None # xi * wi + bi self.learning_rate = learning_rate def feed_forward(self, p): """ Compute the activations and z's. z = w(x) + b :param p: (array) """ self.x = p self.a, self.z = feed_forward(p, self.w, self.b) def backprop(self, labels): """ Backpropagate the error and update the weights and biases :param labels: (array) Ground truth vector. """ # partial derivative with respect to layer 2 delta3 = bpe_delta(self.z[3], labels) # dc_db2 = delta3 dc_dw2 = np.dot(self.a[2].T, delta3) # partial derivative with respect to layer 1 delta2 = np.dot(delta3, self.w[2].T) * diff_relu(self.z[2]) # dc_db1 = delta2 dc_dw1 = np.dot(self.x.T, delta2) # update weights and biases self.w[2] -= self.learning_rate * dc_dw2 self.b[2] -= self.learning_rate * np.mean(delta3, 0) self.w[1] -= self.learning_rate * dc_dw1 self.b[1] -= self.learning_rate * np.mean(delta2, 0) def stats(self): """ Prints some weights and biases """ for i in range(1, 3): print("Weights layer {}:\n".format(i), self.w[i], "\nBiases layer {}:\n".format(i), self.b[i], "\n") def fit(self, x, labels, batch_size, epochs): """ Train the net. :param x: (array) Input vector. :param labels: (array) Ground truth vector. :param batch_size: (int) Size of mini batch :param epochs: (int) No. of epochs to train. """ for i in range(epochs): # Shuffle the data seed = np.arange(x.shape[0]) np.random.shuffle(seed) x_ = x[seed] labels_ = labels[seed] for j in range(x.shape[0] // batch_size): self.feed_forward(x_[j * batch_size: (j + 1) * batch_size]) self.backprop(labels_[j * batch_size: (j + 1) * batch_size]) _, y = feed_forward(x, self.w, self.b) if i % 100: print("Loss:", cost_mse(y[3], labels)) if __name__ == "__main__": from sklearn import datasets import sklearn.metrics np.random.seed(1) # Load data data = datasets.load_iris() x = data["data"] x = (x - x.mean()) / x.std() y = data["target"] # one hot encoding y = np.eye(3)[y] nn = NeuralNetwork(4, 4, 3, 1e-2) #nn.fit(x[:2], y[:2], 2, 1) nn.fit(x, y, 8, 1000) _, y_ = feed_forward(x, nn.w, nn.b) print(y_[3]) # # result # _, y_ = feed_forward(x, nn.w, nn.b) y_true = [] y_pred = [] for i in range(len(y)): y_pred.append(np.argmax(y_[3][i])) y_true.append(np.argmax(y[i])) print(sklearn.metrics.classification_report(y_true, y_pred))