diff --git a/reinforcement_learning/deep_Q_bridge.ipynb b/reinforcement_learning/deep_Q_bridge.ipynb
new file mode 100644
index 0000000..d4b5ce3
--- /dev/null
+++ b/reinforcement_learning/deep_Q_bridge.ipynb
@@ -0,0 +1,841 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pickle\n",
+    "import tensorflow as tf\n",
+    "import sys\n",
+    "\n",
+    "if sys.platform == \"win32\":\n",
+    "    sys.path.append(r\"C:\\\\Users\\\\vik\\\\Dropbox\\\\Code\\\\Python\\\\structural_engineering\")\n",
+    "else:\n",
+    "    sys.path.append(\"/home/ritchie46/Dropbox/Code/Python/structural_engineering\")\n",
+    "\n",
+    "from anastruct.fem.system import SystemElements\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "import math"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 0.  0.  0.  0.  1.  0.  0.  0.] 8\n",
+      "(array([ 0.,  2.,  0.,  0.,  1.,  0.,  0.,  0.]), -0.1, False)\n",
+      "[[ 0.  2.  3.  0.]\n",
+      " [ 1.  0.  0.  0.]] \n",
+      "\n",
+      "[[ 0.  2.  3.  0.]\n",
+      " [ 1.  0.  0.  4.]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "class Environment:\n",
+    "    def __init__(self, length=3, height=2, optimize='moment'):\n",
+    "        self.length = length\n",
+    "        self.height = height\n",
+    "        self.state = None\n",
+    "        self.n = None\n",
+    "        self.actions_chosen = None\n",
+    "        self.no_action = None\n",
+    "        self.action_space = {0, 1, 2, 3, 4, 5, 6, 7}\n",
+    "        self.valid_actions = None\n",
+    "        self.optimize = optimize\n",
+    "        self.result_map = {}\n",
+    "        \n",
+    "        # actions\n",
+    "        right = 0\n",
+    "        left = 4\n",
+    "        up = 2\n",
+    "        down = 6\n",
+    "        up_right = 1\n",
+    "        up_left = 3\n",
+    "        down_right = 7\n",
+    "        down_left = 5\n",
+    "        \n",
+    "        # If the state is a flattened array, this maps the actions to index displacements.\n",
+    "        self.move_map = {right: 1,\n",
+    "                         left: -1,\n",
+    "                         up: -length,\n",
+    "                         down: length,\n",
+    "                         up_right: -length + 1,\n",
+    "                         up_left: -length - 1,\n",
+    "                         down_right: length + 1,\n",
+    "                         down_left: length - 1}\n",
+    "        \n",
+    "    def reset(self):\n",
+    "        self.state = np.zeros((self.height, self.length))\n",
+    "        self.n = 1\n",
+    "        self.actions_chosen = 0\n",
+    "        self.state[-1][0] = self.n\n",
+    "        self.det_valid_actions()\n",
+    "        \n",
+    "        return self.state.ravel()\n",
+    "#         # valid action encoding\n",
+    "#         a = np.zeros(8)\n",
+    "#         a[np.array(self.valid_actions)] = 1\n",
+    "        \n",
+    "#         return np.concatenate((self.state.ravel(), a))\n",
+    "    \n",
+    "    def return_action(self, r):\n",
+    "        done = False\n",
+    "        \n",
+    "        # Bridge is built\n",
+    "        if self.state[-1][-1] != 0:\n",
+    "            r = r + 10 - self.structure()**2  # i.e. the maximum moment, squared\n",
+    "            done = True\n",
+    "            return self.state.ravel(), r, done\n",
+    "\n",
+    "        s = self.state / np.max(self.state)\n",
+    "        zero_mask = np.where(s == 0)\n",
+    "        s[s < 1] = 0.5\n",
+    "        s[zero_mask] = 0\n",
+    "        self.det_valid_actions()\n",
+    "        \n",
+    "        # valid action encoding\n",
+    "        a = np.zeros(8)\n",
+    "        \n",
+    "        try:\n",
+    "            a[np.array(self.valid_actions)] = 1\n",
+    "        except IndexError:\n",
+    "            done = True\n",
+    "            r -= 2\n",
+    "        \n",
+    "        return self.state.ravel(), r, done\n",
+    "    \n",
+    "#         return np.concatenate((self.state.ravel(), a)), r, done\n",
+    "    \n",
+    "    def det_valid_actions(self):\n",
+    "        no_action = set()\n",
+    "        right = 0\n",
+    "        left = 4\n",
+    "        top = 2\n",
+    "        down = 6\n",
+    "        top_right = 1\n",
+    "        top_left = 3\n",
+    "        down_right = 7\n",
+    "        down_left = 5\n",
+    "        \n",
+    "        # current location\n",
+    "        row, col = np.where(self.state == self.n)\n",
+    "        \n",
+    "        # right:\n",
+    "        try:\n",
+    "            if self.state[row, col + 1] != 0:\n",
+    "                no_action.add(right)\n",
+    "        except IndexError:\n",
+    "            no_action.add(right)\n",
+    "        \n",
+    "        if col - 1 < 0:\n",
+    "            no_action.add(left)\n",
+    "        elif self.state[row, col - 1] != 0:\n",
+    "            no_action.add(left)\n",
+    "\n",
+    "        if row - 1 < 0:\n",
+    "            no_action.add(top)\n",
+    "        elif self.state[row - 1, col] != 0:\n",
+    "            no_action.add(top)\n",
+    "        \n",
+    "        try:\n",
+    "            if self.state[row + 1, col] != 0:\n",
+    "                no_action.add(down)\n",
+    "        except IndexError:\n",
+    "            no_action.add(down)\n",
+    "        \n",
+    "        if col - 1 < 0 or row + 1 == self.height:\n",
+    "            no_action.add(down_left)\n",
+    "        elif self.state[row + 1, col - 1] != 0:\n",
+    "            no_action.add(down_left)\n",
+    "\n",
+    "        try:\n",
+    "            if self.state[row + 1, col + 1] != 0:\n",
+    "                no_action.add(down_right)\n",
+    "        except IndexError:\n",
+    "            no_action.add(down_right)\n",
+    "        \n",
+    "        if row - 1 < 0 or col - 1 < 0:\n",
+    "            no_action.add(top_left)\n",
+    "        elif self.state[row - 1, col - 1] != 0:\n",
+    "            no_action.add(top_left)\n",
+    "\n",
+    "        if row - 1 < 0 or col + 1 == self.length:\n",
+    "            no_action.add(top_right)\n",
+    "        elif self.state[row - 1, col + 1] != 0:\n",
+    "            no_action.add(top_right)\n",
+    "        \n",
+    "        self.no_action = no_action\n",
+    "        self.valid_actions = list(self.action_space - no_action)\n",
+    "    \n",
+    "    def step(self, a):\n",
+    "        \"\"\"\n",
+    "        :param a: (int) action direction\n",
+    "        \n",
+    "        → 0\n",
+    "        ↗ 1\n",
+    "        ↑ 2\n",
+    "        ↖ 3\n",
+    "        ← 4\n",
+    "        ↙ 5\n",
+    "        ↓ 6\n",
+    "        ↘ 7\n",
+    "        \"\"\"\n",
+    "        self.actions_chosen += 1\n",
+    "\n",
+    "        flat_location_index = np.argwhere(self.state.ravel() == self.n)\n",
+    "        \n",
+    "        if a in self.no_action:\n",
+    "            return self.return_action(-0.2)\n",
+    "        \n",
+    "        # there is a valid action\n",
+    "        self.n += 1\n",
+    "        \n",
+    "        move = self.move_map[a]\n",
+    "        self.state.ravel()[flat_location_index + move] = self.n\n",
+    "        \n",
+    "        return self.return_action(-0.1)\n",
+    "    \n",
+    "    def structure(self):\n",
+    "        ss = SystemElements()\n",
+    "        last_loc = [0, 0]\n",
+    "        for i in range(2, self.n + 1):\n",
+    "            row, col = np.where(i == self.state)\n",
+    "            \n",
+    "            y = self.height - 1 - row[0]\n",
+    "            x = col[0]\n",
+    "\n",
+    "            current_loc = [x, y]\n",
+    "            ss.add_element([last_loc, [x, y]])\n",
+    "            last_loc = current_loc\n",
+    "        \n",
+    "        n_nodes = len(ss.node_map)\n",
+    "        forces = -5 / (n_nodes - 2)\n",
+    "        for i in range(2, n_nodes):\n",
+    "            ss.point_load(node_id=i, Fz=forces)\n",
+    "        \n",
+    "        ss.add_support_hinged(1)\n",
+    "        ss.add_support_hinged(len(ss.node_map))\n",
+    "        ss.solve()\n",
+    "        \n",
+    "        f_max = np.max(np.abs(ss.get_element_result_range(self.optimize)))\n",
+    "#         if f_max not in self.result_map:\n",
+    "#             self.result_map[f_max] = ss\n",
+    "        \n",
+    "        return f_max\n",
+    "\n",
+    "\n",
+    "def test_env():\n",
+    "    env = Environment(4, 2)\n",
+    "    s = env.reset()\n",
+    "    print(s, s.size)\n",
+    "    print(env.step(1))\n",
+    "\n",
+    "    env.step(0)\n",
+    "    print(env.state, \"\\\\n\")\n",
+    "    env.step(7)\n",
+    "    print(env.state)\n",
+    "    env.structure()\n",
+    "\n",
+    "test_env()"
+   ]
+  },
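+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `move_map` above assumes a row-major flattened grid: moving up subtracts `length`, moving down adds it, and the diagonals combine the horizontal and vertical offsets. Below is a minimal sanity check of that arithmetic on a 2x4 grid (the grid values and the index are only illustrative, not part of the environment):"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sanity check of the flat-index displacements on a 2x4 row-major grid.\n",
+    "grid = np.arange(8).reshape(2, 4)\n",
+    "length = 4\n",
+    "flat = grid.ravel()\n",
+    "idx = 5  # grid[1, 1], a cell in the bottom row\n",
+    "assert flat[idx + 1] == grid[1, 2]           # right: +1\n",
+    "assert flat[idx - length] == grid[0, 1]      # up: -length\n",
+    "assert flat[idx - length + 1] == grid[0, 2]  # up-right: -length + 1\n",
+    "assert flat[idx - length - 1] == grid[0, 0]  # up-left: -length - 1"
+   ]
+  },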
action\n", + " self.n += 1\n", + " \n", + " move = self.move_map[a]\n", + " self.state.ravel()[flat_location_index + move] = self.n\n", + " \n", + " return self.return_action(-0.1)\n", + " \n", + " def structure(self):\n", + " ss = SystemElements()\n", + " last_loc = [0, 0]\n", + " for i in range(2, self.n + 1):\n", + " row, col = np.where(i == self.state)\n", + " \n", + " y = self.height - 1 - row[0]\n", + " x = col[0] \n", + "\n", + " current_loc = [x, y]\n", + " ss.add_element([last_loc, [x, y]])\n", + " last_loc = current_loc\n", + " \n", + " n_nodes = len(ss.node_map)\n", + " forces = -5 / (n_nodes - 2)\n", + " for i in range(2, n_nodes):\n", + " ss.point_load(node_id=i, Fz=forces)\n", + " \n", + " ss.add_support_hinged(1)\n", + " ss.add_support_hinged(len(ss.node_map))\n", + " ss.solve()\n", + " \n", + " f_max = np.max(np.abs(ss.get_element_result_range(self.optimize)))\n", + "# if f_max not in self.result_map:\n", + "# self.result_map[f_max] = ss\n", + " \n", + " return f_max\n", + "\n", + " \n", + "\n", + " \n", + "def test_env():\n", + " env = Environment(4, 2)\n", + " s = env.reset()\n", + " print(s, s.size)\n", + " print(env.step(1))\n", + "\n", + " env.step(0)\n", + " print(env.state, \"\\n\")\n", + " env.step(7)\n", + " print(env.state)\n", + " env.structure()\n", + " \n", + "test_env()" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# https://theneuralperspective.com/2016/11/25/reinforcement-learning-rl-policy-gradients-i/\n", + "\n", + "class Agent:\n", + " def __init__(self, data_size, hidden_size, action_space, learning_rate):\n", + " \"\"\"\n", + " :param data_size: (int) Columns of the data vector.\n", + " :param hidden_size: (int) No. of hidden nodes.\n", + " :param action_space: (int) No. of outputs.\n", + " :param learning_rate: (flt)\n", + " \"\"\"\n", + " # Step 1: Feed forward\n", + " # The argmax is the maximum Q-value.\n", + " self.input_s = tf.placeholder(tf.float32, [None, data_size], name=\"input_s\")\n", + " self.w1 = tf.get_variable(\"w1\", shape=[data_size, hidden_size[0]], initializer=tf.contrib.layers.xavier_initializer())\n", + " self.b1 = tf.get_variable(\"b1\", shape=(hidden_size[0], ), initializer=tf.zeros_initializer())\n", + " self.layer_1 = tf.nn.relu(tf.matmul(self.input_s, self.w1) + self.b1)\n", + " \n", + " self.w2 = tf.get_variable(\"w2\", shape=[hidden_size[0], hidden_size[1]], initializer=tf.contrib.layers.xavier_initializer())\n", + " self.b2 = tf.get_variable(\"b2\", shape=(hidden_size[1], ), initializer=tf.zeros_initializer())\n", + " self.layer_2 = tf.nn.relu(tf.matmul(self.layer_1, self.w2) + self.b2)\n", + " \n", + " self.w_out = tf.get_variable(\"w_out\", shape=[hidden_size[1], action_space], initializer=tf.contrib.layers.xavier_initializer())\n", + " self.b_out = tf.get_variable(\"b_out\", shape=(action_space, ), initializer=tf.zeros_initializer())\n", + " \n", + " # argmax(Q(s, a)) \n", + " self.predict_Q = tf.matmul(self.layer_2, self.w_out) + self.b_out # actual Q-value\n", + " self.p = tf.nn.softmax(self.predict_Q)\n", + " self.Q_a = tf.argmax(self.predict_Q, 1)\n", + " self.saver = tf.train.Saver()\n", + "\n", + " \n", + " # Step 2: Determine loss / gradients. 
\n", + " # One hot encoded actions\n", + " self.executed_actions = tf.placeholder(tf.int32, name=\"executed_actions\")\n", + " \n", + " self.one_hot = tf.one_hot(self.executed_actions, 8)\n", + " self.Q = tf.reduce_sum(tf.multiply(self.predict_Q, self.one_hot), axis=1)\n", + " self.next_Q_r = tf.placeholder(tf.float32, name=\"next_Q\")\n", + "\n", + " # Loss\n", + " # mse: ( target - prediction)^2\n", + " # r + max(Q(s', a') - Q(s, a) )^2\n", + " \n", + " self.loss = tf.reduce_sum(tf.square(self.next_Q_r - self.Q)) \n", + " optimizer = tf.train.AdamOptimizer(learning_rate)\n", + " self.train_count = tf.Variable(0, trainable=False)\n", + " self.train = optimizer.minimize(self.loss, self.train_count)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def discounted_reward(r, gamma):\n", + " \"\"\"\n", + " The reward for a given state. Is the reward for that state + the discounted sum of future rewards.\n", + " \n", + " :param r: (array) Rewards.\n", + " :param gamma: (flt) Discount factor\n", + " \"\"\"\n", + " return np.cumsum(r * gamma**(np.arange(len(r)))[::-1])[::-1]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "env = Environment(4, 2)\n", + "env.reset().size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.3054 train_count 15 loss 3898.21\n", + "0.4478 train_count 70 loss 16990.0\n", + "0.7954 train_count 107 loss 48269.6\n", + "0.9927 train_count 130 loss 88762.8\n", + "1.2139 train_count 144 loss 129566.0\n", + "1.2376 train_count 145 loss 131087.0\n", + "1.2771 train_count 148 loss 135273.0\n", + "1.2852 train_count 152 loss 153039.0\n", + "1.3482 train_count 154 loss 163115.0\n", + "1.3247 train_count 158 loss 173414.0\n", + "1.4037 train_count 159 loss 180652.0\n" + ] + } + ], + "source": [ + "from collections import deque\n", + "# %matplotlib inline\n", + "# %matplotlib notebook\n", + "# # %load_ext autoreload\n", + "# # %autoreload 2\n", + "\n", + "\n", + "# fig = plt.figure(figsize=(12, 6))\n", + "# ax = fig.add_subplot(111)\n", + "# fig.show()\n", + "# fig.canvas.draw()\n", + "\n", + "\n", + "env = Environment(3, 1, \"moment\")\n", + "\n", + "H = [16, 16] # hidden neurons\n", + "D = 3 # input (state of the environment)\n", + "learning_rate = 1e-3\n", + "gamma = 0.99 # discount factor\n", + "epochs = 50000\n", + "max_frames = 50\n", + "action_space = 8\n", + "\n", + "contin = 0\n", + "\n", + "if not contin:\n", + " tf.reset_default_graph()\n", + " agent = Agent(D, H, action_space, learning_rate)\n", + " init = tf.global_variables_initializer()\n", + " #with tf.Session() as sess:\n", + " sess = tf.Session()\n", + " sess.run(init)\n", + " buffer = deque()\n", + "\n", + "scores= []\n", + "n_done = 0\n", + "\n", + "last_ep = 0\n", + "\n", + "#https://github.com/awjuliani/DeepRL-Agents/blob/master/Q-Network.ipynb\n", + "n_updates = 0\n", + "for ep in range(epochs):\n", + " if (ep + 1) % 300 == 0:\n", + " print(np.mean(scores[-500:]), \"train_count\", train_count, \"loss\", loss)\n", + " \n", + " s = env.reset()\n", + " s = [s]\n", + " for c in range(max_frames):\n", + " \n", + " p, Q = sess.run([agent.p, agent.predict_Q], {agent.input_s: s})\n", + " \n", + " a = 
+  {
+   "cell_type": "code",
+   "execution_count": 140,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 0.26007631, -0.34215826,  0.26007631, ...,  0.52565402,\n",
+       "        0.80317271,  0.80317271])"
+      ]
+     },
+     "execution_count": 140,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.sum(Q * np.eye(8)[np.array(a, dtype=int)], 1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 138,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,\n",
+       "        -0.        ,  0.26007631],\n",
+       "       [ 0.        ,  0.        ,  0.        , ...,  0.        ,\n",
+       "        -0.34215826,  0.        ],\n",
+       "       [ 0.        ,  0.        ,  0.        , ...,  0.        ,\n",
+       "        -0.        ,  0.26007631],\n",
+       "       ..., \n",
+       "       [ 0.        ,  0.52565402,  0.        , ...,  0.        ,\n",
+       "        -0.        ,  0.        ],\n",
+       "       [ 0.80317271,  0.        ,  0.        , ...,  0.        ,\n",
+       "        -0.        ,  0.        ],\n",
+       "       [ 0.80317271,  0.        ,  0.        , ...,  0.        ,\n",
+       "        -0.        ,  0.        ]])"
+      ]
+     },
+     "execution_count": 138,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x = Q * np.eye(8)[np.array(a, dtype=int)]\n",
+    "x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "7"
+      ]
+     },
+     "execution_count": 136,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 161,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'/home/ritchie46/Downloads/model_anastruct/model_bridge_4_2_moment.ckpt'"
+      ]
+     },
+     "execution_count": 161,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "agent.saver.save(sess, \"/home/ritchie46/Downloads/model_anastruct/model_bridge_4_2_moment.ckpt\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'G:\\\\bridge_builder\\\\model_bridge_4_2_axial\\\\model.ckpt'"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "agent.saver.save(sess, r\"G:\\\\bridge_builder\\\\model_bridge_4_2_axial\\\\model.ckpt\")"
+   ]
+  },
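+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To continue from a saved model, the checkpoint can be restored into a freshly built graph. A minimal sketch, assuming the same `Agent` hyperparameters and the checkpoint path saved above:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Restore a saved checkpoint into a fresh graph (sketch).\n",
+    "tf.reset_default_graph()\n",
+    "agent = Agent(D, H, action_space, learning_rate)\n",
+    "sess = tf.Session()\n",
+    "sess.run(tf.global_variables_initializer())\n",
+    "agent.saver.restore(sess, \"/home/ritchie46/Downloads/model_anastruct/model_bridge_4_2_moment.ckpt\")"
+   ]
+  },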
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      " [[ 1.  0.  0.]]\n",
+      "5\n",
+      "[[-0.10357568 -0.11130837  0.08943851 -0.03797921 -0.03640079  0.09984766\n",
+      "   0.0517213  -0.13007079]]\n",
+      "\n",
+      " [[ 1.  0.  0.]]\n",
+      "5\n",
+      "[[-0.10357568 -0.11130837  0.08943851 -0.03797921 -0.03640079  0.09984766\n",
+      "   0.0517213  -0.13007079]]\n",
+      "\n",
+      " [[ 1.  0.  0.]]\n",
+      "5\n",
+      "[[-0.10357568 -0.11130837  0.08943851 -0.03797921 -0.03640079  0.09984766\n",
+      "   0.0517213  -0.13007079]]\n",
+      "\n",
+      " [[ 1.  0.  0.]]\n",
+      "5\n",
+      "[[-0.10357568 -0.11130837  0.08943851 -0.03797921 -0.03640079  0.09984766\n",
+      "   0.0517213  -0.13007079]]\n",
+      "\n",
+      " [[ 1.  0.  0.]]\n",
+      "5\n",
+      "[[-0.10357568 -0.11130837  0.08943851 -0.03797921 -0.03640079  0.09984766\n",
+      "   0.0517213  -0.13007079]]\n",
+      "\n",
+      " [[ 1.  0.  0.]]\n",
+      "5\n",
+      "[[-0.10357568 -0.11130837  0.08943851 -0.03797921 -0.03640079  0.09984766\n",
+      "   0.0517213  -0.13007079]]\n",
+      "\n",
+      " [[ 1.  0.  0.]]\n",
+      "5\n",
+      "[[-0.10357568 -0.11130837  0.08943851 -0.03797921 -0.03640079  0.09984766\n",
+      "   0.0517213  -0.13007079]]\n",
+      "\r",
+      " -1.4"
+     ]
+    }
+   ],
+   "source": [
+    "s = env.reset()\n",
+    "\n",
+    "\n",
+    "\"\"\" \n",
+    "    → 0\n",
+    "    ↗ 1\n",
+    "    ↑ 2\n",
+    "    ↖ 3\n",
+    "    ← 4\n",
+    "    ↙ 5\n",
+    "    ↓ 6\n",
+    "    ↘ 7\n",
+    "\"\"\"\n",
+    "\n",
+    "total_r = 0\n",
+    "j = 0\n",
+    "for a in [0, 0, 1, 1, 0, 0, 0]:  # the list only fixes the number of steps; a is overwritten below\n",
+    "    j += 1\n",
+    "\n",
+    "    print(\"\\\\n\", env.state)\n",
+    "    a_dst = sess.run(agent.predict_Q, {agent.input_s: [s]})\n",
+    "    a = np.argmax(a_dst)  # greedy action\n",
+    "    # a = np.random.choice(np.arange(8), p=a_dst[0])\n",
+    "\n",
+    "    s, r, d = env.step(a)\n",
+    "    print(a)\n",
+    "    print(a_dst)\n",
+    "    total_r += r\n",
+    "    \n",
+    "#     if d == True:\n",
+    "#         j = 0\n",
+    "#         print(env.state)\n",
+    "#         break\n",
+    "#         # env.reset()\n",
+    "    \n",
+    "print(\"\\\\r\", total_r, end=\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 0.76904085,  0.296608  ,  0.9456555 ,  0.73741889,  0.83164985,\n",
+       "        0.82735085,  0.58143395,  0.64800572])"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.random.random(8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ 1.,  1.,  1.,  0.,  0.,  0.,  0.,  0.])"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "env = Environment(5, 4)\n",
+    "s = env.reset()\n",
+    "actions = s[-8:]\n",
+    "actions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 1.  1.  1.  1.  1.  0.  0.  1.]\n",
+      "[0, 1, 2, 3, 4, 7]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 0.,  0.,  0.,  0.,  0.],\n",
+       "       [ 0.,  0.,  5.,  0.,  0.],\n",
+       "       [ 0.,  4.,  3.,  0.,  0.],\n",
+       "       [ 1.,  2.,  0.,  0.,  0.]])"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "s = env.step(0)[0]\n",
+    "s = env.step(1)[0]\n",
+    "s = env.step(4)[0]\n",
+    "s = env.step(1)[0]\n",
+    "actions = s[-8:]\n",
+    "print(actions)\n",
+    "print(env.valid_actions)\n",
+    "env.state"
+   ]
+  },
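+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The two cells above were run with the commented-out valid-action encoding enabled, so the last eight entries of `s` mark the valid actions. That mask could also constrain exploration, so invalid moves are never sampled. A sketch, assuming `env.valid_actions` as defined above and a stand-in for the agent's softmax output:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sample only from the valid actions by masking the probabilities (sketch).\n",
+    "p = np.random.random(8)  # stand-in for the agent's softmax output\n",
+    "mask = np.zeros(8)\n",
+    "mask[env.valid_actions] = 1\n",
+    "masked = p * mask\n",
+    "masked /= masked.sum()   # renormalize over the valid actions\n",
+    "np.random.choice(np.arange(8), p=masked)"
+   ]
+  },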
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "noise = np.random.random(8)\n",
+    "noise /= noise.sum()\n",
+    "noise.sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 149,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{5, 6, 7}"
+      ]
+     },
+     "execution_count": 149,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "\"\"\" \n",
+    "    → 0\n",
+    "    ↗ 1\n",
+    "    ↑ 2\n",
+    "    ↖ 3\n",
+    "    ← 4\n",
+    "    ↙ 5\n",
+    "    ↓ 6\n",
+    "    ↘ 7\n",
+    "\"\"\"\n",
+    "env.no_action"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.3"
+  },
+  "latex_envs": {
+   "bibliofile": "biblio.bib",
+   "cite_by": "apalike",
+   "current_citInitial": 1,
+   "eqLabelWithNumbers": true,
+   "eqNumInitial": 0
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}