Q algorithm learns
reinforcement_learning/deep_Q_bridge.ipynb (new file, 841 lines)
@@ -0,0 +1,841 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pickle\n",
"import tensorflow as tf\n",
"import sys\n",
"\n",
"if sys.platform == \"win32\":\n",
"    sys.path.append(r\"C:\\\Users\\\vik\\\Dropbox\\\Code\\\Python\\\structural_engineering\")\n",
"else:\n",
"    sys.path.append(\"/home/ritchie46/Dropbox/Code/Python/structural_engineering\")\n",
"\n",
"from anastruct.fem.system import SystemElements\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0. 0. 0. 0. 1. 0. 0. 0.] 8\n",
"(array([ 0., 2., 0., 0., 1., 0., 0., 0.]), -0.1, False)\n",
"[[ 0. 2. 3. 0.]\n",
" [ 1. 0. 0. 0.]] \n",
"\n",
"[[ 0. 2. 3. 0.]\n",
" [ 1. 0. 0. 4.]]\n"
]
}
],
"source": [
"class Environment:\n",
"    def __init__(self, length=3, height=2, optimize='moment'):\n",
"        self.length = length\n",
"        self.height = height\n",
"        self.state = None\n",
"        self.n = None\n",
"        self.actions_chosen = None\n",
"        self.no_action = None\n",
"        self.action_space = {0, 1, 2, 3, 4, 5, 6, 7}\n",
"        self.valid_actions = None\n",
"        self.optimize = optimize\n",
"        self.result_map = {}\n",
"\n",
"        # actions\n",
"        right = 0\n",
"        left = 4\n",
"        up = 2\n",
"        down = 6\n",
"        up_right = 1\n",
"        up_left = 3\n",
"        down_right = 7\n",
"        down_left = 5\n",
"\n",
"        # On the flattened state array, each move maps to a fixed index offset.\n",
"        self.move_map = {right: 1,\n",
"                         left: -1,\n",
"                         up: -length,\n",
"                         down: length,\n",
"                         up_right: -length + 1,\n",
"                         up_left: -length - 1,\n",
"                         down_right: length + 1,\n",
"                         down_left: length - 1}\n",
"\n",
"    def reset(self):\n",
"        self.state = np.zeros((self.height, self.length))\n",
"        self.n = 1\n",
"        self.actions_chosen = 0\n",
"        self.state[-1][0] = self.n\n",
"        self.det_valid_actions()\n",
"\n",
"        return self.state.ravel()\n",
"#         # valid action encoding\n",
"#         a = np.zeros(8)\n",
"#         a[np.array(self.valid_actions)] = 1\n",
"\n",
"#         return np.concatenate((self.state.ravel(), a))\n",
"\n",
"    def return_action(self, r):\n",
"        done = False\n",
"\n",
"        # Bridge is built\n",
"        if self.state[-1][-1] != 0:\n",
"            r = r + 10 - self.structure()**2  # i.e. the maximum moment, squared\n",
"            done = True\n",
"            return self.state, r, done\n",
"\n",
"        # (normalised state; only used by the commented-out encoding below)\n",
"        s = self.state / np.max(self.state)\n",
"        zero_mask = np.where(s == 0)\n",
"        s[s < 1] = 0.5\n",
"        s[zero_mask] = 0\n",
"        self.det_valid_actions()\n",
"\n",
"        # valid action encoding\n",
"        a = np.zeros(8)\n",
"\n",
"        try:\n",
"            a[np.array(self.valid_actions)] = 1\n",
"        except IndexError:\n",
"            done = True\n",
"            r -= 2\n",
"\n",
"        return self.state.ravel(), r, done\n",
"\n",
"#         return np.concatenate((self.state.ravel(), a)), r, done\n",
"\n",
"    def det_valid_actions(self):\n",
"        no_action = set()\n",
"        right = 0\n",
"        left = 4\n",
"        top = 2\n",
"        down = 6\n",
"        top_right = 1\n",
"        top_left = 3\n",
"        down_right = 7\n",
"        down_left = 5\n",
"\n",
"        # current location\n",
"        row, col = np.where(self.state == self.n)\n",
"\n",
"        # right:\n",
"        try:\n",
"            if self.state[row, col + 1] != 0:\n",
"                no_action.add(right)\n",
"        except IndexError:\n",
"            no_action.add(right)\n",
"\n",
"        if col - 1 < 0:\n",
"            no_action.add(left)\n",
"        elif self.state[row, col - 1] != 0:\n",
"            no_action.add(left)\n",
"\n",
"        if row - 1 < 0:\n",
"            no_action.add(top)\n",
"        elif self.state[row - 1, col] != 0:\n",
"            no_action.add(top)\n",
"\n",
"        try:\n",
"            if self.state[row + 1, col] != 0:\n",
"                no_action.add(down)\n",
"        except IndexError:\n",
"            no_action.add(down)\n",
"\n",
"        if col - 1 < 0 or row + 1 == self.height:\n",
"            no_action.add(down_left)\n",
"        elif self.state[row + 1, col - 1] != 0:\n",
"            no_action.add(down_left)\n",
"\n",
"        try:\n",
"            if self.state[row + 1, col + 1] != 0:\n",
"                no_action.add(down_right)\n",
"        except IndexError:\n",
"            no_action.add(down_right)\n",
"\n",
"        if row - 1 < 0 or col - 1 < 0:\n",
"            no_action.add(top_left)\n",
"        elif self.state[row - 1, col - 1] != 0:\n",
"            no_action.add(top_left)\n",
"\n",
"        if row - 1 < 0 or col + 1 == self.length:\n",
"            no_action.add(top_right)\n",
"        elif self.state[row - 1, col + 1] != 0:\n",
"            no_action.add(top_right)\n",
"\n",
"        self.no_action = no_action\n",
"        self.valid_actions = list(self.action_space - no_action)\n",
"\n",
"    def step(self, a):\n",
"        \"\"\"\n",
"        :param a: (int) action direction\n",
"\n",
"        → 0\n",
"        ↗ 1\n",
"        ↑ 2\n",
"        ↖ 3\n",
"        ← 4\n",
"        ↙ 5\n",
"        ↓ 6\n",
"        ↘ 7\n",
"        \"\"\"\n",
"        self.actions_chosen += 1\n",
"\n",
"        flat_location_index = np.argwhere(self.state.ravel() == self.n)\n",
"\n",
"        if a in self.no_action:\n",
"            return self.return_action(-0.2)\n",
"\n",
"        # there is a valid action\n",
"        self.n += 1\n",
"\n",
"        move = self.move_map[a]\n",
"        self.state.ravel()[flat_location_index + move] = self.n\n",
"\n",
"        return self.return_action(-0.1)\n",
"\n",
"    def structure(self):\n",
"        ss = SystemElements()\n",
"        last_loc = [0, 0]\n",
"        for i in range(2, self.n + 1):\n",
"            row, col = np.where(i == self.state)\n",
"\n",
"            y = self.height - 1 - row[0]\n",
"            x = col[0]\n",
"\n",
"            current_loc = [x, y]\n",
"            ss.add_element([last_loc, [x, y]])\n",
"            last_loc = current_loc\n",
"\n",
"        n_nodes = len(ss.node_map)\n",
"        forces = -5 / (n_nodes - 2)\n",
"        for i in range(2, n_nodes):\n",
"            ss.point_load(node_id=i, Fz=forces)\n",
"\n",
"        ss.add_support_hinged(1)\n",
"        ss.add_support_hinged(len(ss.node_map))\n",
"        ss.solve()\n",
"\n",
"        f_max = np.max(np.abs(ss.get_element_result_range(self.optimize)))\n",
"#         if f_max not in self.result_map:\n",
"#             self.result_map[f_max] = ss\n",
"\n",
"        return f_max\n",
"\n",
"\n",
"def test_env():\n",
"    env = Environment(4, 2)\n",
"    s = env.reset()\n",
"    print(s, s.size)\n",
"    print(env.step(1))\n",
"\n",
"    env.step(0)\n",
"    print(env.state, \"\\\n\")\n",
"    env.step(7)\n",
"    print(env.state)\n",
"    env.structure()\n",
"\n",
"test_env()"
]
},
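{
"cell_type": "markdown",
"metadata": {},
"source": [
"Quick sanity check (an added sketch, not part of the original notebook): on the flattened `height x length` grid, one move corresponds to a fixed index offset, e.g. `up = -length`, exactly as `move_map` assumes. The cell below verifies this on a hypothetical 2x3 grid."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: verify the flat-index offsets encoded in Environment.move_map\n",
"grid = np.arange(6).reshape(2, 3)     # 2 rows, length = 3\n",
"flat = grid.ravel()\n",
"i = 4                                 # the element at row 1, col 1\n",
"assert flat[i - 3] == grid[0, 1]      # 'up' is -length on the flat array\n",
"assert flat[i + 1] == grid[1, 2]      # 'right' is +1\n",
"assert flat[i - 3 + 1] == grid[0, 2]  # 'up_right' is -length + 1\n",
"print(\"move_map offsets check out\")"
]
},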
{
"cell_type": "code",
"execution_count": 158,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# https://theneuralperspective.com/2016/11/25/reinforcement-learning-rl-policy-gradients-i/\n",
"\n",
"class Agent:\n",
"    def __init__(self, data_size, hidden_size, action_space, learning_rate):\n",
"        \"\"\"\n",
"        :param data_size: (int) Columns of the data vector.\n",
"        :param hidden_size: (int) No. of hidden nodes per layer.\n",
"        :param action_space: (int) No. of outputs.\n",
"        :param learning_rate: (flt)\n",
"        \"\"\"\n",
"        # Step 1: feed forward.\n",
"        # The argmax over the output layer is the greedy action (the one with the maximum Q-value).\n",
"        self.input_s = tf.placeholder(tf.float32, [None, data_size], name=\"input_s\")\n",
"        self.w1 = tf.get_variable(\"w1\", shape=[data_size, hidden_size[0]], initializer=tf.contrib.layers.xavier_initializer())\n",
"        self.b1 = tf.get_variable(\"b1\", shape=(hidden_size[0], ), initializer=tf.zeros_initializer())\n",
"        self.layer_1 = tf.nn.relu(tf.matmul(self.input_s, self.w1) + self.b1)\n",
"\n",
"        self.w2 = tf.get_variable(\"w2\", shape=[hidden_size[0], hidden_size[1]], initializer=tf.contrib.layers.xavier_initializer())\n",
"        self.b2 = tf.get_variable(\"b2\", shape=(hidden_size[1], ), initializer=tf.zeros_initializer())\n",
"        self.layer_2 = tf.nn.relu(tf.matmul(self.layer_1, self.w2) + self.b2)\n",
"\n",
"        self.w_out = tf.get_variable(\"w_out\", shape=[hidden_size[1], action_space], initializer=tf.contrib.layers.xavier_initializer())\n",
"        self.b_out = tf.get_variable(\"b_out\", shape=(action_space, ), initializer=tf.zeros_initializer())\n",
"\n",
"        self.predict_Q = tf.matmul(self.layer_2, self.w_out) + self.b_out  # predicted Q-values per action\n",
"        self.p = tf.nn.softmax(self.predict_Q)\n",
"        self.Q_a = tf.argmax(self.predict_Q, 1)  # argmax_a Q(s, a)\n",
"\n",
"        # Step 2: determine the loss / gradients.\n",
"        # One-hot encoded actions select the Q-value of the action actually taken.\n",
"        self.executed_actions = tf.placeholder(tf.int32, name=\"executed_actions\")\n",
"\n",
"        self.one_hot = tf.one_hot(self.executed_actions, 8)\n",
"        self.Q = tf.reduce_sum(tf.multiply(self.predict_Q, self.one_hot), axis=1)\n",
"        self.next_Q_r = tf.placeholder(tf.float32, name=\"next_Q\")\n",
"\n",
"        # Loss: squared TD error, (target - prediction)^2\n",
"        # = (r + gamma * max_a' Q(s', a') - Q(s, a))^2\n",
"        self.loss = tf.reduce_sum(tf.square(self.next_Q_r - self.Q))\n",
"        optimizer = tf.train.AdamOptimizer(learning_rate)\n",
"        self.train_count = tf.Variable(0, trainable=False)\n",
"        self.train = optimizer.minimize(self.loss, global_step=self.train_count)\n",
"\n",
"        # Create the Saver last, so it captures all variables defined above.\n",
"        self.saver = tf.train.Saver()\n"
]
},
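{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, an added numpy sketch (independent of the TensorFlow graph above) of the quantity the loss minimises: the squared TD error between the target `r + gamma * max_a' Q(s', a')` and the predicted `Q(s, a)` of the executed action. All numbers are made up."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch of one term of the Agent's squared TD-error loss, with hypothetical values\n",
"Q_s = np.array([0.1, 0.5, -0.2])     # hypothetical Q(s, .) for three actions\n",
"Q_s_new = np.array([0.0, 0.3, 0.7])  # hypothetical Q(s', .)\n",
"a, r, gamma = 1, -0.1, 0.99\n",
"target = r + gamma * Q_s_new.max()   # r + gamma * max_a' Q(s', a')\n",
"td_error = target - Q_s[a]           # target - Q(s, a)\n",
"td_error**2                          # one term of the summed loss"
]
},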
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def discounted_reward(r, gamma):\n",
"    \"\"\"\n",
"    The return for a given state is the reward for that state plus the discounted sum of future rewards.\n",
"\n",
"    :param r: (array) Rewards.\n",
"    :param gamma: (flt) Discount factor.\n",
"    \"\"\"\n",
"    g = np.zeros(len(r))\n",
"    running = 0.0\n",
"    # accumulate from the last reward backwards: G_t = r_t + gamma * G_{t+1}\n",
"    for t in reversed(range(len(r))):\n",
"        running = r[t] + gamma * running\n",
"        g[t] = running\n",
"    return g\n"
]
},
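{
"cell_type": "markdown",
"metadata": {},
"source": [
"An added check that `discounted_reward` satisfies the recursion `G_t = r_t + gamma * G_{t+1}`:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"g = discounted_reward(np.array([1.0, 1.0, 1.0]), 0.9)\n",
"print(g)  # expected: [2.71, 1.9, 1.0]\n",
"assert np.isclose(g[0], 1.0 + 0.9 * g[1])\n",
"assert np.isclose(g[1], 1.0 + 0.9 * g[2])"
]
},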
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env = Environment(4, 2)\n",
"env.reset().size"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.3054 train_count 15 loss 3898.21\n",
"0.4478 train_count 70 loss 16990.0\n",
"0.7954 train_count 107 loss 48269.6\n",
"0.9927 train_count 130 loss 88762.8\n",
"1.2139 train_count 144 loss 129566.0\n",
"1.2376 train_count 145 loss 131087.0\n",
"1.2771 train_count 148 loss 135273.0\n",
"1.2852 train_count 152 loss 153039.0\n",
"1.3482 train_count 154 loss 163115.0\n",
"1.3247 train_count 158 loss 173414.0\n",
"1.4037 train_count 159 loss 180652.0\n"
]
}
],
"source": [
"from collections import deque\n",
"# %matplotlib inline\n",
"# %matplotlib notebook\n",
"# # %load_ext autoreload\n",
"# # %autoreload 2\n",
"\n",
"\n",
"# fig = plt.figure(figsize=(12, 6))\n",
"# ax = fig.add_subplot(111)\n",
"# fig.show()\n",
"# fig.canvas.draw()\n",
"\n",
"\n",
"env = Environment(3, 1, \"moment\")\n",
"\n",
"H = [16, 16]  # hidden neurons\n",
"D = 3  # input size (state of the environment)\n",
"learning_rate = 1e-3\n",
"gamma = 0.99  # discount factor\n",
"epochs = 50000\n",
"max_frames = 50\n",
"action_space = 8\n",
"\n",
"contin = 0\n",
"\n",
"if not contin:\n",
"    tf.reset_default_graph()\n",
"    agent = Agent(D, H, action_space, learning_rate)\n",
"    init = tf.global_variables_initializer()\n",
"    # with tf.Session() as sess:\n",
"    sess = tf.Session()\n",
"    sess.run(init)\n",
"    buffer = deque()\n",
"\n",
"scores = []\n",
"n_done = 0\n",
"\n",
"last_ep = 0\n",
"\n",
"# https://github.com/awjuliani/DeepRL-Agents/blob/master/Q-Network.ipynb\n",
"n_updates = 0\n",
"train_count, loss = 0, np.nan  # so the periodic print below works before the first update\n",
"for ep in range(epochs):\n",
"    if (ep + 1) % 300 == 0:\n",
"        print(np.mean(scores[-500:]), \"train_count\", train_count, \"loss\", loss)\n",
"\n",
"    s = env.reset()\n",
"    s = [s]\n",
"    for c in range(max_frames):\n",
"\n",
"        p, Q = sess.run([agent.p, agent.predict_Q], {agent.input_s: s})\n",
"\n",
"        a = np.random.choice(np.arange(8), p=p[0])  # sample an action index from the softmax over Q\n",
"        s_new, r, done = env.step(a)\n",
"        scores.append(r)\n",
"\n",
"        buffer.append([s, a, r, s_new])\n",
"\n",
"        if len(buffer) > 5000:\n",
"            buffer.popleft()  # evict the oldest transition, not the one just appended\n",
"        s = [s_new]\n",
"\n",
"        if done:\n",
"\n",
"            if len(buffer) > 2000 and c % 5 == 0:\n",
"                batch = np.vstack(buffer)\n",
"                # sample (with replacement) from the whole buffer\n",
"                batch = batch[np.random.randint(0, len(batch), size=1500)]\n",
"\n",
"                s = np.vstack(batch[:, 0])\n",
"                s_new = np.vstack(batch[:, 3])\n",
"                r = batch[:, 2]\n",
"                a = batch[:, 1]\n",
"                Q = sess.run(agent.predict_Q, {agent.input_s: s})\n",
"                Q_new = sess.run(agent.predict_Q, {agent.input_s: s_new})\n",
"                max_Q_new = np.max(Q_new, 1)\n",
"\n",
"                target_Q = r + gamma * max_Q_new\n",
"\n",
"                train_count, Q_, one_hot, loss, _ = sess.run(\n",
"                    [agent.train_count, agent.Q, agent.one_hot, agent.loss, agent.train],\n",
"                    feed_dict={agent.input_s: s,\n",
"                               agent.executed_actions: a,\n",
"                               agent.next_Q_r: target_Q})\n",
"\n",
"#                 print(one_hot.shape)\n",
"#                 print(one_hot[0, :])\n",
"#                 print(\"predict_Q\", Q[0,:])\n",
"#                 print(Q_.shape)\n",
"#                 print(Q_[0])\n",
"\n",
"            break\n"
]
},
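{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the loop above explores by sampling actions from a softmax over the Q-values. A common alternative, not used in this notebook and added here only as a sketch, is epsilon-greedy exploration:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def epsilon_greedy(q_values, epsilon=0.1):\n",
"    \"\"\"Sketch of epsilon-greedy action selection over one row of Q-values.\"\"\"\n",
"    if np.random.rand() < epsilon:\n",
"        return np.random.randint(len(q_values))  # explore: uniform random action\n",
"    return int(np.argmax(q_values))              # exploit: greedy action\n",
"\n",
"epsilon_greedy(np.array([0.1, 0.5, -0.2]))"
]
},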
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0.26007631, -0.34215826, 0.26007631, ..., 0.52565402,\n",
" 0.80317271, 0.80317271])"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.sum(Q * np.eye(8)[np.array(a, dtype=int)], 1)"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. , 0. , 0. , ..., 0. ,\n",
" -0. , 0.26007631],\n",
" [ 0. , 0. , 0. , ..., 0. ,\n",
" -0.34215826, 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. ,\n",
" -0. , 0.26007631],\n",
" ..., \n",
" [ 0. , 0.52565402, 0. , ..., 0. ,\n",
" -0. , 0. ],\n",
" [ 0.80317271, 0. , 0. , ..., 0. ,\n",
" -0. , 0. ],\n",
" [ 0.80317271, 0. , 0. , ..., 0. ,\n",
" -0. , 0. ]])"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x = Q * np.eye(8)[np.array(a, dtype=int)]\n",
"x"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'/home/ritchie46/Downloads/model_anastruct/model_bridge_4_2_moment.ckpt'"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.saver.save(sess, \"/home/ritchie46/Downloads/model_anastruct/model_bridge_4_2_moment.ckpt\")\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'G:\\\\bridge_builder\\\\model_bridge_4_2_axial\\\\model.ckpt'"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"agent.saver.save(sess, r\"G:\\\bridge_builder\\\model_bridge_4_2_axial\\\model.ckpt\")"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" [[ 1. 0. 0.]]\n",
"5\n",
"[[-0.10357568 -0.11130837 0.08943851 -0.03797921 -0.03640079 0.09984766\n",
" 0.0517213 -0.13007079]]\n",
"\n",
" [[ 1. 0. 0.]]\n",
"5\n",
"[[-0.10357568 -0.11130837 0.08943851 -0.03797921 -0.03640079 0.09984766\n",
" 0.0517213 -0.13007079]]\n",
"\n",
" [[ 1. 0. 0.]]\n",
"5\n",
"[[-0.10357568 -0.11130837 0.08943851 -0.03797921 -0.03640079 0.09984766\n",
" 0.0517213 -0.13007079]]\n",
"\n",
" [[ 1. 0. 0.]]\n",
"5\n",
"[[-0.10357568 -0.11130837 0.08943851 -0.03797921 -0.03640079 0.09984766\n",
" 0.0517213 -0.13007079]]\n",
"\n",
" [[ 1. 0. 0.]]\n",
"5\n",
"[[-0.10357568 -0.11130837 0.08943851 -0.03797921 -0.03640079 0.09984766\n",
" 0.0517213 -0.13007079]]\n",
"\n",
" [[ 1. 0. 0.]]\n",
"5\n",
"[[-0.10357568 -0.11130837 0.08943851 -0.03797921 -0.03640079 0.09984766\n",
" 0.0517213 -0.13007079]]\n",
"\n",
" [[ 1. 0. 0.]]\n",
"5\n",
"[[-0.10357568 -0.11130837 0.08943851 -0.03797921 -0.03640079 0.09984766\n",
" 0.0517213 -0.13007079]]\n",
"\r",
" -1.4"
]
}
],
"source": [
"s = env.reset()\n",
"\n",
"\n",
"\"\"\" \n",
"    → 0\n",
"    ↗ 1\n",
"    ↑ 2\n",
"    ↖ 3\n",
"    ← 4\n",
"    ↙ 5\n",
"    ↓ 6\n",
"    ↘ 7\n",
"\"\"\"\n",
"\n",
"total_r = 0\n",
"j = 0\n",
"# the list below only fixes the number of steps; a is overwritten by the greedy choice\n",
"for a in [0, 0, 1, 1, 0, 0, 0]:\n",
"    j += 1\n",
"\n",
"    print(\"\\\n\", env.state)\n",
"    a_dst = sess.run(agent.predict_Q, {agent.input_s: [s]})\n",
"    a = np.argmax(a_dst)\n",
"    # a = np.random.choice(np.arange(8), p=a_dst[0])\n",
"\n",
"    s, r, d = env.step(a)\n",
"    print(a)\n",
"    print(a_dst)\n",
"    total_r += r\n",
"\n",
"#     if d == True:\n",
"#         j = 0\n",
"#         print(env.state)\n",
"#         break\n",
"#         #env.reset()\n",
"\n",
"print(\"\\\r\", total_r, end=\"\")"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0.76904085, 0.296608 , 0.9456555 , 0.73741889, 0.83164985,\n",
" 0.82735085, 0.58143395, 0.64800572])"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.random.random(8)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1., 1., 1., 0., 0., 0., 0., 0.])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env = Environment(5, 4)\n",
"s = env.reset()\n",
"actions = s[-8:]\n",
"actions"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 1. 1. 1. 1. 1. 0. 0. 1.]\n",
"[0, 1, 2, 3, 4, 7]\n"
]
},
{
"data": {
"text/plain": [
"array([[ 0., 0., 0., 0., 0.],\n",
" [ 0., 0., 5., 0., 0.],\n",
" [ 0., 4., 3., 0., 0.],\n",
" [ 1., 2., 0., 0., 0.]])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s = env.step(0)[0]\n",
"s = env.step(1)[0]\n",
"s = env.step(4)[0]\n",
"s = env.step(1)[0]\n",
"actions = s[-8:]\n",
"print(actions)\n",
"print(env.valid_actions)\n",
"env.state"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"noise = np.random.random(8)\n",
"noise /= noise.sum()\n",
"noise.sum()"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{5, 6, 7}"
]
},
"execution_count": 149,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\" \n",
"    → 0\n",
"    ↗ 1\n",
"    ↑ 2\n",
"    ↖ 3\n",
"    ← 4\n",
"    ↙ 5\n",
"    ↓ 6\n",
"    ↘ 7\n",
"\"\"\"\n",
"env.no_action"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
},
"latex_envs": {
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 0
}
},
"nbformat": 4,
"nbformat_minor": 2
}