From c40fb5f2aac484070164a077b43dc0df2cc2a601 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger Date: Tue, 14 Apr 2020 16:39:27 -0700 Subject: [PATCH] Clean --- 01_intro.ipynb | 12 - 02_production.ipynb | 12 - 03_ethics.ipynb | 12 - 04_mnist_basics.ipynb | 751 +++++++++++++--------------------- 05_pet_breeds.ipynb | 25 -- 06_multicat.ipynb | 12 - 07_sizing_and_tta.ipynb | 12 - 08_collab.ipynb | 12 - 09_tabular.ipynb | 12 - 10_nlp.ipynb | 12 - 11_midlevel_data.ipynb | 12 - 12_nlp_dive.ipynb | 12 - 15_arch_details.ipynb | 28 +- 16_accel_sgd.ipynb | 12 - 17_foundations.ipynb | 12 - 18_CAM.ipynb | 12 - app_blog.ipynb | 12 - clean/01_intro.ipynb | 12 - clean/02_production.ipynb | 12 - clean/03_ethics.ipynb | 12 - clean/05_pet_breeds.ipynb | 12 - clean/06_multicat.ipynb | 12 - clean/07_sizing_and_tta.ipynb | 12 - clean/08_collab.ipynb | 12 - clean/09_tabular.ipynb | 12 - clean/10_nlp.ipynb | 12 - clean/11_midlevel_data.ipynb | 12 - clean/12_nlp_dive.ipynb | 12 - clean/15_arch_details.ipynb | 28 +- clean/16_accel_sgd.ipynb | 12 - clean/17_foundations.ipynb | 12 - clean/18_CAM.ipynb | 12 - clean/app_blog.ipynb | 12 - 33 files changed, 298 insertions(+), 882 deletions(-) diff --git a/01_intro.ipynb b/01_intro.ipynb index 8be9f17..b995075 100644 --- a/01_intro.ipynb +++ b/01_intro.ipynb @@ -2914,18 +2914,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/02_production.ipynb b/02_production.ipynb index 349ed73..ea6e64c 100644 --- a/02_production.ipynb +++ b/02_production.ipynb @@ -1957,18 +1957,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/03_ethics.ipynb b/03_ethics.ipynb index 59be409..66bc391 100644 --- a/03_ethics.ipynb +++ b/03_ethics.ipynb @@ -1041,18 +1041,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/04_mnist_basics.ipynb b/04_mnist_basics.ipynb index 3e5829b..09b5aae 100644 --- a/04_mnist_basics.ipynb +++ b/04_mnist_basics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -101,7 +101,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -127,7 +127,7 @@ "(#9) [Path('cleaned.csv'),Path('item_list.txt'),Path('trained_model.pkl'),Path('models'),Path('valid'),Path('labels.csv'),Path('export.pkl'),Path('history.csv'),Path('train')]" ] }, - "execution_count": 4, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -145,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -154,7 +154,7 @@ "(#2) [Path('train/7'),Path('train/3')]" ] }, - "execution_count": 5, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -172,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -181,7 +181,7 @@ "(#6131) [Path('train/3/10.png'),Path('train/3/10000.png'),Path('train/3/10011.png'),Path('train/3/10031.png'),Path('train/3/10034.png'),Path('train/3/10042.png'),Path('train/3/10052.png'),Path('train/3/1007.png'),Path('train/3/10074.png'),Path('train/3/10091.png')...]" ] }, - "execution_count": 6, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -201,7 +201,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -211,7 +211,7 @@ "" ] }, - "execution_count": 7, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -233,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -247,7 +247,7 @@ " [ 0, 3, 20, 20, 15, 0]], dtype=uint8)" ] }, - "execution_count": 8, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -265,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -279,7 +279,7 @@ " [ 0, 3, 20, 20, 15, 0]], dtype=torch.uint8)" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -297,7 +297,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1334,7 +1334,7 @@ "" ] }, - "execution_count": 10, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1398,7 +1398,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1407,7 +1407,7 @@ "(6131, 6265)" ] }, - "execution_count": 11, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1434,7 +1434,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1467,7 +1467,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1476,7 +1476,7 @@ "torch.Size([6131, 28, 28])" ] }, - "execution_count": 13, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1498,7 +1498,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1507,7 +1507,7 @@ "3" ] }, - "execution_count": 14, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1534,7 +1534,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1543,7 +1543,7 @@ "3" ] }, - "execution_count": 15, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1563,7 +1563,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1595,7 +1595,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1629,7 +1629,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1675,7 +1675,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1684,7 +1684,7 @@ "(tensor(0.1114), tensor(0.2021))" ] }, - "execution_count": 19, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1697,7 +1697,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1706,7 +1706,7 @@ "(tensor(0.1586), tensor(0.3021))" ] }, - "execution_count": 20, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1740,7 +1740,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1749,7 +1749,7 @@ "(tensor(0.1586), tensor(0.3021))" ] }, - "execution_count": 21, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1816,7 +1816,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1827,7 +1827,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1837,7 +1837,7 @@ " [4, 5, 6]])" ] }, - "execution_count": 23, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1848,7 +1848,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1858,7 +1858,7 @@ " [4, 5, 6]])" ] }, - "execution_count": 24, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1878,7 +1878,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1887,7 +1887,7 @@ "tensor([4, 5, 6])" ] }, - "execution_count": 25, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1905,7 +1905,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1914,7 +1914,7 @@ "tensor([2, 5])" ] }, - "execution_count": 26, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1932,7 +1932,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1941,7 +1941,7 @@ "tensor([5, 6])" ] }, - "execution_count": 27, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1959,7 +1959,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1969,7 +1969,7 @@ " [5, 6, 7]])" ] }, - "execution_count": 28, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -1987,7 +1987,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -1996,7 +1996,7 @@ "'torch.LongTensor'" ] }, - "execution_count": 29, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2014,7 +2014,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2024,7 +2024,7 @@ " [6.0000, 7.5000, 9.0000]])" ] }, - "execution_count": 30, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2062,7 +2062,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2071,7 +2071,7 @@ "(torch.Size([1010, 28, 28]), torch.Size([1028, 28, 28]))" ] }, - "execution_count": 31, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2099,7 +2099,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2108,7 +2108,7 @@ "tensor(0.1114)" ] }, - "execution_count": 32, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2131,7 +2131,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2141,7 +2141,7 @@ " torch.Size([1010]))" ] }, - "execution_count": 33, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2166,7 +2166,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2175,7 +2175,7 @@ "tensor([2, 3, 4])" ] }, - "execution_count": 34, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2193,7 +2193,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2202,7 +2202,7 @@ "torch.Size([1010, 28, 28])" ] }, - "execution_count": 35, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2235,7 +2235,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2253,7 +2253,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2262,7 +2262,7 @@ "(tensor(True), tensor(1.))" ] }, - "execution_count": 37, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2280,7 +2280,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2289,7 +2289,7 @@ "tensor([True, True, True, ..., True, True, True])" ] }, - "execution_count": 38, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2307,7 +2307,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2316,7 +2316,7 @@ "(tensor(0.9168), tensor(0.9854), tensor(0.9511))" ] }, - "execution_count": 39, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2384,7 +2384,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": { "hide_input": true }, @@ -2483,7 +2483,7 @@ "" ] }, - "execution_count": 40, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2521,7 +2521,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2537,7 +2537,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2566,7 +2566,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2624,18 +2624,14 @@ }, { "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, + "metadata": {}, "source": [ "### The gradient" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "The one magic step is the bit where we calculate the *gradients*. As we mentioned, we use calculus as a performance optimization; it allows us to more quickly calculate whether our loss will go up or down when we adjust our parameters up or down. In other words, the gradients will tell us how much we have to change each weight to make our model better.\n", "\n", @@ -2646,9 +2642,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "One important thing to be aware of: our function has lots of weights that we need to adjust, so when we calculate the derivative we won't get back one number, but lots of them — a gradient for every weight. But there is nothing mathematically tricky here; you can calculate the derivative with respect to one weight, and treat all the other ones as constant. Then repeat that for each weight. This is how all of the gradients are calculated, for every weight.\n", "\n", @@ -2659,10 +2653,8 @@ }, { "cell_type": "code", - "execution_count": 44, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "xt = tensor(3.).requires_grad_()" @@ -2670,9 +2662,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Notice the special method `requires_grad_`? That's the magical incantation we use to tell PyTorch that we want to calculate gradients with respect to that variable at that value. It is essentially tagging the variable, so PyTorch will remember to keep track of how to compute gradients of the other, direct calculations on it which you will ask for.\n", "\n", @@ -2683,10 +2673,8 @@ }, { "cell_type": "code", - "execution_count": 45, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2694,7 +2682,7 @@ "tensor(9., grad_fn=)" ] }, - "execution_count": 45, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2706,19 +2694,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Finally, we tell PyTorch to calculate the gradients for us:" ] }, { "cell_type": "code", - "execution_count": 46, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "yt.backward()" @@ -2726,28 +2710,22 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "The \"backward\" here refers to \"back propagation\", which is the name given to the process of calculating the derivative of each layer. We'll see how this is done exactly in chapter , when we calculate the gradients of a deep neural net from scratch. This is called the \"backward pass\" of the network, as opposed to the \"forward pass\", which is where the activations are calculated. Life would probably be easier if `backward` was just called `calculate_grad`, but deep learning folks really do like to add jargon everywhere they can!" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "We can now view the gradients by checking the `grad` attribute of our tensor:" ] }, { "cell_type": "code", - "execution_count": 47, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2755,7 +2733,7 @@ "tensor(6.)" ] }, - "execution_count": 47, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2766,9 +2744,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "If you remember your high school calculus rules, the derivative of `x**2` is `2*x`, and we have `x=3`, so the gradient should be `2*3=6`, which is what PyTorch calculated for us!\n", "\n", @@ -2777,10 +2753,8 @@ }, { "cell_type": "code", - "execution_count": 48, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2788,7 +2762,7 @@ "tensor([ 3., 4., 10.], requires_grad=True)" ] }, - "execution_count": 48, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2800,19 +2774,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "...and adding `sum()` to our function so it can take a vector (i.e. a *rank-1 tensor*), and return a scalar (i.e. a *rank-0 tensor*):" ] }, { "cell_type": "code", - "execution_count": 49, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2820,7 +2790,7 @@ "tensor(125., grad_fn=)" ] }, - "execution_count": 49, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2834,19 +2804,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Our gradients are `2*xt`, as we'd expect!" ] }, { "cell_type": "code", - "execution_count": 50, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2854,7 +2820,7 @@ "tensor([ 6., 8., 20.])" ] }, - "execution_count": 50, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2866,27 +2832,21 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "The gradient only tells us the slope of our function, it doesn't actually tell us exactly how far to adjust the parameters. But it gives us some idea of how far; if the slope is very large, then that may suggest that we have more adjustments to do, whereas if the slope is very small, that may suggest that we are close to the optimal value." ] }, { "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, + "metadata": {}, "source": [ "### Stepping with a learning rate" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Deciding how to change our parameters based on the value of the gradients is an important part of the deep learning process. Nearly all approaches start with the basic idea of multiplying the gradient by some small number, called the *learning rate* (LR). The learning rate is often a number between 0.001 and 0.1, although it could be anything. Often, people select a learning rate just by trying a few, and finding which results in the best model after training (we'll show you a better approach later in this book, called the *learning rate finder*). Once you've picked a learning rate, you can adjust your parameters using this simple function:\n", "\n", @@ -2901,72 +2861,56 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "\"An" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Although picking a learning rate that's too high is even worse--it can actually result in the loss getting *worse* as we see in <>!" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "\"An" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "If the learning rate is too high, it may also \"bounce\" around, rather than actually diverging; <> shows how this has the result of taking many steps to train successfully." ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "\"An" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Now let's apply all of this on an end-to-end example." ] }, { "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, + "metadata": {}, "source": [ "### An end-to-end SGD example" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "We've seen how to use gradients to find a minimum. Now it's time to look at an SGD example, and see how finding a minimum can be used to train a model to fit data better.\n", "\n", @@ -2975,10 +2919,8 @@ }, { "cell_type": "code", - "execution_count": 51, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -2986,7 +2928,7 @@ "tensor([ 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.])" ] }, - "execution_count": 51, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2997,10 +2939,8 @@ }, { "cell_type": "code", - "execution_count": 52, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3022,9 +2962,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "We've added a bit of random noise, since measuring things manually isn't precise. This means it's not that easy to answer the question: what was the roller coaster's speed? Using SGD we can try to find a function that matches our observations. We can't consider every possible function, so let's use a guess that it will be quadratic, i.e. a function of the form `a*(time**2)+(b*time)+c`.\n", "\n", @@ -3033,10 +2971,8 @@ }, { "cell_type": "code", - "execution_count": 53, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "def f(t, params):\n", @@ -3046,9 +2982,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "In other words, we've restricted the problem of finding the best imaginable function that fits the data, to finding the best *quadratic* function. This greatly simplifies the problem, since every quadratic function is fully defined by the three parameters `a`, `b`, and `c`. So to find the best quadratic function, we only need to find the best values for `a`, `b`, and `c`.\n", "\n", @@ -3059,10 +2993,8 @@ }, { "cell_type": "code", - "execution_count": 54, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "def mse(preds, targets): return ((preds-targets)**2).mean()" @@ -3070,9 +3002,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Now, let's work through our 7 step process.\n", "\n", @@ -3081,10 +3011,8 @@ }, { "cell_type": "code", - "execution_count": 55, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "params = torch.randn(3).requires_grad_()" @@ -3092,10 +3020,8 @@ }, { "cell_type": "code", - "execution_count": 56, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#hide\n", @@ -3104,19 +3030,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Step 2--Calculate the *predictions*:" ] }, { "cell_type": "code", - "execution_count": 57, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "preds = f(time, params)" @@ -3124,19 +3046,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Let's create a little function to see how close our predictions are to our targets, and take a look:" ] }, { "cell_type": "code", - "execution_count": 58, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "def show_preds(preds, ax=None):\n", @@ -3148,10 +3066,8 @@ }, { "cell_type": "code", - "execution_count": 59, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3172,9 +3088,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "This doesn't look very close--our random parameters suggest that the roller coaster will end up going backwards, since we have negative speeds!\n", "\n", @@ -3183,10 +3097,8 @@ }, { "cell_type": "code", - "execution_count": 60, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3194,7 +3106,7 @@ "tensor(25823.8086, grad_fn=)" ] }, - "execution_count": 60, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3206,9 +3118,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Our goal is now to improve this. To do that, we'll need to know the gradients.\n", "\n", @@ -3217,10 +3127,8 @@ }, { "cell_type": "code", - "execution_count": 61, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3228,7 +3136,7 @@ "tensor([-53195.8594, -3419.7146, -253.8908])" ] }, - "execution_count": 61, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3240,10 +3148,8 @@ }, { "cell_type": "code", - "execution_count": 62, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3251,7 +3157,7 @@ "tensor([-0.5320, -0.0342, -0.0025])" ] }, - "execution_count": 62, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3262,19 +3168,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "We can use these gradients to improve our parameters. We'll need to pick a learning rate (we'll discuss how to do that in practice in the next chapter; for now we'll just pick `1e-5`(0.00001)):" ] }, { "cell_type": "code", - "execution_count": 63, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3282,7 +3184,7 @@ "tensor([-0.7658, -0.7506, 1.3525], requires_grad=True)" ] }, - "execution_count": 63, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3293,9 +3195,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Step 5--*Step* the weights. In other words, update the parameters based on the gradients we just calculated.\n", "\n", @@ -3304,10 +3204,8 @@ }, { "cell_type": "code", - "execution_count": 64, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "lr = 1e-5\n", @@ -3317,19 +3215,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Let's see if the loss has improved:" ] }, { "cell_type": "code", - "execution_count": 65, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3337,7 +3231,7 @@ "tensor(5435.5366, grad_fn=)" ] }, - "execution_count": 65, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3349,19 +3243,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "...and take a look at the plot:" ] }, { "cell_type": "code", - "execution_count": 66, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3382,19 +3272,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "We need to repeat this a few times, so we'll create a function to apply one step:" ] }, { "cell_type": "code", - "execution_count": 67, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "def apply_step(params, prn=True):\n", @@ -3409,9 +3295,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "...now we're ready for step 6!\n", "\n", @@ -3420,10 +3304,8 @@ }, { "cell_type": "code", - "execution_count": 68, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -3448,10 +3330,8 @@ }, { "cell_type": "code", - "execution_count": 69, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "#hide\n", @@ -3460,19 +3340,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Loss is going down, just as we hoped! But looking only at these loss numbers disguises the fact that each iteration represents an entirely different quadratic function being tried, on the way to find the best possible quadratic function. We can see this process visually if, instead of printing out the loss function, we plot the function at every step. Then we can see how the shape is approaching the best possible quadratic function for our data:" ] }, { "cell_type": "code", - "execution_count": 70, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -3495,9 +3371,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Step 7 is to *stop*. We just decided to stop after 10 epochs arbitrarily. In practice, we watch the training and validation losses and our metrics to decide when to stop, as we've discussed." ] @@ -3511,7 +3385,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": null, "metadata": { "hide_input": false }, @@ -3610,7 +3484,7 @@ "" ] }, - "execution_count": 71, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3652,7 +3526,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3668,7 +3542,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3677,7 +3551,7 @@ "(torch.Size([12396, 784]), torch.Size([12396, 1]))" ] }, - "execution_count": 73, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3696,7 +3570,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3705,7 +3579,7 @@ "(torch.Size([784]), tensor([1]))" ] }, - "execution_count": 74, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3718,7 +3592,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3736,7 +3610,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3745,7 +3619,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3761,7 +3635,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3791,7 +3665,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3800,7 +3674,7 @@ "tensor([20.2336], grad_fn=)" ] }, - "execution_count": 79, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3836,7 +3710,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3851,7 +3725,7 @@ " [28.6816]], grad_fn=)" ] }, - "execution_count": 80, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3878,7 +3752,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3893,7 +3767,7 @@ " [False]])" ] }, - "execution_count": 81, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3905,7 +3779,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3914,7 +3788,7 @@ "0.4912068545818329" ] }, - "execution_count": 82, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3932,7 +3806,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3941,7 +3815,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3950,7 +3824,7 @@ "0.4912068545818329" ] }, - "execution_count": 84, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -3992,7 +3866,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4009,7 +3883,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4030,7 +3904,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4039,7 +3913,7 @@ "tensor([0.1000, 0.4000, 0.8000])" ] }, - "execution_count": 87, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4057,7 +3931,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4066,7 +3940,7 @@ "tensor(0.4333)" ] }, - "execution_count": 88, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4084,7 +3958,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4093,7 +3967,7 @@ "tensor(0.2333)" ] }, - "execution_count": 89, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4111,28 +3985,22 @@ }, { "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, + "metadata": {}, "source": [ "### Sigmoid" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "The function called *sigmoid* is defined by:" ] }, { "cell_type": "code", - "execution_count": 90, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "def sigmoid(x): return 1/(1+torch.exp(-x))" @@ -4140,19 +4008,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Pytorch actually already defines this for us, so we don’t really need our own version. This is an important function in deep learning, since we often want to ensure values are between zero and one. This is what it looks like:" ] }, { "cell_type": "code", - "execution_count": 91, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -4173,9 +4037,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "As you can see, it takes any input value, positive or negative, and smooshes it onto an output value between 0 and 1. It's also a smooth curve that only goes up, which makes it easier for SGD to find meaningful gradients. \n", "\n", @@ -4184,10 +4046,8 @@ }, { "cell_type": "code", - "execution_count": 92, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "def mnist_loss(predictions, targets):\n", @@ -4197,9 +4057,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Now we can be confident our loss function will work, even if the predictions are not between 0 and 1. All that is required is that a higher prediction corresponds to more confidence an image is a 3.\n", "\n", @@ -4212,18 +4070,14 @@ }, { "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, + "metadata": {}, "source": [ "### SGD and mini-batches" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Now that we have a loss function which is suitable to drive SGD, we can consider some of the details involved in the next phase of the learning process, which is to *step* (i.e., change or update) the weights based on the gradients. This is called an optimisation step.\n", "\n", @@ -4240,10 +4094,8 @@ }, { "cell_type": "code", - "execution_count": 93, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -4253,7 +4105,7 @@ " tensor([ 1, 13, 0, 6, 11])]" ] }, - "execution_count": 93, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4266,19 +4118,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "For training a model, we don't just want any Python collection, but a collection containing independent and dependent variables (that is, the inputs and targets of the model). A collection that contains tuples of independent and dependent variables is known in PyTorch as a Dataset. Here's an example of an extremely simple Dataset:" ] }, { "cell_type": "code", - "execution_count": 94, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -4286,7 +4134,7 @@ "(#26) [(0, 'a'),(1, 'b'),(2, 'c'),(3, 'd'),(4, 'e'),(5, 'f'),(6, 'g'),(7, 'h'),(8, 'i'),(9, 'j')...]" ] }, - "execution_count": 94, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4298,19 +4146,15 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "When we pass a Dataset to a DataLoader we will get back many batches which are themselves tuples of tensors representing batches of independent and dependent variables:" ] }, { "cell_type": "code", - "execution_count": 95, - "metadata": { - "hidden": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -4322,7 +4166,7 @@ " (tensor([2, 4]), ('c', 'e'))]" ] }, - "execution_count": 95, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4334,9 +4178,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "We are now ready to write our first training loop for a model using SGD!" ] @@ -4372,7 +4214,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4389,7 +4231,7 @@ }, { "cell_type": "code", - "execution_count": 142, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4398,7 +4240,7 @@ "(torch.Size([256, 784]), torch.Size([256, 1]))" ] }, - "execution_count": 142, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4418,7 +4260,7 @@ }, { "cell_type": "code", - "execution_count": 143, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4434,7 +4276,7 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4443,7 +4285,7 @@ "torch.Size([4, 784])" ] }, - "execution_count": 146, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4455,7 +4297,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4467,7 +4309,7 @@ " [ -8.1484]], grad_fn=)" ] }, - "execution_count": 147, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4479,7 +4321,7 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4488,7 +4330,7 @@ "tensor(0.5006, grad_fn=)" ] }, - "execution_count": 148, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4507,7 +4349,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4516,7 +4358,7 @@ "(torch.Size([784, 1]), tensor(-0.0001), tensor([-0.0008]))" ] }, - "execution_count": 149, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4535,7 +4377,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4554,7 +4396,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4563,7 +4405,7 @@ "(tensor(-0.0002), tensor([-0.0015]))" ] }, - "execution_count": 151, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4582,7 +4424,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4591,7 +4433,7 @@ "(tensor(-0.0003), tensor([-0.0023]))" ] }, - "execution_count": 152, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4610,7 +4452,7 @@ }, { "cell_type": "code", - "execution_count": 153, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4634,7 +4476,7 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4655,7 +4497,7 @@ }, { "cell_type": "code", - "execution_count": 155, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4667,7 +4509,7 @@ " [False]])" ] }, - "execution_count": 155, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4685,7 +4527,7 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4704,7 +4546,7 @@ }, { "cell_type": "code", - "execution_count": 157, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4713,7 +4555,7 @@ "tensor(0.5000)" ] }, - "execution_count": 157, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4731,7 +4573,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4742,7 +4584,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4751,7 +4593,7 @@ "0.5219" ] }, - "execution_count": 159, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4769,7 +4611,7 @@ }, { "cell_type": "code", - "execution_count": 160, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4778,7 +4620,7 @@ "0.6883" ] }, - "execution_count": 160, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4799,7 +4641,7 @@ }, { "cell_type": "code", - "execution_count": 161, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4841,7 +4683,7 @@ }, { "cell_type": "code", - "execution_count": 162, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4857,7 +4699,7 @@ }, { "cell_type": "code", - "execution_count": 163, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4866,7 +4708,7 @@ "(torch.Size([1, 784]), torch.Size([1]))" ] }, - "execution_count": 163, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4885,7 +4727,7 @@ }, { "cell_type": "code", - "execution_count": 164, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4908,7 +4750,7 @@ }, { "cell_type": "code", - "execution_count": 165, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4924,7 +4766,7 @@ }, { "cell_type": "code", - "execution_count": 166, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4944,7 +4786,7 @@ }, { "cell_type": "code", - "execution_count": 167, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4953,7 +4795,7 @@ "0.4157" ] }, - "execution_count": 167, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -4971,7 +4813,7 @@ }, { "cell_type": "code", - "execution_count": 168, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4990,7 +4832,7 @@ }, { "cell_type": "code", - "execution_count": 169, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -5014,7 +4856,7 @@ }, { "cell_type": "code", - "execution_count": 170, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -5040,7 +4882,7 @@ }, { "cell_type": "code", - "execution_count": 171, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -5056,7 +4898,7 @@ }, { "cell_type": "code", - "execution_count": 172, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -5073,7 +4915,7 @@ }, { "cell_type": "code", - "execution_count": 173, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -5202,7 +5044,7 @@ }, { "cell_type": "code", - "execution_count": 174, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -5224,7 +5066,7 @@ }, { "cell_type": "code", - "execution_count": 175, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -5245,7 +5087,7 @@ }, { "cell_type": "code", - "execution_count": 176, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -5299,7 +5141,7 @@ }, { "cell_type": "code", - "execution_count": 177, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -5323,7 +5165,7 @@ }, { "cell_type": "code", - "execution_count": 178, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -5333,7 +5175,7 @@ }, { "cell_type": "code", - "execution_count": 179, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -5655,7 +5497,7 @@ }, { "cell_type": "code", - "execution_count": 180, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -5684,7 +5526,7 @@ }, { "cell_type": "code", - "execution_count": 181, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -5693,7 +5535,7 @@ "0.982826292514801" ] }, - "execution_count": 181, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -5724,7 +5566,7 @@ }, { "cell_type": "code", - "execution_count": 182, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -5918,31 +5760,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.5" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": false, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false } }, "nbformat": 4, diff --git a/05_pet_breeds.ipynb b/05_pet_breeds.ipynb index 4219d85..737879b 100644 --- a/05_pet_breeds.ipynb +++ b/05_pet_breeds.ipynb @@ -2532,31 +2532,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.5" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": false, - "sideBar": true, - "skip_h1_title": true, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false } }, "nbformat": 4, diff --git a/06_multicat.ipynb b/06_multicat.ipynb index fc1a91c..4eec808 100644 --- a/06_multicat.ipynb +++ b/06_multicat.ipynb @@ -1928,18 +1928,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/07_sizing_and_tta.ipynb b/07_sizing_and_tta.ipynb index f5c4814..dcd0749 100644 --- a/07_sizing_and_tta.ipynb +++ b/07_sizing_and_tta.ipynb @@ -1012,18 +1012,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/08_collab.ipynb b/08_collab.ipynb index 831cedd..d0d6122 100644 --- a/08_collab.ipynb +++ b/08_collab.ipynb @@ -2296,18 +2296,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/09_tabular.ipynb b/09_tabular.ipynb index 136c8f5..e34984b 100644 --- a/09_tabular.ipynb +++ b/09_tabular.ipynb @@ -9737,18 +9737,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/10_nlp.ipynb b/10_nlp.ipynb index e7d51e7..b1f81c8 100644 --- a/10_nlp.ipynb +++ b/10_nlp.ipynb @@ -2251,18 +2251,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/11_midlevel_data.ipynb b/11_midlevel_data.ipynb index 908f4f2..ffba7ce 100644 --- a/11_midlevel_data.ipynb +++ b/11_midlevel_data.ipynb @@ -1264,18 +1264,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/12_nlp_dive.ipynb b/12_nlp_dive.ipynb index 0e1b988..ecd7855 100644 --- a/12_nlp_dive.ipynb +++ b/12_nlp_dive.ipynb @@ -2347,18 +2347,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/15_arch_details.ipynb b/15_arch_details.ipynb index f544694..aa29289 100644 --- a/15_arch_details.ipynb +++ b/15_arch_details.ipynb @@ -247,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -377,7 +377,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -396,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -413,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -431,10 +431,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "scrolled": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -502,7 +500,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -810,18 +808,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/16_accel_sgd.ipynb b/16_accel_sgd.ipynb index 106316b..6c226ee 100644 --- a/16_accel_sgd.ipynb +++ b/16_accel_sgd.ipynb @@ -1307,18 +1307,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/17_foundations.ipynb b/17_foundations.ipynb index 3b313f9..e6a3f0a 100644 --- a/17_foundations.ipynb +++ b/17_foundations.ipynb @@ -2448,18 +2448,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/18_CAM.ipynb b/18_CAM.ipynb index bf4318f..0f3d897 100644 --- a/18_CAM.ipynb +++ b/18_CAM.ipynb @@ -653,18 +653,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/app_blog.ipynb b/app_blog.ipynb index 343be9b..840eef5 100644 --- a/app_blog.ipynb +++ b/app_blog.ipynb @@ -292,18 +292,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/01_intro.ipynb b/clean/01_intro.ipynb index e6e9d95..6493daa 100644 --- a/clean/01_intro.ipynb +++ b/clean/01_intro.ipynb @@ -1567,18 +1567,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/02_production.ipynb b/clean/02_production.ipynb index 6952b0b..6c950e9 100644 --- a/clean/02_production.ipynb +++ b/clean/02_production.ipynb @@ -1080,18 +1080,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/03_ethics.ipynb b/clean/03_ethics.ipynb index 26971be..8ac88a3 100644 --- a/clean/03_ethics.ipynb +++ b/clean/03_ethics.ipynb @@ -311,18 +311,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/05_pet_breeds.ipynb b/clean/05_pet_breeds.ipynb index 43afa87..4ea4308 100644 --- a/clean/05_pet_breeds.ipynb +++ b/clean/05_pet_breeds.ipynb @@ -1750,18 +1750,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/06_multicat.ipynb b/clean/06_multicat.ipynb index 3f1012c..eaeecd3 100644 --- a/clean/06_multicat.ipynb +++ b/clean/06_multicat.ipynb @@ -1344,18 +1344,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/07_sizing_and_tta.ipynb b/clean/07_sizing_and_tta.ipynb index 553a6e0..8d23a61 100644 --- a/clean/07_sizing_and_tta.ipynb +++ b/clean/07_sizing_and_tta.ipynb @@ -655,18 +655,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/08_collab.ipynb b/clean/08_collab.ipynb index 7ec2fb5..3b311d7 100644 --- a/clean/08_collab.ipynb +++ b/clean/08_collab.ipynb @@ -1703,18 +1703,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/09_tabular.ipynb b/clean/09_tabular.ipynb index 74a1026..7825e04 100644 --- a/clean/09_tabular.ipynb +++ b/clean/09_tabular.ipynb @@ -8340,18 +8340,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/10_nlp.ipynb b/clean/10_nlp.ipynb index 6dfca3e..b5b8067 100644 --- a/clean/10_nlp.ipynb +++ b/clean/10_nlp.ipynb @@ -1548,18 +1548,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/11_midlevel_data.ipynb b/clean/11_midlevel_data.ipynb index 748c04d..d78cb56 100644 --- a/clean/11_midlevel_data.ipynb +++ b/clean/11_midlevel_data.ipynb @@ -843,18 +843,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/12_nlp_dive.ipynb b/clean/12_nlp_dive.ipynb index 9f50802..f047bec 100644 --- a/clean/12_nlp_dive.ipynb +++ b/clean/12_nlp_dive.ipynb @@ -1626,18 +1626,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/15_arch_details.ipynb b/clean/15_arch_details.ipynb index 7fab841..b30d6c4 100644 --- a/clean/15_arch_details.ipynb +++ b/clean/15_arch_details.ipynb @@ -102,7 +102,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -205,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -215,7 +215,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -226,10 +226,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "scrolled": true - }, + "execution_count": null, + "metadata": {}, "outputs": [ { "data": { @@ -290,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -437,18 +435,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/16_accel_sgd.ipynb b/clean/16_accel_sgd.ipynb index 67eae65..fd88648 100644 --- a/clean/16_accel_sgd.ipynb +++ b/clean/16_accel_sgd.ipynb @@ -754,18 +754,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/17_foundations.ipynb b/clean/17_foundations.ipynb index 4230adc..ac15d43 100644 --- a/clean/17_foundations.ipynb +++ b/clean/17_foundations.ipynb @@ -1602,18 +1602,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/18_CAM.ipynb b/clean/18_CAM.ipynb index 925b961..cf07b2e 100644 --- a/clean/18_CAM.ipynb +++ b/clean/18_CAM.ipynb @@ -470,18 +470,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4, diff --git a/clean/app_blog.ipynb b/clean/app_blog.ipynb index e32898e..2cda237 100644 --- a/clean/app_blog.ipynb +++ b/clean/app_blog.ipynb @@ -76,18 +76,6 @@ "display_name": "Python 3", "language": "python", "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" } }, "nbformat": 4,