Jeremy Howard 2020-11-29 10:40:59 -08:00
parent 929e5f075b
commit cf9fae191c
14 changed files with 4846 additions and 4651 deletions

.gitignore vendored

@@ -1,3 +1,4 @@
models/
tmp/
*.bak
*.pkl

File diff suppressed because one or more lines are too long


@@ -162,7 +162,7 @@
"source": [
"#hide\n",
"# For the book, we can't actually click an upload button, so we fake it\n",
"# uploader = SimpleNamespace(data = ['images/chapter1_cat_example.jpg'])"
"uploader = SimpleNamespace(data = ['images/chapter1_cat_example.jpg'])"
]
},
{
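
The first hunk above swaps in the stand-in uploader: the book build cannot click an upload widget, so a SimpleNamespace imitates the one attribute the following cells read. A minimal sketch of the idea, assuming the example image ships with the repo:

from types import SimpleNamespace

# Stand-in for an interactive ipywidgets.FileUpload: the rest of the
# notebook only reads uploader.data[0], so any object exposing a .data
# list works when the notebook is executed non-interactively.
uploader = SimpleNamespace(data=['images/chapter1_cat_example.jpg'])
print(uploader.data[0])   # images/chapter1_cat_example.jpg
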
@@ -479,7 +479,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"It can be hard to know in pages and pages of prose what the key things are that you really need to focus on and remember. So, we've prepared a list of questions and suggested steps to complete at the end of each chapter. All the answers are in the text of the chapter, so if you're not sure about anything here, reread that part of the text and make sure you understand it. Answers to all these questions are also available on the [book's website](https://book.fast.ai). You can also visit [the forums](https://forums.fast.ai) if you get stuck to get help from other folks studying this material."
"It can be hard to know in pages and pages of prose what the key things are that you really need to focus on and remember. So, we've prepared a list of questions and suggested steps to complete at the end of each chapter. All the answers are in the text of the chapter, so if you're not sure about anything here, reread that part of the text and make sure you understand it. Answers to all these questions are also available on the [book's website](https://book.fast.ai). You can also visit [the forums](https://forums.fast.ai) if you get stuck to get help from other folks studying this material.\n",
"\n",
"For more questions, including detailed answers and links to the video timeline, have a look at Radek Osmulski's [aiquizzes](http://aiquizzes.com/howto)."
]
},
{


@@ -7,7 +7,7 @@
"outputs": [],
"source": [
"#hide\n",
"!pip install -Uqq fastbook\n",
"# !pip install -Uqq fastbook\n",
"import fastbook\n",
"fastbook.setup_book()"
]
@@ -196,7 +196,7 @@
" dest = (path/o)\n",
" dest.mkdir(exist_ok=True)\n",
" results = search_images_bing(key, f'{o} bear')\n",
" download_images(dest, urls=results.attrgot('content_url'))"
" download_images(dest, urls=results.attrgot('contentUrl'))"
]
},
{
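
The `contentUrl` fix matters because the Bing Image Search response uses camelCase keys; `attrgot('content_url')` quietly returned nothing to download. A small sketch with hypothetical results, using fastcore's `L`:

from fastcore.all import L

# Hypothetical search results shaped like the Bing Image Search payload.
results = L([{'contentUrl': 'http://example.com/a.jpg'},
             {'contentUrl': 'http://example.com/b.jpg'}])
print(results.attrgot('contentUrl'))    # (#2) ['http://example.com/a.jpg', ...]
print(results.attrgot('content_url'))   # (#2) [None, None] -- the old bug
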
@@ -592,7 +592,7 @@
"source": [
"#hide\n",
"# !pip install voila\n",
"# !jupyter serverextension enable voila —sys-prefix"
"# !jupyter serverextension enable --sys-prefix voila "
]
},
{
@@ -700,4 +700,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}


@@ -620,7 +620,7 @@
"metadata": {},
"outputs": [],
"source": [
"def mse(preds, targets): return ((preds-targets)**2).mean()"
"def mse(preds, targets): return ((preds-targets)**2).mean().sqrt()"
]
},
{
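
The `.sqrt()` added in the hunk above turns the mean squared error into a root mean squared error, putting the loss back in the units of the targets. A quick worked check on toy tensors:

import torch

def mse(preds, targets):  return ((preds-targets)**2).mean()
def rmse(preds, targets): return ((preds-targets)**2).mean().sqrt()

preds   = torch.tensor([0.9, 0.4, 0.2])
targets = torch.tensor([1.0, 0.0, 0.0])
print(mse(preds, targets))    # tensor(0.0700) = (0.01+0.16+0.04)/3
print(rmse(preds, targets))   # tensor(0.2646) = sqrt(0.0700)
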


@@ -625,7 +625,7 @@
"1. Why do we first resize to a large size on the CPU, and then to a smaller size on the GPU?\n",
"1. If you are not familiar with regular expressions, find a regular expression tutorial, and some problem sets, and complete them. Have a look on the book's website for suggestions.\n",
"1. What are the two ways in which data is most commonly provided, for most deep learning datasets?\n",
"1. Look up the documentation for `L` and try using a few of the new methods is that it adds.\n",
"1. Look up the documentation for `L` and try using a few of the new methods that it adds.\n",
"1. Look up the documentation for the Python `pathlib` module and try using a few methods of the `Path` class.\n",
"1. Give two examples of ways that image transformations can degrade the quality of the data.\n",
"1. What method does fastai provide to view the data in a `DataLoaders`?\n",


@@ -66,7 +66,7 @@
"metadata": {},
"outputs": [],
"source": [
"model = xresnet50()\n",
"model = xresnet50(n_out=dls.c)\n",
"learn = Learner(dls, model, loss_func=CrossEntropyLossFlat(), metrics=accuracy)\n",
"learn.fit_one_cycle(5, 3e-3)"
]
@@ -129,7 +129,7 @@
"metadata": {},
"outputs": [],
"source": [
"model = xresnet50()\n",
"model = xresnet50(n_out=dls.c)\n",
"learn = Learner(dls, model, loss_func=CrossEntropyLossFlat(), metrics=accuracy)\n",
"learn.fit_one_cycle(5, 3e-3)"
]
@@ -148,7 +148,7 @@
"outputs": [],
"source": [
"dls = get_dls(128, 128)\n",
"learn = Learner(dls, xresnet50(), loss_func=CrossEntropyLossFlat(), \n",
"learn = Learner(dls, xresnet50(n_out=dls.c), loss_func=CrossEntropyLossFlat(), \n",
" metrics=accuracy)\n",
"learn.fit_one_cycle(4, 3e-3)"
]
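
The `n_out=dls.c` edits above are needed because `xresnet50()` defaults to a 1,000-way ImageNet head, and these models go straight to `Learner` rather than through `cnn_learner`, so nothing resizes the head automatically. A sketch, assuming `dls` is the `DataLoaders` built earlier in the chapter:

from fastai.vision.all import *

# dls is assumed to already exist; dls.c is the number of target classes
# fastai inferred from it. Without n_out, the model would predict 1,000
# ImageNet classes regardless of the dataset.
model = xresnet50(n_out=dls.c)
learn = Learner(dls, model, loss_func=CrossEntropyLossFlat(), metrics=accuracy)
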


@@ -503,7 +503,6 @@
"movie_w = learn.model.movie_factors[top_idxs].cpu().detach()\n",
"movie_pca = movie_w.pca(3)\n",
"fac0,fac1,fac2 = movie_pca.t()\n",
"idxs = np.random.choice(len(top_movies), 50, replace=False)\n",
"idxs = list(range(50))\n",
"X = fac0[idxs]\n",
"Y = fac2[idxs]\n",


@@ -7,7 +7,7 @@
"outputs": [],
"source": [
"#hide\n",
"!pip install -Uqq fastbook\n",
"!pip install -Uqq fastbook kaggle waterfallcharts treeinterpreter dtreeviz\n",
"import fastbook\n",
"fastbook.setup_book()"
]
@@ -85,7 +85,7 @@
"cred_path = Path('~/.kaggle/kaggle.json').expanduser()\n",
"if not cred_path.exists():\n",
" cred_path.parent.mkdir(exist_ok=True)\n",
" cred_path.write(creds)\n",
" cred_path.write_text(creds)\n",
" cred_path.chmod(0o600)"
]
},
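
`pathlib.Path` has no `.write` method, so the original credentials line raised `AttributeError`; `write_text` is the pathlib call that writes a string. A self-contained sketch with placeholder credentials:

from pathlib import Path

creds = '{"username":"YOUR_USER","key":"YOUR_KEY"}'   # placeholder, not real
cred_path = Path('~/.kaggle/kaggle.json').expanduser()
if not cred_path.exists():
    cred_path.parent.mkdir(exist_ok=True)
    cred_path.write_text(creds)   # Path.write does not exist; write_text does
    cred_path.chmod(0o600)        # keep the Kaggle API token private
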
@@ -116,7 +116,7 @@
"outputs": [],
"source": [
"if not path.exists():\n",
" path.mkdir()\n",
" path.mkdir(parents=true)\n",
" api.competition_download_cli('bluebook-for-bulldozers', path=path)\n",
" file_extract(path/'bluebook-for-bulldozers.zip')\n",
"\n",
@@ -344,7 +344,7 @@
"metadata": {},
"outputs": [],
"source": [
"(path/'to.pkl').save(to)"
"save_pickle(path/'to.pkl',to)"
]
},
{
@@ -361,7 +361,7 @@
"outputs": [],
"source": [
"#hide\n",
"to = (path/'to.pkl').load()"
"to = load_pickle(path/'to.pkl')"
]
},
{
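
The `(path/'to.pkl').save(to)` form in the two hunks above depended on a patched `Path` method that current fastai/fastcore versions no longer provide, so the notebook switches to the `save_pickle`/`load_pickle` helpers. Roughly what they do, as a sketch rather than the library source:

import pickle
from pathlib import Path

def save_pickle(fn, o):
    with open(fn, 'wb') as f: pickle.dump(o, f)

def load_pickle(fn):
    with open(fn, 'rb') as f: return pickle.load(f)

save_pickle(Path('to.pkl'), {'a': 1})
print(load_pickle(Path('to.pkl')))   # {'a': 1}
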
@@ -390,7 +390,7 @@
"metadata": {},
"outputs": [],
"source": [
"draw_tree(m, xs, size=7, leaves_parallel=True, precision=2)"
"draw_tree(m, xs, size=10, leaves_parallel=True, precision=2)"
]
},
{
@@ -829,8 +829,8 @@
"metadata": {},
"outputs": [],
"source": [
"(path/'xs_final.pkl').save(xs_final)\n",
"(path/'valid_xs_final.pkl').save(valid_xs_final)"
"save_pickle(path/'xs_final.pkl', xs_final)\n",
"save_pickle(path/'valid_xs_final.pkl', valid_xs_final)"
]
},
{
@@ -839,8 +839,8 @@
"metadata": {},
"outputs": [],
"source": [
"xs_final = (path/'xs_final.pkl').load()\n",
"valid_xs_final = (path/'valid_xs_final.pkl').load()"
"xs_final = load_pickle(path/'xs_final.pkl')\n",
"valid_xs_final = load_pickle(path/'valid_xs_final.pkl')"
]
},
{
@@ -1157,6 +1157,15 @@
"cat_nn.remove('saleElapsed')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_nn['saleElapsed'] = df_nn['saleElapsed'].astype(int)"
]
},
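
The new cell casting `saleElapsed` to `int` guards the neural-net preprocessing: a datetime-typed or otherwise non-integer column cannot be normalized and turned into a tensor, while its integer form can. A tiny illustration with a hypothetical frame standing in for `df_nn`:

import pandas as pd

df_nn = pd.DataFrame({'saleElapsed': pd.Series([1.163635e9, 1.339718e9])})
df_nn['saleElapsed'] = df_nn['saleElapsed'].astype(int)
print(df_nn['saleElapsed'].dtype)   # int64
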
{
"cell_type": "code",
"execution_count": null,
@@ -1175,7 +1184,7 @@
"xs_filt2 = xs_filt.drop('fiModelDescriptor', axis=1)\n",
"valid_xs_time2 = valid_xs_time.drop('fiModelDescriptor', axis=1)\n",
"m2 = rf(xs_filt2, y_filt)\n",
"m_rmse(m, xs_filt2, y_filt), m_rmse(m2, valid_xs_time2, valid_y)"
"m_rmse(m2, xs_filt2, y_filt), m_rmse(m2, valid_xs_time2, valid_y)"
]
},
{
@@ -1217,15 +1226,6 @@
"y.min(),y.max()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fastai.tabular.all import *"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -1393,7 +1393,7 @@
"metadata": {},
"source": [
"1. Pick a competition on Kaggle with tabular data (current or past) and try to adapt the techniques seen in this chapter to get the best possible results. Compare your results to the private leaderboard.\n",
"1. Implement the decision tree algorithm in this chapter from scratch yourself, and try it on the datase you used in the first exercise.\n",
"1. Implement the decision tree algorithm in this chapter from scratch yourself, and try it on the dataset you used in the first exercise.\n",
"1. Use the embeddings from the neural net in this chapter in a random forest, and see if you can improve on the random forest results we saw.\n",
"1. Explain what each line of the source of `TabularModel` does (with the exception of the `BatchNorm1d` and `Dropout` layers)."
]


@@ -491,14 +491,14 @@
"\n",
" def forward(self, input, state):\n",
" h,c = state\n",
" h = torch.stack([h, input], dim=1)\n",
" h = torch.cat([h, input], dim=1)\n",
" forget = torch.sigmoid(self.forget_gate(h))\n",
" c = c * forget\n",
" inp = torch.sigmoid(self.input_gate(h))\n",
" cell = torch.tanh(self.cell_gate(h))\n",
" c = c + inp * cell\n",
" out = torch.sigmoid(self.output_gate(h))\n",
" h = outgate * torch.tanh(c)\n",
" h = out * torch.tanh(c)\n",
" return h, (h,c)"
]
},
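
Two bugs are fixed in the `LSTMCell.forward` above: `torch.stack` would add a new axis (and requires equal shapes), while `torch.cat` along `dim=1` gives the single `[batch, n_hidden+n_input]` matrix the gate layers expect; and `outgate` was an undefined name for the tensor actually bound to `out`. A shape check:

import torch

h   = torch.zeros(4, 3)   # hypothetical hidden state: batch=4, n_hidden=3
inp = torch.zeros(4, 5)   # hypothetical input:        batch=4, n_input=5
print(torch.cat([h, inp], dim=1).shape)   # torch.Size([4, 8]) -- what the gates expect
print(torch.stack([h, h], dim=1).shape)   # torch.Size([4, 2, 3]) -- extra axis
# torch.stack([h, inp], dim=1) would raise: stack needs equal shapes.
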


@@ -257,7 +257,7 @@
"source": [
"def average_sqr_grad(p, sqr_mom, sqr_avg=None, **kwargs):\n",
" if sqr_avg is None: sqr_avg = torch.zeros_like(p.grad.data)\n",
" return {'sqr_avg': sqr_avg*sqr_mom + p.grad.data**2}"
" return {'sqr_avg': sqr_mom*sqr_avg + (1-sqr_mom)*p.grad.data**2}"
]
},
{
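
The corrected line is the standard exponentially weighted moving average of squared gradients used by RMSProp and Adam: s_t = sqr_mom * s_{t-1} + (1 - sqr_mom) * g_t**2. The original version kept adding full g**2 terms, so the running value grew without bound instead of averaging. A worked check:

import torch

def ewma_sqr(grads, sqr_mom):
    # s_t = sqr_mom * s_{t-1} + (1 - sqr_mom) * g_t**2
    sqr_avg = torch.zeros_like(grads[0])
    for g in grads:
        sqr_avg = sqr_mom*sqr_avg + (1-sqr_mom)*g**2
    return sqr_avg

gs = [torch.tensor([1.0]), torch.tensor([2.0])]
print(ewma_sqr(gs, sqr_mom=0.9))   # 0.9*0.1 + 0.1*4.0 = tensor([0.4900])
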


@@ -938,7 +938,7 @@
" \n",
" def bwd(self, out, inp):\n",
" inp.g = out.g @ self.w.t()\n",
" self.w.g = self.inp.t() @ self.out.g\n",
" self.w.g = inp.t() @ self.out.g\n",
" self.b.g = out.g.sum(0)"
]
},
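
The fix uses the `inp` argument passed to `bwd` (the layer's stored input) for the weight gradient: for `out = inp @ w + b`, the chain rule gives `w.g = inp.t() @ out.g`. A quick check against autograd:

import torch

inp = torch.randn(4, 3)
w = torch.randn(3, 2, requires_grad=True)
b = torch.randn(2, requires_grad=True)
out = inp @ w + b
out.sum().backward()                        # upstream gradient is all ones
manual_wg = inp.t() @ torch.ones(4, 2)      # w.g = inp.t() @ out.g
print(torch.allclose(w.grad, manual_wg))    # True
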


@@ -7,7 +7,7 @@
"outputs": [],
"source": [
"#hide\n",
"# !pip install -Uqq fastbook\n",
"!pip install -Uqq fastbook\n",
"import fastbook\n",
"fastbook.setup_book()"
]

tools/clean.py Normal file → Executable file

@@ -1,3 +1,5 @@
#!/usr/bin/env python
import nbformat
from nbdev.export import *
from nbdev.clean import *