V2.1rc (#4)
* change to README * update README * frozen versions for v2 * frozen requirements, ISLP==0.3.19 * v2.1 notebooks excluding 10,13 * v2.1 of Ch13 * pairing notebooks * v2.1 of Ch10 * update README for v2.1 * update README * remove workflows: notebooks can be tested in ISLP
This commit is contained in:
40
.github/workflows/test_Ch02.yml
vendored
40
.github/workflows/test_Ch02.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch02 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch02 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch02-statlearn-lab.ipynb --allow-errors
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch03.yml
vendored
40
.github/workflows/test_Ch03.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch03 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch03 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch03*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch04.yml
vendored
40
.github/workflows/test_Ch04.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch04 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch04 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch04*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch05.yml
vendored
40
.github/workflows/test_Ch05.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch05 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch05 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch05*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch06.yml
vendored
40
.github/workflows/test_Ch06.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch06 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch06 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch06*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch07.yml
vendored
40
.github/workflows/test_Ch07.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch07 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch07 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch07*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch08.yml
vendored
40
.github/workflows/test_Ch08.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch08 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch08 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch08*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch09.yml
vendored
40
.github/workflows/test_Ch09.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch09 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch09 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch09*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch10.yml
vendored
40
.github/workflows/test_Ch10.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch10 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch10 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch10*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch11.yml
vendored
40
.github/workflows/test_Ch11.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch11 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch11 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch11*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch12.yml
vendored
40
.github/workflows/test_Ch12.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch12 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch12 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch12*ipynb
|
||||
|
||||
|
||||
40
.github/workflows/test_Ch13.yml
vendored
40
.github/workflows/test_Ch13.yml
vendored
@@ -1,40 +0,0 @@
|
||||
# This is a basic workflow to help you get started with Actions
|
||||
|
||||
name: Building Ch13 notebook
|
||||
|
||||
# Controls when the workflow will run
|
||||
on:
|
||||
workflow_dispatch:
|
||||
environment:
|
||||
description: 'Environment to run tests against'
|
||||
type: environment
|
||||
required: true
|
||||
|
||||
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
|
||||
jobs:
|
||||
# This workflow contains a single job called "build"
|
||||
build:
|
||||
# The type of runner that the job will run on
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
# Steps represent a sequence of tasks that will be executed as part of the job
|
||||
steps:
|
||||
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: '3.10'
|
||||
cache: 'pip'
|
||||
|
||||
# Install
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
pip install ISLP jupyter
|
||||
|
||||
# Runs a set of commands using the runners shell
|
||||
- name: Build notebook for Ch13 (some errors, not really a test)
|
||||
run: |
|
||||
jupyter nbconvert --execute --inplace Ch13*ipynb
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -15,8 +15,6 @@ jupyter:
|
||||
# Chapter 3
|
||||
|
||||
|
||||
|
||||
|
||||
# Lab: Linear Regression
|
||||
|
||||
## Importing packages
|
||||
@@ -277,7 +275,7 @@ value of 25.05 for `medv` when `lstat` equals
|
||||
10), but the latter are substantially wider.
|
||||
|
||||
Next we will plot `medv` and `lstat`
|
||||
using `DataFrame.plot.scatter()`,
|
||||
using `DataFrame.plot.scatter()`, \definelongblankMR{plot.scatter()}{plot.slashslashscatter()}
|
||||
and wish to
|
||||
add the regression line to the resulting plot.
|
||||
|
||||
@@ -399,7 +397,7 @@ Notice how we have compacted the first line into a succinct expression describin
|
||||
|
||||
The `Boston` data set contains 12 variables, and so it would be cumbersome
|
||||
to have to type all of these in order to perform a regression using all of the predictors.
|
||||
Instead, we can use the following short-hand:
|
||||
Instead, we can use the following short-hand:\definelongblankMR{columns.drop()}{columns.slashslashdrop()}
|
||||
|
||||
```{python}
|
||||
terms = Boston.columns.drop('medv')
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -807,7 +807,7 @@ feature_std.std()
|
||||
|
||||
```
|
||||
|
||||
Notice that the standard deviations are not quite $1$ here; this is again due to some procedures using the $1/n$ convention for variances (in this case `scaler()`), while others use $1/(n-1)$ (the `std()` method). See the footnote on page 103.
|
||||
Notice that the standard deviations are not quite $1$ here; this is again due to some procedures using the $1/n$ convention for variances (in this case `scaler()`), while others use $1/(n-1)$ (the `std()` method). See the footnote on page 200.
|
||||
In this case it does not matter, as long as the variables are all on the same scale.
|
||||
|
||||
Using the function `train_test_split()` we now split the observations into a test set,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -518,13 +518,13 @@ slope. Interestingly, these are somewhat different from the estimates
|
||||
obtained using the bootstrap. Does this indicate a problem with the
|
||||
bootstrap? In fact, it suggests the opposite. Recall that the
|
||||
standard formulas given in
|
||||
{Equation 3.8 on page 80}
|
||||
{Equation 3.8 on page 82}
|
||||
rely on certain assumptions. For example,
|
||||
they depend on the unknown parameter $\sigma^2$, the noise
|
||||
variance. We then estimate $\sigma^2$ using the RSS. Now although the
|
||||
formula for the standard errors do not rely on the linear model being
|
||||
correct, the estimate for $\sigma^2$ does. We see
|
||||
{in Figure 3.8 on page 106} that there is
|
||||
{in Figure 3.8 on page 108} that there is
|
||||
a non-linear relationship in the data, and so the residuals from a
|
||||
linear fit will be inflated, and so will $\hat{\sigma}^2$. Secondly,
|
||||
the standard formulas assume (somewhat unrealistically) that the $x_i$
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a3f2f85",
|
||||
"id": "85ad9863",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
@@ -12,7 +12,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bb22af17",
|
||||
"id": "ac8b08af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Lab: Cross-Validation and the Bootstrap\n",
|
||||
@@ -26,13 +26,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "60fad148",
|
||||
"id": "e7712cfe",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:37.622425Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:37.621828Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:38.459128Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:38.458689Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:01.252458Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:01.251970Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:02.044045Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:02.043730Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -49,7 +49,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78fcfe7a",
|
||||
"id": "784a2ba3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are several new imports needed for this lab."
|
||||
@@ -58,13 +58,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "2478aeb4",
|
||||
"id": "21c2ed4f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:38.461290Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:38.461070Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:38.463158Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:38.462899Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:02.045927Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:02.045761Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:02.047761Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:02.047491Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -81,7 +81,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "713d30db",
|
||||
"id": "9ac3acd5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## The Validation Set Approach\n",
|
||||
@@ -102,13 +102,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "99c95faf",
|
||||
"id": "8af59641",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:38.464725Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:38.464616Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:38.472566Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:38.472315Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:02.049239Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:02.049145Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:02.055524Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:02.055162Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -121,7 +121,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "57be35df",
|
||||
"id": "e76383f0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we can fit a linear regression using only the observations corresponding to the training set `Auto_train`."
|
||||
@@ -130,13 +130,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "41b0717d",
|
||||
"id": "d9b0b7c8",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:38.474061Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:38.473957Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:38.477686Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:38.477432Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:02.057278Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:02.057182Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:02.062537Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:02.062265Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -150,7 +150,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7f1bef95",
|
||||
"id": "d196dd08",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We now use the `predict()` method of `results` evaluated on the model matrix for this model\n",
|
||||
@@ -160,13 +160,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d7ea3c0d",
|
||||
"id": "3e77d831",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:38.479141Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:38.479053Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:38.483270Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:38.483037Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:02.064056Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:02.063966Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:02.068279Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:02.068024Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -190,7 +190,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6dba5d55",
|
||||
"id": "f4369ee6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Hence our estimate for the validation MSE of the linear regression\n",
|
||||
@@ -204,13 +204,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "a02a2d05",
|
||||
"id": "0aa4bfcc",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:38.484782Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:38.484689Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:38.486891Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:38.486642Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:02.069789Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:02.069682Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:02.071953Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:02.071703Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -235,7 +235,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39ab59b1",
|
||||
"id": "0271dc50",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let’s use this function to estimate the validation MSE\n",
|
||||
@@ -247,13 +247,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "51d93dea",
|
||||
"id": "a0dbd55f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:38.488297Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:38.488205Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:38.497955Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:38.497708Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:02.073322Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:02.073229Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:02.088464Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:02.088192Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -280,7 +280,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "936e168a",
|
||||
"id": "a7401536",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"These error rates are $23.62, 18.76$, and $18.80$, respectively. If we\n",
|
||||
@@ -291,13 +291,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "83432f06",
|
||||
"id": "885136a4",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:38.499478Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:38.499391Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:38.509735Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:38.509466Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:02.089889Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:02.089804Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:02.105353Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:02.105089Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -327,7 +327,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f5ceb357",
|
||||
"id": "00785402",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using this split of the observations into a training set and a validation set,\n",
|
||||
@@ -341,7 +341,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6d624a5c",
|
||||
"id": "21c071b8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cross-Validation\n",
|
||||
@@ -374,13 +374,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "bcfc433f",
|
||||
"id": "6d957d8c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:38.511210Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:38.511122Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.069624Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.069325Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:02.106979Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:02.106884Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.184550Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.184259Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -410,7 +410,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5b0f6f30",
|
||||
"id": "c17e2bc8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The arguments to `cross_validate()` are as follows: an\n",
|
||||
@@ -426,7 +426,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b527f67f",
|
||||
"id": "5c7901f2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can repeat this procedure for increasingly complex polynomial fits.\n",
|
||||
@@ -442,13 +442,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "f951ffc8",
|
||||
"id": "e2b5ce95",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.071240Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.071138Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.674084Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.673774Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.186226Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.186108Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.782413Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.782122Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -480,7 +480,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "792f1304",
|
||||
"id": "03706248",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As in Figure 5.4, we see a sharp drop in the estimated test MSE between the linear and\n",
|
||||
@@ -499,13 +499,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "e3610b5a",
|
||||
"id": "1dda1bd7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.675725Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.675614Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.678046Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.677762Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.783997Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.783886Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.786132Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.785881Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -530,7 +530,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "983625b2",
|
||||
"id": "f5092f1b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In the CV example above, we used $K=n$, but of course we can also use $K<n$. The code is very similar\n",
|
||||
@@ -541,13 +541,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "1627460d",
|
||||
"id": "fb25fa70",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.679517Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.679423Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.701200Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.700946Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.787622Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.787525Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.809671Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.809398Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -580,7 +580,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "32bf6662",
|
||||
"id": "c4ec6afb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice that the computation time is much shorter than that of LOOCV.\n",
|
||||
@@ -595,7 +595,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1e89127b",
|
||||
"id": "5edf407f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `cross_validate()` function is flexible and can take\n",
|
||||
@@ -606,13 +606,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "8a636468",
|
||||
"id": "d78795cd",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.702802Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.702718Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.708140Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.707865Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.811123Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.811046Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.817840Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.817582Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -641,7 +641,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2c0fb0d5",
|
||||
"id": "a081be63",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"One can estimate the variability in the test error by running the following:"
|
||||
@@ -650,13 +650,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "746aeccd",
|
||||
"id": "0407ad56",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.709627Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.709548Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.729721Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.729428Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.819308Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.819228Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.851921Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.851658Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -684,7 +684,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3310fe80",
|
||||
"id": "b66db3cb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note that this standard deviation is not a valid estimate of the\n",
|
||||
@@ -724,13 +724,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "daa53d0c",
|
||||
"id": "f04f15bd",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.731264Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.731179Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.734494Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.734221Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.853415Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.853334Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.857370Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.857115Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -745,7 +745,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fd439170",
|
||||
"id": "c88bd6a4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This function returns an estimate for $\\alpha$\n",
|
||||
@@ -758,13 +758,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "578c9564",
|
||||
"id": "f98c0323",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.736147Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.736062Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.738776Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.738545Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.858828Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.858753Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.861443Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.861198Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -785,7 +785,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cc18244c",
|
||||
"id": "58a78f00",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next we randomly select\n",
|
||||
@@ -797,13 +797,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "5754d6d5",
|
||||
"id": "bcd40175",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.740183Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.740108Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.743599Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.743267Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.862933Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.862830Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.865766Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.865514Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -829,7 +829,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0e97e132",
|
||||
"id": "e6058be4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This process can be generalized to create a simple function `boot_SE()` for\n",
|
||||
@@ -840,13 +840,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "8320a49c",
|
||||
"id": "ab6602cd",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.745013Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.744924Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:39.747163Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:39.746928Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.867170Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.867072Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:03.869326Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:03.869094Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -872,7 +872,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a1d25cfe",
|
||||
"id": "d94d383e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice the use of `_` as a loop variable in `for _ in range(B)`. This is often used if the value of the counter is\n",
|
||||
@@ -885,13 +885,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "e656aa1f",
|
||||
"id": "4a323513",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:39.748642Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:39.748543Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:40.034488Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:40.034215Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:03.870755Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:03.870664Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:04.157907Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:04.157623Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -916,7 +916,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "258ccf67",
|
||||
"id": "22343f53",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The final output shows that the bootstrap estimate for ${\\rm SE}(\\hat{\\alpha})$ is $0.0912$.\n",
|
||||
@@ -951,13 +951,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "c5d14195",
|
||||
"id": "0220f3af",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:40.036061Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:40.035977Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:40.037907Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:40.037662Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:04.159500Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:04.159419Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:04.161332Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:04.161073Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -972,7 +972,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "89a6fb3e",
|
||||
"id": "df0c7f05",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This is not quite what is needed as the first argument to\n",
|
||||
@@ -986,13 +986,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "7e0523f0",
|
||||
"id": "62037dcb",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:40.039299Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:40.039208Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:40.040837Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:40.040599Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:04.162950Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:04.162849Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:04.164486Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:04.164241Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1003,7 +1003,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4d8f9f61",
|
||||
"id": "61fbe248",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Typing `hp_func?` will show that it has two arguments `D`\n",
|
||||
@@ -1019,13 +1019,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "32836e93",
|
||||
"id": "b8bdb7a4",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:40.042164Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:40.042091Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:40.056730Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:40.056480Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:04.165879Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:04.165798Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:04.194029Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:04.193764Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1060,7 +1060,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "aa8cae71",
|
||||
"id": "2a831036",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we use the `boot_SE()` {} function to compute the standard\n",
|
||||
@@ -1070,13 +1070,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "14ce3afa",
|
||||
"id": "36808258",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:40.058168Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:40.058092Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:41.197103Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:41.196820Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:04.195612Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:04.195529Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:06.747175Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:06.746638Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1104,7 +1104,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1d0db4c6",
|
||||
"id": "38c65fbf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This indicates that the bootstrap estimate for ${\\rm SE}(\\hat{\\beta}_0)$ is\n",
|
||||
@@ -1120,13 +1120,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "6b1213ac",
|
||||
"id": "c9aea297",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:41.198611Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:41.198528Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:41.257926Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:41.257642Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:06.749614Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:06.749433Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:06.812583Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:06.812298Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1152,7 +1152,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2b158ef6",
|
||||
"id": "d870ad6b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The standard error estimates for $\\hat{\\beta}_0$ and $\\hat{\\beta}_1$\n",
|
||||
@@ -1164,13 +1164,13 @@
|
||||
"obtained using the bootstrap. Does this indicate a problem with the\n",
|
||||
"bootstrap? In fact, it suggests the opposite. Recall that the\n",
|
||||
"standard formulas given in\n",
|
||||
" {Equation 3.8 on page 80}\n",
|
||||
" {Equation 3.8 on page 82}\n",
|
||||
"rely on certain assumptions. For example,\n",
|
||||
"they depend on the unknown parameter $\\sigma^2$, the noise\n",
|
||||
"variance. We then estimate $\\sigma^2$ using the RSS. Now although the\n",
|
||||
"formula for the standard errors do not rely on the linear model being\n",
|
||||
"correct, the estimate for $\\sigma^2$ does. We see\n",
|
||||
" {in Figure 3.8 on page 106} that there is\n",
|
||||
" {in Figure 3.8 on page 108} that there is\n",
|
||||
"a non-linear relationship in the data, and so the residuals from a\n",
|
||||
"linear fit will be inflated, and so will $\\hat{\\sigma}^2$. Secondly,\n",
|
||||
"the standard formulas assume (somewhat unrealistically) that the $x_i$\n",
|
||||
@@ -1192,13 +1192,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "af99b778",
|
||||
"id": "79c56529",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:41.259623Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:41.259482Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:43.037184Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:43.036911Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:06.814267Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:06.814125Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:10.162177Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:10.161855Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1226,7 +1226,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1badcfd1",
|
||||
"id": "9fccbbbd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We compare the results to the standard errors computed using `sm.OLS()`."
|
||||
@@ -1235,13 +1235,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "0206281e",
|
||||
"id": "4d0b4edc",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:18:43.038778Z",
|
||||
"iopub.status.busy": "2023-08-07T00:18:43.038680Z",
|
||||
"iopub.status.idle": "2023-08-07T00:18:43.046810Z",
|
||||
"shell.execute_reply": "2023-08-07T00:18:43.046545Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:10.163852Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:10.163742Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:10.173834Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:10.173578Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1268,7 +1268,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c11a71f",
|
||||
"id": "9a86ff6e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
|
||||
@@ -540,7 +540,7 @@ grid.best_params_['ridge__alpha']
|
||||
grid.best_estimator_
|
||||
|
||||
```
|
||||
Recall we set up the `kfold` object for 5-fold cross-validation on page 296. We now plot the cross-validated MSE as a function of $-\log(\lambda)$, which has shrinkage decreasing from left
|
||||
Recall we set up the `kfold` object for 5-fold cross-validation on page 298. We now plot the cross-validated MSE as a function of $-\log(\lambda)$, which has shrinkage decreasing from left
|
||||
to right.
|
||||
|
||||
```{python}
|
||||
@@ -728,7 +728,7 @@ ax.set_ylabel('Standardized coefficiients', fontsize=20);
|
||||
```
|
||||
The smallest cross-validated error is lower than the test set MSE of the null model
|
||||
and of least squares, and very similar to the test MSE of 115526.71 of ridge
|
||||
regression (page 303) with $\lambda$ chosen by cross-validation.
|
||||
regression (page 305) with $\lambda$ chosen by cross-validation.
|
||||
|
||||
```{python}
|
||||
np.min(tuned_lasso.mse_path_.mean(1))
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -83,7 +83,7 @@ for feature transformations such as `PCA()` seen in Section 6.5.3) which
|
||||
allows for easy evaluation of the polynomial at new data points. Here `poly()` is referred to as a *helper* function, and sets up the transformation; `Poly()` is the actual workhorse that computes the transformation. See also
|
||||
the
|
||||
discussion of transformations on
|
||||
page 127.
|
||||
page 129.
|
||||
|
||||
In the code above, the first line executes the `fit()` method
|
||||
using the dataframe
|
||||
@@ -748,7 +748,6 @@ ANOVA, $\mathcal{M}_2$ is preferred.
|
||||
|
||||
We can repeat the same process for `age` as well. We see there is very clear evidence that
|
||||
a non-linear term is required for `age`.
|
||||
\newpage
|
||||
|
||||
```{python}
|
||||
gam_0 = LinearGAM(year_term +
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d45c6d2b",
|
||||
"id": "30b873f3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
@@ -12,7 +12,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "94d1e03c",
|
||||
"id": "3e06bca8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Lab: Support Vector Machines\n",
|
||||
@@ -25,13 +25,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "eeaa5be0",
|
||||
"id": "3973b95f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:27.947789Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:27.947634Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:28.991210Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:28.990616Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:58.477582Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:58.477467Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.432527Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.432225Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -45,7 +45,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "26ebd377",
|
||||
"id": "631b7d3d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We also collect the new imports\n",
|
||||
@@ -55,13 +55,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "41a59634",
|
||||
"id": "0161e55e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:28.993557Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:28.993245Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.028199Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.027857Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.434432Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.434258Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.466972Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.466647Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -73,7 +73,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f197b846",
|
||||
"id": "b397af05",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will use the function `RocCurveDisplay.from_estimator()` to\n",
|
||||
@@ -83,13 +83,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "c9a175d7",
|
||||
"id": "7661b056",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.030225Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.030097Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.032026Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.031756Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.469128Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.468999Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.470961Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.470667Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -99,7 +99,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f666c212",
|
||||
"id": "38115984",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Support Vector Classifier\n",
|
||||
@@ -123,13 +123,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "a7216b47",
|
||||
"id": "46e9ab84",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.033695Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.033581Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.207161Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.205980Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.472867Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.472726Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.583508Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.583126Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -159,7 +159,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7b4aff06",
|
||||
"id": "a9766d18",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"They are not. We now fit the classifier."
|
||||
@@ -168,13 +168,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ed329198",
|
||||
"id": "605ffdc0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.211951Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.211403Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.220643Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.219858Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.585485Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.585317Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.590274Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.589979Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -200,7 +200,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5e6b4c79",
|
||||
"id": "16215b77",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The support vector classifier with two features can\n",
|
||||
@@ -212,13 +212,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "95494b8b",
|
||||
"id": "302a49a1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.224179Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.223775Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.400927Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.400620Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.591976Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.591865Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.734225Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.733936Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -243,7 +243,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6ce1246",
|
||||
"id": "6010e865",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The decision\n",
|
||||
@@ -257,13 +257,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "98c2236f",
|
||||
"id": "cc1d6a13",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.402894Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.402744Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.544636Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.544249Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.735943Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.735816Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.878335Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.878032Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -291,7 +291,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "906f4bb8",
|
||||
"id": "301d764d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With a smaller value of the cost parameter, we\n",
|
||||
@@ -303,13 +303,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "b498f594",
|
||||
"id": "6133c846",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.546722Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.546549Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.549088Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.548814Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.880078Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.879965Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.882347Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.882070Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -331,7 +331,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "90a0ee53",
|
||||
"id": "0693b3eb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Since the support vector machine is an estimator in `sklearn`, we\n",
|
||||
@@ -341,13 +341,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "b65e80d6",
|
||||
"id": "9adb3793",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.550593Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.550485Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.578952Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.578657Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.883852Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.883749Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.910535Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.910272Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -378,7 +378,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d390528c",
|
||||
"id": "611e76a6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can easily access the cross-validation errors for each of these models\n",
|
||||
@@ -389,13 +389,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "bba8fad7",
|
||||
"id": "d3ab343e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.580977Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.580845Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.583558Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.583239Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.912005Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.911925Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.914189Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.913943Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -417,7 +417,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "703e2d43",
|
||||
"id": "41d85a2a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We see that `C=1` results in the highest cross-validation\n",
|
||||
@@ -430,13 +430,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "ad64269d",
|
||||
"id": "6aba117e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.585087Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.584981Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.586995Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.586714Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.915563Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.915487Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.917323Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.917078Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -448,7 +448,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "db41f5e2",
|
||||
"id": "ddbda9de",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we predict the class labels of these test observations. Here we\n",
|
||||
@@ -459,13 +459,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "5107fca1",
|
||||
"id": "dbe7d737",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.588685Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.588519Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.595768Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.595341Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.918744Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.918666Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.925361Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.925039Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -534,7 +534,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bbfc8005",
|
||||
"id": "7f002ea6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Thus, with this value of `C`,\n",
|
||||
@@ -546,13 +546,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "0320d9e0",
|
||||
"id": "ab1697c2",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.597509Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.597387Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.602346Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.601964Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.927158Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.927027Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:59.931558Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:59.931228Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -622,7 +622,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "427d775f",
|
||||
"id": "7574703a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this case 60% of test observations are correctly classified.\n",
|
||||
@@ -637,13 +637,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "84d7e778",
|
||||
"id": "0fd42b1e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.604018Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.603879Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.734586Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.734264Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:59.933100Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:59.933001Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.054738Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.054338Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -666,7 +666,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff7bdad1",
|
||||
"id": "4bdaf415",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now the observations are just barely linearly separable."
|
||||
@@ -675,13 +675,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "abb1f8be",
|
||||
"id": "09c15299",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.736388Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.736251Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.741179Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.740886Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.056655Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.056526Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.061096Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.060792Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -750,7 +750,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c44297cc",
|
||||
"id": "d987eecc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We fit the\n",
|
||||
@@ -762,13 +762,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "2e4ed2f5",
|
||||
"id": "d5fd2ff9",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.742864Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.742750Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.860686Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.860305Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.062673Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.062585Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.199860Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.199129Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -794,7 +794,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2836d70d",
|
||||
"id": "0834d471",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Indeed no training errors were made and only three support vectors were used.\n",
|
||||
@@ -807,13 +807,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "164a611c",
|
||||
"id": "39aff1b1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.862647Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.862496Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.867261Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.866916Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.202380Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.202233Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.207886Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.207493Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -882,7 +882,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39a432d1",
|
||||
"id": "d0684844",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Using `C=0.1`, we again do not misclassify any training observations, but we\n",
|
||||
@@ -894,13 +894,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "c67591a1",
|
||||
"id": "63a9d752",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.868821Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.868723Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.990207Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.989921Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.209907Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.209781Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.340803Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.340433Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -926,7 +926,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "25e61f65",
|
||||
"id": "a70d84f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Support Vector Machine\n",
|
||||
@@ -947,13 +947,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "322be574",
|
||||
"id": "2fee8df5",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.991910Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.991799Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:29.993907Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:29.993635Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.342773Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.342626Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.345094Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.344774Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -966,7 +966,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "22fe2182",
|
||||
"id": "d5c7545e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Plotting the data makes it clear that the class boundary is indeed non-linear."
|
||||
@@ -975,13 +975,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "04fda182",
|
||||
"id": "48f01abe",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:29.995558Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:29.995406Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:30.089596Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:30.089130Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.347053Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.346902Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.440453Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.440153Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -989,7 +989,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<matplotlib.collections.PathCollection at 0x17f2b35d0>"
|
||||
"<matplotlib.collections.PathCollection at 0x28b7c65d0>"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
@@ -1017,7 +1017,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "64913fe3",
|
||||
"id": "7c0bc32b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The data is randomly split into training and testing groups. We then\n",
|
||||
@@ -1028,13 +1028,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "0c2690d1",
|
||||
"id": "4acc3246",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:30.091605Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:30.091498Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:30.095614Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:30.095347Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.442257Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.442156Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.446674Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.446369Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1066,7 +1066,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5da9efdb",
|
||||
"id": "b2c7e95e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The plot shows that the resulting SVM has a decidedly non-linear\n",
|
||||
@@ -1076,13 +1076,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "3eb171e8",
|
||||
"id": "e9852a28",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:30.097178Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:30.097088Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:30.357131Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:30.356847Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.448268Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.448160Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:00.828511Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:00.828128Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1107,7 +1107,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ab5b1446",
|
||||
"id": "acfa4bed",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can see from the figure that there are a fair number of training\n",
|
||||
@@ -1120,13 +1120,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "9a6b905b",
|
||||
"id": "01232fc9",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:30.358811Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:30.358698Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:30.513702Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:30.513395Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:00.830365Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:00.830226Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:01.132677Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:01.132224Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1153,7 +1153,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "300c1b8b",
|
||||
"id": "5bc77e3f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can perform cross-validation using `skm.GridSearchCV()` to select the\n",
|
||||
@@ -1164,13 +1164,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "5ab01d6c",
|
||||
"id": "bcbd15a4",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:30.515803Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:30.515668Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:30.612245Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:30.611940Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:01.134616Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:01.134486Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:01.243519Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:01.243203Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1201,7 +1201,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1bb987ae",
|
||||
"id": "997bbfbd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The best choice of parameters under five-fold CV is achieved at `C=1`\n",
|
||||
@@ -1212,13 +1212,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "166a6acb",
|
||||
"id": "28ca551e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:30.614152Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:30.614029Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:30.850984Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:30.850653Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:01.245550Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:01.245377Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:01.600896Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:01.600574Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1303,7 +1303,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39ee6f32",
|
||||
"id": "48e671f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With these parameters, 12% of test\n",
|
||||
@@ -1312,7 +1312,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f0ea699d",
|
||||
"id": "eaed0a87",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## ROC Curves\n",
|
||||
@@ -1346,13 +1346,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "0607fc41",
|
||||
"id": "68ac9421",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:30.853079Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:30.852934Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:30.948570Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:30.948252Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:01.602740Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:01.602614Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:01.698620Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:01.698322Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1380,7 +1380,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "54446e71",
|
||||
"id": "0c35d32a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" In this example, the SVM appears to provide accurate predictions. By increasing\n",
|
||||
@@ -1391,13 +1391,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "5211a882",
|
||||
"id": "f79a9e0a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:30.950213Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:30.950106Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:31.095103Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:31.094737Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:01.700479Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:01.700347Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:01.837479Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:01.837102Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1428,7 +1428,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "de7e4be8",
|
||||
"id": "7bd1a22b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"However, these ROC curves are all on the training data. We are really\n",
|
||||
@@ -1440,13 +1440,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "12acc4ff",
|
||||
"id": "bdb9e503",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:31.096951Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:31.096805Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:31.101372Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:31.101097Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:01.839390Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:01.839243Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:01.843595Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:01.843287Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1462,7 +1462,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eb5c8aeb",
|
||||
"id": "8a42e924",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let’s look at our tuned SVM."
|
||||
@@ -1471,13 +1471,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "21c81913",
|
||||
"id": "329f5d2c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:31.103089Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:31.102993Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:31.204133Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:31.203835Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:01.845300Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:01.845201Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:01.944073Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:01.943763Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1510,7 +1510,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b9fefe9f",
|
||||
"id": "bac19279",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## SVM with Multiple Classes\n",
|
||||
@@ -1526,13 +1526,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "2fff4fa8",
|
||||
"id": "267e113d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:31.205816Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:31.205709Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:31.294925Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:31.294593Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:01.945725Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:01.945611Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:02.034378Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:02.034069Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1558,7 +1558,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b7adc87d",
|
||||
"id": "a9f4297c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We now fit an SVM to the data:"
|
||||
@@ -1567,13 +1567,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "5396f2df",
|
||||
"id": "64cbebd0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:31.296594Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:31.296472Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:31.880175Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:31.879674Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:02.036083Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:02.035963Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:03.015535Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:03.014798Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1605,7 +1605,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "837644f5",
|
||||
"id": "62c5d16e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `sklearn.svm` library can also be used to perform support vector\n",
|
||||
@@ -1614,7 +1614,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a6bc0cbc",
|
||||
"id": "5c0824b6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Application to Gene Expression Data\n",
|
||||
@@ -1631,13 +1631,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "f63c575e",
|
||||
"id": "b6e6f12b",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:31.882095Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:31.881962Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:31.959079Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:31.958769Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:03.017430Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:03.017293Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:03.099156Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:03.098760Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1659,7 +1659,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bfd6492c",
|
||||
"id": "e3fbaa58",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This data set consists of expression measurements for 2,308\n",
|
||||
@@ -1677,13 +1677,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "32091338",
|
||||
"id": "273a10b2",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:31.960641Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:31.960528Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:31.990176Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:31.989868Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:03.101069Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:03.100881Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:03.130224Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:03.129845Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1777,7 +1777,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23043ab0",
|
||||
"id": "31cad43a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We see that there are *no* training\n",
|
||||
@@ -1791,13 +1791,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "d9058023",
|
||||
"id": "bc3079a7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:31.991754Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:31.991636Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:32.002452Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:32.002189Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:03.132111Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:03.131975Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:03.143298Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:03.142948Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1889,7 +1889,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d0d5aba4",
|
||||
"id": "0d059312",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We see that using `C=10` yields two test set errors on these data.\n",
|
||||
|
||||
@@ -1,19 +1,3 @@
|
||||
---
|
||||
jupyter:
|
||||
jupytext:
|
||||
cell_metadata_filter: -all
|
||||
formats: ipynb,Rmd
|
||||
text_representation:
|
||||
extension: .Rmd
|
||||
format_name: rmarkdown
|
||||
format_version: '1.2'
|
||||
jupytext_version: 1.14.7
|
||||
kernelspec:
|
||||
display_name: Python 3 (ipykernel)
|
||||
language: python
|
||||
name: python3
|
||||
---
|
||||
|
||||
|
||||
# Chapter 10
|
||||
|
||||
@@ -872,7 +856,7 @@ for idx, (X_ ,Y_) in enumerate(cifar_dm.train_dataloader()):
|
||||
|
||||
|
||||
Before we start, we look at some of the training images; similar code produced
|
||||
Figure 10.5 on page 164. The example below also illustrates
|
||||
Figure 10.5 on page 447. The example below also illustrates
|
||||
that `TensorDataset` objects can be indexed with integers --- we are choosing
|
||||
random images from the training data by indexing `cifar_train`. In order to display correctly,
|
||||
we must reorder the dimensions by a call to `np.transpose()`.
|
||||
@@ -1705,7 +1689,6 @@ early stopping, since then the test performance would be biased.
|
||||
|
||||
We form the training dataset similar to
|
||||
our `Hitters` example.
|
||||
|
||||
|
||||
```{python}
|
||||
datasets = []
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c7f4eb5a",
|
||||
"id": "62a1a218",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
@@ -12,7 +12,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0ae03bc9",
|
||||
"id": "9da89fbb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Lab: Survival Analysis\n",
|
||||
@@ -31,13 +31,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "91ac40fd",
|
||||
"id": "d2d71add",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:33.224953Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:33.224846Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.446999Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.446629Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:04.373618Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:04.373522Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.528375Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.528065Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -51,7 +51,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a3dbcbbf",
|
||||
"id": "70fe80b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We also collect the new imports\n",
|
||||
@@ -61,13 +61,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "99782418",
|
||||
"id": "994efc94",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.448996Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.448819Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.539258Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.538955Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.530453Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.530271Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.593786Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.593483Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -83,7 +83,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2c538d28",
|
||||
"id": "e65a4796",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Brain Cancer Data\n",
|
||||
@@ -94,13 +94,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "3137149a",
|
||||
"id": "9d41ddea",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.541177Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.541057Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.547991Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.547753Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.595762Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.595642Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.602243Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.601969Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -122,7 +122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e798f172",
|
||||
"id": "4ac65a33",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The rows index the 88 patients, while the 8 columns contain the predictors and outcome variables.\n",
|
||||
@@ -132,13 +132,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "45963c92",
|
||||
"id": "2bece782",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.549558Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.549453Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.552571Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.552293Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.603954Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.603852Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.607075Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.606729Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -163,13 +163,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "73be61f6",
|
||||
"id": "9ca465e5",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.553962Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.553866Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.556544Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.556286Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.608553Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.608445Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.611386Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.611134Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -196,13 +196,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "572f0b9e",
|
||||
"id": "33bc4d3c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.557984Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.557901Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.560759Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.560490Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.612735Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.612639Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.615164Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.614915Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -226,7 +226,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fbd132de",
|
||||
"id": "eb9c6d4f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Before beginning an analysis, it is important to know how the\n",
|
||||
@@ -252,13 +252,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "92c39707",
|
||||
"id": "0b6dba70",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.562264Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.562173Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.764386Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.763084Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.616714Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.616622Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.728265Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.727903Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -292,7 +292,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f037665b",
|
||||
"id": "2cc511cd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next we create Kaplan-Meier survival curves that are stratified by\n",
|
||||
@@ -318,13 +318,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "3fc7848c",
|
||||
"id": "9e6f2e70",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.770269Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.769500Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.900514Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.900203Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.730200Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.730056Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.845830Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.845506Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -350,7 +350,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c0c1a16a",
|
||||
"id": "4d7efefb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As discussed in Section 11.4, we can perform a\n",
|
||||
@@ -363,13 +363,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "bf30d26f",
|
||||
"id": "c135f7aa",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.902462Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.902313Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.956077Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.955714Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.847658Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.847519Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.901295Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.900935Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -462,7 +462,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e270649c",
|
||||
"id": "bd14317d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The resulting $p$-value is $0.23$, indicating no evidence of a\n",
|
||||
@@ -476,13 +476,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "2ab78e07",
|
||||
"id": "5f9303dd",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.957966Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.957792Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.984567Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.984261Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.903263Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.903017Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.930691Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.930331Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -553,7 +553,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b58b93ae",
|
||||
"id": "7e56e83e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The first argument to `fit` should be a data frame containing\n",
|
||||
@@ -569,13 +569,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "4716b7b0",
|
||||
"id": "bcc8470c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.986336Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.986193Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:34.991518Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:34.991252Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.932434Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.932285Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.937796Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.937549Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -659,7 +659,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2820f486",
|
||||
"id": "2e6163ca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Regardless of which test we use, we see that there is no clear\n",
|
||||
@@ -675,13 +675,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "c2767d88",
|
||||
"id": "c26a3499",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:34.993223Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:34.993093Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.028673Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.028408Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.939300Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.939184Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.979585Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.979250Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -800,7 +800,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "eee4ab1f",
|
||||
"id": "a96e311c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" The `diagnosis` variable has been coded so that the baseline\n",
|
||||
@@ -823,13 +823,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ede1d219",
|
||||
"id": "8d999f26",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.030313Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.030211Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.034142Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.033836Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.981441Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.981315Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.986317Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.985949Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -845,7 +845,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e1c307ae",
|
||||
"id": "bf628fd8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We make four\n",
|
||||
@@ -856,13 +856,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "dc032a71",
|
||||
"id": "a1f6b355",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.035583Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.035483Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.041790Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.041394Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.988012Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.987898Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:05.993889Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:05.993534Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -974,7 +974,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "84da2586",
|
||||
"id": "3f6334e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We then construct the model matrix based on the model specification `all_MS` used to fit\n",
|
||||
@@ -984,13 +984,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "e7c1fe43",
|
||||
"id": "a1a9d5b3",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.043454Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.043346Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.050931Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.050643Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:05.995682Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:05.995549Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.005479Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.005089Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1117,7 +1117,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3cfe1ec4",
|
||||
"id": "3eaec7e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can use the `predict_survival_function()` method to obtain the estimated survival function."
|
||||
@@ -1126,13 +1126,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "f89fbed7",
|
||||
"id": "1a18b56a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.052472Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.052367Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.059232Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.058922Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.007172Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.007049Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.014185Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.013870Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1276,7 +1276,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "29afd641",
|
||||
"id": "7d533f90",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This returns a data frame,\n",
|
||||
@@ -1287,13 +1287,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "8f0329b4",
|
||||
"id": "ff3de29c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.061046Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.060930Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.167601Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.167288Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.015778Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.015664Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.124035Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.123732Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1316,7 +1316,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12723ce5",
|
||||
"id": "e660793e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Publication Data\n",
|
||||
@@ -1330,13 +1330,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "3045bfc0",
|
||||
"id": "cd9060c1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.169251Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.169133Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.287186Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.286859Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.125714Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.125592Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.243701Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.243300Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1363,7 +1363,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6fcb22f7",
|
||||
"id": "d8f0f687",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As discussed previously, the $p$-values from fitting Cox’s\n",
|
||||
@@ -1375,13 +1375,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "d070f716",
|
||||
"id": "6af7106e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.288887Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.288769Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.321428Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.321128Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.245493Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.245357Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.281521Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.281138Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1453,7 +1453,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "513a55b1",
|
||||
"id": "8ef28b8b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"However, the results change dramatically when we include other\n",
|
||||
@@ -1464,13 +1464,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "2bbcdd0c",
|
||||
"id": "b6ebefa7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.323119Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.323003Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.362910Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.362438Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.283282Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.283123Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.327003Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.326646Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1573,7 +1573,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "75bb8aa6",
|
||||
"id": "d463e623",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We see that there are a number of statistically significant variables,\n",
|
||||
@@ -1583,7 +1583,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bfe236e5",
|
||||
"id": "a23c38e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Call Center Data\n",
|
||||
@@ -1608,13 +1608,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "b8ece43a",
|
||||
"id": "098f42ea",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.364905Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.364772Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.368589Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.368291Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.329058Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.328927Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.332782Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.332425Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1637,7 +1637,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c93e44f3",
|
||||
"id": "2f54ed03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We then build a model matrix (omitting the intercept)"
|
||||
@@ -1646,13 +1646,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "3e4f766f",
|
||||
"id": "26d5d0d0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.370485Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.370371Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.377790Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.377469Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.334692Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.334589Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.344047Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.343708Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1666,7 +1666,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cad1ed19",
|
||||
"id": "a1a8f65d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It is worthwhile to take a peek at the model matrix `X`, so\n",
|
||||
@@ -1678,13 +1678,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "72f42d14",
|
||||
"id": "77500663",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.380244Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.380068Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.384542Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.384259Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.345660Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.345575Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.350086Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.349797Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1781,7 +1781,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "38c40ae1",
|
||||
"id": "fd7bd61c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we specify the coefficients and the hazard function."
|
||||
@@ -1790,13 +1790,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "8b921536",
|
||||
"id": "74324a56",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.386034Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.385942Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.420461Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.405608Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.351738Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.351549Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.444268Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.441484Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1808,7 +1808,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a0698ffd",
|
||||
"id": "cfe879e6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Here, we have set the coefficient associated with `Operators` to\n",
|
||||
@@ -1837,13 +1837,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "96ce0f99",
|
||||
"id": "d4be10c2",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.484657Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.484150Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.509286Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.508169Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.449822Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.449515Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.458388Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.457673Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1854,7 +1854,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1956e4c2",
|
||||
"id": "6095cfc1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We are now ready to generate data under the Cox proportional hazards\n",
|
||||
@@ -1868,13 +1868,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "63d78ff9",
|
||||
"id": "c98d396f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.522637Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.522260Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.630452Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.627960Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.461931Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.461787Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.624349Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.624026Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1886,7 +1886,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "035e4ecf",
|
||||
"id": "ed2e23ea",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We now simulate our censoring variable, for which we assume\n",
|
||||
@@ -1897,13 +1897,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "fe008dbf",
|
||||
"id": "caf627bc",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.635844Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.635469Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.649527Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.646191Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.626165Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.626054Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.630808Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.630542Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -2005,13 +2005,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "c3a2bec7",
|
||||
"id": "e63242f9",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.653001Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.652338Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.656830Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.656500Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.632357Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.632261Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.634630Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.634305Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -2032,7 +2032,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "207937e5",
|
||||
"id": "5f345011",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We now plot Kaplan-Meier survival curves. First, we stratify by `Center`."
|
||||
@@ -2041,13 +2041,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "2b27af56",
|
||||
"id": "338db71d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.658421Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.658328Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:35.811796Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:35.811449Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.636188Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.636081Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.791856Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.791521Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -2084,7 +2084,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "be6d37f7",
|
||||
"id": "1b5a1230",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we stratify by `Time`."
|
||||
@@ -2093,13 +2093,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "9625598d",
|
||||
"id": "c1db6e15",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:35.813696Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:35.813601Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:36.041021Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:36.040708Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.793629Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.793538Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:06.992155Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:06.991803Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -2136,7 +2136,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1408ebc0",
|
||||
"id": "deb73d38",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It seems that calls at Call Center B take longer to be answered than\n",
|
||||
@@ -2149,13 +2149,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "75a744ef",
|
||||
"id": "02ea4212",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:36.043079Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:36.042900Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:36.061936Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:36.061630Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:06.993929Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:06.993819Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:07.011557Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:07.011276Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -2247,7 +2247,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "be5055e4",
|
||||
"id": "db9cc6ee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we consider the effect of `Time`."
|
||||
@@ -2256,13 +2256,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "9badb3e3",
|
||||
"id": "0ac610d5",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:36.063627Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:36.063519Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:36.082451Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:36.082161Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:07.013331Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:07.013187Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:07.030401Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:07.030073Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -2354,7 +2354,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "64b2bc33",
|
||||
"id": "0946d3ef",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As in the case of a categorical variable with 2 levels, these\n",
|
||||
@@ -2366,13 +2366,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "026e9ff8",
|
||||
"id": "107cedad",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:36.084076Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:36.083964Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:36.208409Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:36.208076Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:07.032008Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:07.031887Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:07.160931Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:07.160639Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -2462,7 +2462,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4ed54fe0",
|
||||
"id": "10f2a0c1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we look at the results for `Time`."
|
||||
@@ -2471,13 +2471,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "7cab3789",
|
||||
"id": "334eb331",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:36.210101Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:36.209985Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:36.334146Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:36.333801Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:07.162793Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:07.162651Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:07.291875Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:07.291550Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -2567,7 +2567,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2d250dc9",
|
||||
"id": "774963d4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We find that differences between centers are highly significant, as\n",
|
||||
@@ -2579,13 +2579,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"id": "5cc4b898",
|
||||
"id": "421811c5",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:36.336025Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:36.335898Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:36.561174Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:36.559597Z"
|
||||
"iopub.execute_input": "2023-08-21T02:30:07.293545Z",
|
||||
"iopub.status.busy": "2023-08-21T02:30:07.293433Z",
|
||||
"iopub.status.idle": "2023-08-21T02:30:07.532213Z",
|
||||
"shell.execute_reply": "2023-08-21T02:30:07.531293Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -2684,7 +2684,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bec9d61d",
|
||||
"id": "3c65063f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The $p$-values for Center B and evening time\n",
|
||||
|
||||
@@ -14,7 +14,7 @@ jupyter:
|
||||
|
||||
# Chapter 12
|
||||
|
||||
# Lab: Unsupervised Learning
|
||||
# Lab: Unsupervised Learning
|
||||
In this lab we demonstrate PCA and clustering on several datasets.
|
||||
As in other labs, we import some of our libraries at this top
|
||||
level. This makes the code more readable, as scanning the first few
|
||||
@@ -819,7 +819,7 @@ linkage. Clearly cell lines within a single cancer type do tend to
|
||||
cluster together, although the clustering is not perfect. We will use
|
||||
complete linkage hierarchical clustering for the analysis that
|
||||
follows.
|
||||
|
||||
|
||||
We can cut the dendrogram at the height that will yield a particular
|
||||
number of clusters, say four:
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -121,7 +121,7 @@ amounts to quite a weak signal, and it resulted in a high number of
|
||||
Type II errors. Let’s instead simulate data with a stronger signal,
|
||||
so that the ratio of the mean to the standard deviation for the false
|
||||
null hypotheses equals $1$. We make only 10 Type II errors.
|
||||
|
||||
|
||||
|
||||
```{python}
|
||||
true_mean = np.array([1]*50 + [0]*50)
|
||||
@@ -244,6 +244,7 @@ mult_test(fund_mini_pvals, method = "holm", alpha=0.05)[:2]
|
||||
|
||||
As discussed previously, Manager One seems to perform particularly
|
||||
well, whereas Manager Two has poor performance.
|
||||
|
||||
|
||||
```{python}
|
||||
fund_mini.mean()
|
||||
@@ -575,5 +576,5 @@ ax.set_xlabel("Number of Rejections")
|
||||
ax.set_ylabel("False Discovery Rate");
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "75b2d75c",
|
||||
"id": "687e9b80",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
@@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "34e410a6",
|
||||
"id": "9f594a41",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We include our usual imports seen in earlier labs."
|
||||
@@ -23,13 +23,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "1f928b2d",
|
||||
"id": "7cc4fbeb",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:48.489971Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:48.489727Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.216508Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.215573Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:16.417394Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:16.417287Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.613483Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.613156Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -43,7 +43,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "12319e0a",
|
||||
"id": "08ba7bed",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We also collect the new imports\n",
|
||||
@@ -53,13 +53,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "eb4b32aa",
|
||||
"id": "595efc18",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.219989Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.219429Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.223239Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.222392Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.615551Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.615375Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.617379Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.617087Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -78,7 +78,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a2747e58",
|
||||
"id": "69e5023e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Review of Hypothesis Tests\n",
|
||||
@@ -92,13 +92,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "e12ac0cd",
|
||||
"id": "985d1d6e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.225454Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.225335Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.228651Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.228301Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.618995Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.618887Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.620921Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.620629Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -111,7 +111,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "70d37233",
|
||||
"id": "9ae561c4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To begin, we use `ttest_1samp()` from the\n",
|
||||
@@ -122,13 +122,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "04d0f49e",
|
||||
"id": "753d612a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.230854Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.230727Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.236745Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.236388Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.622537Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.622429Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.626063Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.625801Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -150,7 +150,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cf83426f",
|
||||
"id": "5d9dc17f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The $p$-value comes out to 0.931, which is not low enough to\n",
|
||||
@@ -169,13 +169,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "d1f0c695",
|
||||
"id": "facd6569",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.238822Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.238685Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.263876Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.263508Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.627714Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.627617Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.651726Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.651448Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -195,7 +195,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3d8e0d96",
|
||||
"id": "4094daa7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Since this is a simulated data set, we can create a $2 \\times 2$ table\n",
|
||||
@@ -205,13 +205,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "7a9594a0",
|
||||
"id": "e89ef3eb",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.266708Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.266387Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.280831Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.280194Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.653344Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.653256Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.662644Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.662346Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -282,7 +282,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9610c817",
|
||||
"id": "a10ba423",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Therefore, at level $\\alpha=0.05$, we reject 15 of the 50 false\n",
|
||||
@@ -299,19 +299,20 @@
|
||||
"amounts to quite a weak signal, and it resulted in a high number of\n",
|
||||
"Type II errors. Let’s instead simulate data with a stronger signal,\n",
|
||||
"so that the ratio of the mean to the standard deviation for the false\n",
|
||||
"null hypotheses equals $1$. We make only 10 Type II errors.\n"
|
||||
"null hypotheses equals $1$. We make only 10 Type II errors.\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "25f7fc5d",
|
||||
"id": "ae184aaf",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.283336Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.283198Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.317664Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.317356Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.664327Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.664213Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.690928Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.690657Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -394,7 +395,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bb70c597",
|
||||
"id": "7ca15d3f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" "
|
||||
@@ -402,7 +403,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6953d33",
|
||||
"id": "9e9f5977",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Family-Wise Error Rate\n",
|
||||
@@ -417,13 +418,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "369b5bd3",
|
||||
"id": "0295fe68",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.319399Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.319268Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.674219Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.673887Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.692568Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.692459Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.899403Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.899081Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -454,7 +455,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3a81479e",
|
||||
"id": "fecaca9e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As discussed previously, even for moderate values of $m$ such as $50$,\n",
|
||||
@@ -473,13 +474,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "9ce7a19f",
|
||||
"id": "406e59a8",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.676029Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.675909Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.715451Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.715145Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.901146Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.901041Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.939312Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.939019Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -505,7 +506,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7561e3a3",
|
||||
"id": "87bab88b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The $p$-values are low for Managers One and Three, and high for the\n",
|
||||
@@ -530,7 +531,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5b608e46",
|
||||
"id": "d0c7a2a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `mult_test()` function takes $p$-values and a `method` argument, as well as an optional\n",
|
||||
@@ -541,13 +542,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "de6cffed",
|
||||
"id": "d4f6a247",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.717476Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.717258Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.719841Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.719577Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.941032Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.940919Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.943369Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.943081Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -570,7 +571,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5135c6b9",
|
||||
"id": "4d5bc7e7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The $p$-values `bonf` are simply the `fund_mini_pvalues` multiplied by 5 and truncated to be less than\n",
|
||||
@@ -580,13 +581,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "0de71500",
|
||||
"id": "01a29d71",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.721450Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.721342Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.723962Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.723691Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.944859Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.944760Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.946888Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.946639Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -608,7 +609,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1f0bc112",
|
||||
"id": "e68c9051",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Therefore, using Bonferroni’s method, we are able to reject the null hypothesis only for Manager\n",
|
||||
@@ -622,13 +623,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "f7e87bdb",
|
||||
"id": "95454eb4",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.725450Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.725349Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.777055Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.776612Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.948474Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.948372Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.990740Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.990464Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -651,23 +652,24 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f762fecd",
|
||||
"id": "587b5b48",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As discussed previously, Manager One seems to perform particularly\n",
|
||||
"well, whereas Manager Two has poor performance."
|
||||
"well, whereas Manager Two has poor performance.\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "e88be376",
|
||||
"id": "1f1ac764",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.779763Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.779554Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.783798Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.783418Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.992261Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.992149Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.995141Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.994894Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -694,7 +696,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "88dbf0a6",
|
||||
"id": "e3c4b716",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Is there evidence of a meaningful difference in performance between\n",
|
||||
@@ -705,13 +707,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "41149af6",
|
||||
"id": "298d975d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.786752Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.786580Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:50.791095Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:50.790607Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:17.996686Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:17.996590Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:17.999332Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:17.999076Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -733,7 +735,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1aca6122",
|
||||
"id": "3908d7d2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The test results in a $p$-value of 0.038,\n",
|
||||
@@ -757,13 +759,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "61aabda7",
|
||||
"id": "be117713",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:50.793577Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:50.793394Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:51.318999Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:51.318071Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:18.000853Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:18.000747Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:18.487357Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:18.487078Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -799,7 +801,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e0084fc5",
|
||||
"id": "0fdf963f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `pairwise_tukeyhsd()` function provides confidence intervals\n",
|
||||
@@ -817,13 +819,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "cbcad4de",
|
||||
"id": "537c4ea8",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:51.321200Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:51.321046Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:51.411142Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:51.410682Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:18.489069Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:18.488949Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:18.570869Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:18.570427Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -845,7 +847,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6278d13c",
|
||||
"id": "8590f246",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## False Discovery Rate\n",
|
||||
@@ -858,13 +860,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "b5842190",
|
||||
"id": "2c88ec87",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:51.413331Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:51.413176Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:51.848427Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:51.847956Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:18.572454Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:18.572341Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:19.005707Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:19.005387Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -876,7 +878,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "80fc2fcc",
|
||||
"id": "80e77fab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are far too many managers to consider trying to control the FWER.\n",
|
||||
@@ -887,13 +889,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "7c9d8bed",
|
||||
"id": "b6d56819",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:51.850663Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:51.850523Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:51.854777Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:51.854196Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:19.007847Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:19.007564Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:19.010742Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:19.010371Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -916,7 +918,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4f73096d",
|
||||
"id": "b4662444",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The *q-values* output by the\n",
|
||||
@@ -932,13 +934,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "bfa39f7c",
|
||||
"id": "b00da3a1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:51.856795Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:51.856678Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:51.859719Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:51.859327Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:19.012400Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:19.012298Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:19.015314Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:19.014978Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -960,7 +962,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ccb44c8d",
|
||||
"id": "fdccb808",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We find that 146 of the 2,000 fund managers have a $q$-value below\n",
|
||||
@@ -976,13 +978,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"id": "70b69b47",
|
||||
"id": "1c230117",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:51.861924Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:51.861522Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:51.864394Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:51.863987Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:19.016857Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:19.016769Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:19.019332Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:19.019032Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1004,7 +1006,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c8a969f4",
|
||||
"id": "6112239d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Figure 13.6 displays the ordered\n",
|
||||
@@ -1026,13 +1028,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"id": "4c0ddea1",
|
||||
"id": "62289650",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:51.865978Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:51.865869Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:51.868792Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:51.868357Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:19.021112Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:19.020904Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:19.023622Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:19.023338Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1051,7 +1053,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ddeb3900",
|
||||
"id": "c36b13b7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We now reproduce the middle panel of Figure 13.6."
|
||||
@@ -1060,13 +1062,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "0314eac9",
|
||||
"id": "18b3c0ed",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:51.871473Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:51.871214Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:52.126671Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:52.126261Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:19.025191Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:19.025074Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:19.262207Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:19.261823Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1096,7 +1098,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "83416f4a",
|
||||
"id": "d87198e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## A Re-Sampling Approach\n",
|
||||
@@ -1110,13 +1112,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"id": "b59b8137",
|
||||
"id": "eb79e606",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:52.129167Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:52.128842Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:52.208320Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:52.207936Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:19.264174Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:19.264030Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:19.339232Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:19.338912Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1145,7 +1147,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5534c8d4",
|
||||
"id": "659ee2b8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"There are four classes of cancer. For each gene, we compare the mean\n",
|
||||
@@ -1161,13 +1163,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "96fb2f61",
|
||||
"id": "1afbcf47",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:52.210235Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:52.210101Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:52.215004Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:52.214604Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:19.341009Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:19.340889Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:19.344670Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:19.344391Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1195,7 +1197,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3131124e",
|
||||
"id": "61f24919",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"However, this $p$-value relies on the assumption that under the null\n",
|
||||
@@ -1214,13 +1216,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "fdc229fa",
|
||||
"id": "f73f4c6d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:52.217206Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:52.217085Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:54.776066Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:54.775642Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:19.346368Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:19.346227Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:21.776569Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:21.776267Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
@@ -1253,7 +1255,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c7fc4557",
|
||||
"id": "a97f74af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This fraction, 0.0398,\n",
|
||||
@@ -1265,13 +1267,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "e3894695",
|
||||
"id": "062daf19",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:54.778563Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:54.778388Z",
|
||||
"iopub.status.idle": "2023-08-07T00:19:55.017161Z",
|
||||
"shell.execute_reply": "2023-08-07T00:19:55.016821Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:21.778366Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:21.778242Z",
|
||||
"iopub.status.idle": "2023-08-21T02:29:21.990476Z",
|
||||
"shell.execute_reply": "2023-08-21T02:29:21.989965Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1307,7 +1309,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3bd21158",
|
||||
"id": "e81b939b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The re-sampling-based null distribution is almost identical to the theoretical null distribution, which is displayed in red.\n",
|
||||
@@ -1325,13 +1327,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "3b7392cb",
|
||||
"id": "6d14fcad",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:19:55.019036Z",
|
||||
"iopub.status.busy": "2023-08-07T00:19:55.018920Z",
|
||||
"iopub.status.idle": "2023-08-07T00:39:19.291005Z",
|
||||
"shell.execute_reply": "2023-08-07T00:39:19.287314Z"
|
||||
"iopub.execute_input": "2023-08-21T02:29:21.992665Z",
|
||||
"iopub.status.busy": "2023-08-21T02:29:21.992515Z",
|
||||
"iopub.status.idle": "2023-08-21T02:34:05.930300Z",
|
||||
"shell.execute_reply": "2023-08-21T02:34:05.929181Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1358,7 +1360,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b92df1b",
|
||||
"id": "06286699",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we compute the number of rejected null hypotheses $R$, the\n",
|
||||
@@ -1371,13 +1373,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "cac15616",
|
||||
"id": "8f0ec909",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:39:19.314420Z",
|
||||
"iopub.status.busy": "2023-08-07T00:39:19.313452Z",
|
||||
"iopub.status.idle": "2023-08-07T00:39:19.544251Z",
|
||||
"shell.execute_reply": "2023-08-07T00:39:19.543932Z"
|
||||
"iopub.execute_input": "2023-08-21T02:34:05.935513Z",
|
||||
"iopub.status.busy": "2023-08-21T02:34:05.935323Z",
|
||||
"iopub.status.idle": "2023-08-21T02:34:06.118079Z",
|
||||
"shell.execute_reply": "2023-08-21T02:34:06.117633Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1394,7 +1396,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f6779ea0",
|
||||
"id": "e26b64c6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, for any given FDR, we can find the genes that will be\n",
|
||||
@@ -1412,13 +1414,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "9661eb10",
|
||||
"id": "f11339e5",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:39:19.546693Z",
|
||||
"iopub.status.busy": "2023-08-07T00:39:19.546543Z",
|
||||
"iopub.status.idle": "2023-08-07T00:39:19.549970Z",
|
||||
"shell.execute_reply": "2023-08-07T00:39:19.549697Z"
|
||||
"iopub.execute_input": "2023-08-21T02:34:06.120138Z",
|
||||
"iopub.status.busy": "2023-08-21T02:34:06.119994Z",
|
||||
"iopub.status.idle": "2023-08-21T02:34:06.123846Z",
|
||||
"shell.execute_reply": "2023-08-21T02:34:06.123478Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1457,7 +1459,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "001e3fc1",
|
||||
"id": "e145621b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"At an FDR threshold of 0.2, more genes are selected, at the cost of having a higher expected\n",
|
||||
@@ -1467,13 +1469,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "18ad4900",
|
||||
"id": "d2600773",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:39:19.552090Z",
|
||||
"iopub.status.busy": "2023-08-07T00:39:19.552004Z",
|
||||
"iopub.status.idle": "2023-08-07T00:39:19.554743Z",
|
||||
"shell.execute_reply": "2023-08-07T00:39:19.554473Z"
|
||||
"iopub.execute_input": "2023-08-21T02:34:06.126460Z",
|
||||
"iopub.status.busy": "2023-08-21T02:34:06.126346Z",
|
||||
"iopub.status.idle": "2023-08-21T02:34:06.129561Z",
|
||||
"shell.execute_reply": "2023-08-21T02:34:06.129124Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1524,7 +1526,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8767f70c",
|
||||
"id": "32e600ff",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The next line generates Figure 13.11, which is similar\n",
|
||||
@@ -1535,13 +1537,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "28c276b6",
|
||||
"id": "924b7705",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-08-07T00:39:19.556715Z",
|
||||
"iopub.status.busy": "2023-08-07T00:39:19.556515Z",
|
||||
"iopub.status.idle": "2023-08-07T00:39:19.650514Z",
|
||||
"shell.execute_reply": "2023-08-07T00:39:19.650181Z"
|
||||
"iopub.execute_input": "2023-08-21T02:34:06.131323Z",
|
||||
"iopub.status.busy": "2023-08-21T02:34:06.131207Z",
|
||||
"iopub.status.idle": "2023-08-21T02:34:06.216626Z",
|
||||
"shell.execute_reply": "2023-08-21T02:34:06.216270Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
@@ -1566,10 +1568,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e4b5d621",
|
||||
"id": "b9f54695",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n"
|
||||
" \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -1,25 +1,23 @@
|
||||
# ISLP_labs
|
||||
|
||||
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/intro-stat-learning/ISLP_labs/v2)
|
||||
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/intro-stat-learning/ISLP_labs/v2.1)
|
||||
|
||||
|
||||
Up-to-date version of labs for ISLP.
|
||||
|
||||
This repo will track labs for ISLP as their source code changes. The
|
||||
intent is that building a conda environment with
|
||||
`requirements.txt` will reproduce
|
||||
the results in this repo.
|
||||
`requirements.txt` will reproduce the results in this repo.
|
||||
|
||||
To install the current version of the requirements run
|
||||
|
||||
```
|
||||
pip install -r https://raw.githubusercontent.com/intro-stat-learning/ISLP_labs/v2/requirements.txt;
|
||||
pip install -r https://raw.githubusercontent.com/intro-stat-learning/ISLP_labs/v2.1/requirements.txt;
|
||||
```
|
||||
|
||||
The labs can now be run from this directory:
|
||||
|
||||
```
|
||||
cd notebook;
|
||||
jupyter lab Ch02-statlearn-lab.ipynb
|
||||
```
|
||||
|
||||
|
||||
@@ -1,17 +1,16 @@
|
||||
numpy>=1.7.1
|
||||
scipy>=0.9
|
||||
pandas>=0.20
|
||||
pandas<=1.9
|
||||
lxml # pandas needs this for html
|
||||
scikit-learn>=1.2
|
||||
joblib
|
||||
statsmodels>=0.13
|
||||
lifelines
|
||||
pygam # for GAM in Ch7
|
||||
l0bnb==1.0.0 # for bestsubsets
|
||||
ISLP
|
||||
torch
|
||||
pytorch_lightning
|
||||
torchmetrics
|
||||
torchvision
|
||||
torchinfo
|
||||
numpy==1.24.2
|
||||
scipy==1.11.1
|
||||
pandas==1.5.3
|
||||
lxml==4.9.3
|
||||
scikit-learn==1.3.0
|
||||
joblib==1.3.1
|
||||
statsmodels==0.14.0
|
||||
lifelines==0.27.7
|
||||
pygam==0.9.0
|
||||
l0bnb==1.0.0
|
||||
torch==2.0.1
|
||||
torchvision==0.15.2
|
||||
pytorch-lightning==2.0.6
|
||||
torchinfo==1.8.0
|
||||
torchmetrics==1.0.1
|
||||
ISLP==0.3.19
|
||||
|
||||
Reference in New Issue
Block a user