* change to README

* update README

* frozen versions for v2

* frozen requirements, ISLP==0.3.19

* v2.1 notebooks excluding 10,13

* v2.1 of Ch13

* pairing notebooks

* v2.1 of Ch10

* update README for v2.1

* update README

* remove workflows: notebooks can be tested in ISLP
This commit is contained in:
Jonathan Taylor
2023-08-20 20:48:30 -07:00
committed by GitHub
parent 065a1ae993
commit bec719e809
34 changed files with 3978 additions and 4481 deletions

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch02 lab notebook in place.
name: Building Ch02 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # --allow-errors lets execution continue past cells that raise.
      - name: Build notebook for Ch02 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch02-statlearn-lab.ipynb --allow-errors

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch03 lab notebook(s) in place.
name: Building Ch03 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch03 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch03*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch04 lab notebook(s) in place.
name: Building Ch04 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch04 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch04*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch05 lab notebook(s) in place.
name: Building Ch05 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch05 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch05*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch06 lab notebook(s) in place.
name: Building Ch06 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch06 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch06*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch07 lab notebook(s) in place.
name: Building Ch07 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch07 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch07*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch08 lab notebook(s) in place.
name: Building Ch08 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch08 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch08*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch09 lab notebook(s) in place.
name: Building Ch09 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch09 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch09*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch10 lab notebook(s) in place.
name: Building Ch10 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch10 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch10*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch11 lab notebook(s) in place.
name: Building Ch11 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch11 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch11*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch12 lab notebook(s) in place.
name: Building Ch12 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch12 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch12*ipynb

View File

@@ -1,40 +0,0 @@
# Manually dispatched workflow that executes the Ch13 lab notebook(s) in place.
name: Building Ch13 notebook

# Controls when the workflow will run: manual dispatch only.
on:
  workflow_dispatch:
    # Dispatch inputs are only recognised when nested under `inputs:`.
    inputs:
      environment:
        description: 'Environment to run tests against'
        type: environment
        required: true

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
  # This workflow contains a single job called "build"
  build:
    # The type of runner that the job will run on
    runs-on: ubuntu-latest

    # Steps represent a sequence of tasks that will be executed as part of the job
    steps:
      # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v2
        with:
          # Quoted so the version is not read as the float 3.1
          python-version: '3.10'
          cache: 'pip'

      # Install the packages listed in requirements.txt, then ISLP and jupyter
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
          pip install ISLP jupyter

      # Runs a set of commands using the runner's shell.
      # NOTE(review): no --allow-errors is passed here, so a failing cell fails
      # the job despite the step name - confirm that this is intended.
      - name: Build notebook for Ch13 (some errors, not really a test)
        run: |
          jupyter nbconvert --execute --inplace Ch13*ipynb

File diff suppressed because it is too large Load Diff

View File

@@ -15,8 +15,6 @@ jupyter:
# Chapter 3
# Lab: Linear Regression
## Importing packages
@@ -277,7 +275,7 @@ value of 25.05 for `medv` when `lstat` equals
10), but the latter are substantially wider.
Next we will plot `medv` and `lstat`
using `DataFrame.plot.scatter()`,
using `DataFrame.plot.scatter()`, \definelongblankMR{plot.scatter()}{plot.slashslashscatter()}
and wish to
add the regression line to the resulting plot.
@@ -399,7 +397,7 @@ Notice how we have compacted the first line into a succinct expression describin
The `Boston` data set contains 12 variables, and so it would be cumbersome
to have to type all of these in order to perform a regression using all of the predictors.
Instead, we can use the following short-hand:
Instead, we can use the following short-hand:\definelongblankMR{columns.drop()}{columns.slashslashdrop()}
```{python}
terms = Boston.columns.drop('medv')

File diff suppressed because it is too large Load Diff

View File

@@ -807,7 +807,7 @@ feature_std.std()
```
Notice that the standard deviations are not quite $1$ here; this is again due to some procedures using the $1/n$ convention for variances (in this case `scaler()`), while others use $1/(n-1)$ (the `std()` method). See the footnote on page 103.
Notice that the standard deviations are not quite $1$ here; this is again due to some procedures using the $1/n$ convention for variances (in this case `scaler()`), while others use $1/(n-1)$ (the `std()` method). See the footnote on page 200.
In this case it does not matter, as long as the variables are all on the same scale.
Using the function `train_test_split()` we now split the observations into a test set,

File diff suppressed because it is too large Load Diff

View File

@@ -518,13 +518,13 @@ slope. Interestingly, these are somewhat different from the estimates
obtained using the bootstrap. Does this indicate a problem with the
bootstrap? In fact, it suggests the opposite. Recall that the
standard formulas given in
{Equation 3.8 on page 80}
{Equation 3.8 on page 82}
rely on certain assumptions. For example,
they depend on the unknown parameter $\sigma^2$, the noise
variance. We then estimate $\sigma^2$ using the RSS. Now although the
formulas for the standard errors do not rely on the linear model being
correct, the estimate for $\sigma^2$ does. We see
{in Figure 3.8 on page 106} that there is
{in Figure 3.8 on page 108} that there is
a non-linear relationship in the data, and so the residuals from a
linear fit will be inflated, and so will $\hat{\sigma}^2$. Secondly,
the standard formulas assume (somewhat unrealistically) that the $x_i$

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "3a3f2f85",
"id": "85ad9863",
"metadata": {},
"source": [
"\n",
@@ -12,7 +12,7 @@
},
{
"cell_type": "markdown",
"id": "bb22af17",
"id": "ac8b08af",
"metadata": {},
"source": [
"# Lab: Cross-Validation and the Bootstrap\n",
@@ -26,13 +26,13 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "60fad148",
"id": "e7712cfe",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:37.622425Z",
"iopub.status.busy": "2023-08-07T00:18:37.621828Z",
"iopub.status.idle": "2023-08-07T00:18:38.459128Z",
"shell.execute_reply": "2023-08-07T00:18:38.458689Z"
"iopub.execute_input": "2023-08-21T02:29:01.252458Z",
"iopub.status.busy": "2023-08-21T02:29:01.251970Z",
"iopub.status.idle": "2023-08-21T02:29:02.044045Z",
"shell.execute_reply": "2023-08-21T02:29:02.043730Z"
},
"lines_to_next_cell": 2
},
@@ -49,7 +49,7 @@
},
{
"cell_type": "markdown",
"id": "78fcfe7a",
"id": "784a2ba3",
"metadata": {},
"source": [
"There are several new imports needed for this lab."
@@ -58,13 +58,13 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "2478aeb4",
"id": "21c2ed4f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:38.461290Z",
"iopub.status.busy": "2023-08-07T00:18:38.461070Z",
"iopub.status.idle": "2023-08-07T00:18:38.463158Z",
"shell.execute_reply": "2023-08-07T00:18:38.462899Z"
"iopub.execute_input": "2023-08-21T02:29:02.045927Z",
"iopub.status.busy": "2023-08-21T02:29:02.045761Z",
"iopub.status.idle": "2023-08-21T02:29:02.047761Z",
"shell.execute_reply": "2023-08-21T02:29:02.047491Z"
},
"lines_to_next_cell": 2
},
@@ -81,7 +81,7 @@
},
{
"cell_type": "markdown",
"id": "713d30db",
"id": "9ac3acd5",
"metadata": {},
"source": [
"## The Validation Set Approach\n",
@@ -102,13 +102,13 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "99c95faf",
"id": "8af59641",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:38.464725Z",
"iopub.status.busy": "2023-08-07T00:18:38.464616Z",
"iopub.status.idle": "2023-08-07T00:18:38.472566Z",
"shell.execute_reply": "2023-08-07T00:18:38.472315Z"
"iopub.execute_input": "2023-08-21T02:29:02.049239Z",
"iopub.status.busy": "2023-08-21T02:29:02.049145Z",
"iopub.status.idle": "2023-08-21T02:29:02.055524Z",
"shell.execute_reply": "2023-08-21T02:29:02.055162Z"
}
},
"outputs": [],
@@ -121,7 +121,7 @@
},
{
"cell_type": "markdown",
"id": "57be35df",
"id": "e76383f0",
"metadata": {},
"source": [
"Now we can fit a linear regression using only the observations corresponding to the training set `Auto_train`."
@@ -130,13 +130,13 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "41b0717d",
"id": "d9b0b7c8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:38.474061Z",
"iopub.status.busy": "2023-08-07T00:18:38.473957Z",
"iopub.status.idle": "2023-08-07T00:18:38.477686Z",
"shell.execute_reply": "2023-08-07T00:18:38.477432Z"
"iopub.execute_input": "2023-08-21T02:29:02.057278Z",
"iopub.status.busy": "2023-08-21T02:29:02.057182Z",
"iopub.status.idle": "2023-08-21T02:29:02.062537Z",
"shell.execute_reply": "2023-08-21T02:29:02.062265Z"
}
},
"outputs": [],
@@ -150,7 +150,7 @@
},
{
"cell_type": "markdown",
"id": "7f1bef95",
"id": "d196dd08",
"metadata": {},
"source": [
"We now use the `predict()` method of `results` evaluated on the model matrix for this model\n",
@@ -160,13 +160,13 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "d7ea3c0d",
"id": "3e77d831",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:38.479141Z",
"iopub.status.busy": "2023-08-07T00:18:38.479053Z",
"iopub.status.idle": "2023-08-07T00:18:38.483270Z",
"shell.execute_reply": "2023-08-07T00:18:38.483037Z"
"iopub.execute_input": "2023-08-21T02:29:02.064056Z",
"iopub.status.busy": "2023-08-21T02:29:02.063966Z",
"iopub.status.idle": "2023-08-21T02:29:02.068279Z",
"shell.execute_reply": "2023-08-21T02:29:02.068024Z"
}
},
"outputs": [
@@ -190,7 +190,7 @@
},
{
"cell_type": "markdown",
"id": "6dba5d55",
"id": "f4369ee6",
"metadata": {},
"source": [
"Hence our estimate for the validation MSE of the linear regression\n",
@@ -204,13 +204,13 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "a02a2d05",
"id": "0aa4bfcc",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:38.484782Z",
"iopub.status.busy": "2023-08-07T00:18:38.484689Z",
"iopub.status.idle": "2023-08-07T00:18:38.486891Z",
"shell.execute_reply": "2023-08-07T00:18:38.486642Z"
"iopub.execute_input": "2023-08-21T02:29:02.069789Z",
"iopub.status.busy": "2023-08-21T02:29:02.069682Z",
"iopub.status.idle": "2023-08-21T02:29:02.071953Z",
"shell.execute_reply": "2023-08-21T02:29:02.071703Z"
}
},
"outputs": [],
@@ -235,7 +235,7 @@
},
{
"cell_type": "markdown",
"id": "39ab59b1",
"id": "0271dc50",
"metadata": {},
"source": [
"Let's use this function to estimate the validation MSE\n",
@@ -247,13 +247,13 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "51d93dea",
"id": "a0dbd55f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:38.488297Z",
"iopub.status.busy": "2023-08-07T00:18:38.488205Z",
"iopub.status.idle": "2023-08-07T00:18:38.497955Z",
"shell.execute_reply": "2023-08-07T00:18:38.497708Z"
"iopub.execute_input": "2023-08-21T02:29:02.073322Z",
"iopub.status.busy": "2023-08-21T02:29:02.073229Z",
"iopub.status.idle": "2023-08-21T02:29:02.088464Z",
"shell.execute_reply": "2023-08-21T02:29:02.088192Z"
}
},
"outputs": [
@@ -280,7 +280,7 @@
},
{
"cell_type": "markdown",
"id": "936e168a",
"id": "a7401536",
"metadata": {},
"source": [
"These error rates are $23.62, 18.76$, and $18.80$, respectively. If we\n",
@@ -291,13 +291,13 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "83432f06",
"id": "885136a4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:38.499478Z",
"iopub.status.busy": "2023-08-07T00:18:38.499391Z",
"iopub.status.idle": "2023-08-07T00:18:38.509735Z",
"shell.execute_reply": "2023-08-07T00:18:38.509466Z"
"iopub.execute_input": "2023-08-21T02:29:02.089889Z",
"iopub.status.busy": "2023-08-21T02:29:02.089804Z",
"iopub.status.idle": "2023-08-21T02:29:02.105353Z",
"shell.execute_reply": "2023-08-21T02:29:02.105089Z"
}
},
"outputs": [
@@ -327,7 +327,7 @@
},
{
"cell_type": "markdown",
"id": "f5ceb357",
"id": "00785402",
"metadata": {},
"source": [
"Using this split of the observations into a training set and a validation set,\n",
@@ -341,7 +341,7 @@
},
{
"cell_type": "markdown",
"id": "6d624a5c",
"id": "21c071b8",
"metadata": {},
"source": [
"## Cross-Validation\n",
@@ -374,13 +374,13 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "bcfc433f",
"id": "6d957d8c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:38.511210Z",
"iopub.status.busy": "2023-08-07T00:18:38.511122Z",
"iopub.status.idle": "2023-08-07T00:18:39.069624Z",
"shell.execute_reply": "2023-08-07T00:18:39.069325Z"
"iopub.execute_input": "2023-08-21T02:29:02.106979Z",
"iopub.status.busy": "2023-08-21T02:29:02.106884Z",
"iopub.status.idle": "2023-08-21T02:29:03.184550Z",
"shell.execute_reply": "2023-08-21T02:29:03.184259Z"
},
"lines_to_next_cell": 0
},
@@ -410,7 +410,7 @@
},
{
"cell_type": "markdown",
"id": "5b0f6f30",
"id": "c17e2bc8",
"metadata": {},
"source": [
"The arguments to `cross_validate()` are as follows: an\n",
@@ -426,7 +426,7 @@
},
{
"cell_type": "markdown",
"id": "b527f67f",
"id": "5c7901f2",
"metadata": {},
"source": [
"We can repeat this procedure for increasingly complex polynomial fits.\n",
@@ -442,13 +442,13 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "f951ffc8",
"id": "e2b5ce95",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.071240Z",
"iopub.status.busy": "2023-08-07T00:18:39.071138Z",
"iopub.status.idle": "2023-08-07T00:18:39.674084Z",
"shell.execute_reply": "2023-08-07T00:18:39.673774Z"
"iopub.execute_input": "2023-08-21T02:29:03.186226Z",
"iopub.status.busy": "2023-08-21T02:29:03.186108Z",
"iopub.status.idle": "2023-08-21T02:29:03.782413Z",
"shell.execute_reply": "2023-08-21T02:29:03.782122Z"
},
"lines_to_next_cell": 0
},
@@ -480,7 +480,7 @@
},
{
"cell_type": "markdown",
"id": "792f1304",
"id": "03706248",
"metadata": {},
"source": [
"As in Figure 5.4, we see a sharp drop in the estimated test MSE between the linear and\n",
@@ -499,13 +499,13 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "e3610b5a",
"id": "1dda1bd7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.675725Z",
"iopub.status.busy": "2023-08-07T00:18:39.675614Z",
"iopub.status.idle": "2023-08-07T00:18:39.678046Z",
"shell.execute_reply": "2023-08-07T00:18:39.677762Z"
"iopub.execute_input": "2023-08-21T02:29:03.783997Z",
"iopub.status.busy": "2023-08-21T02:29:03.783886Z",
"iopub.status.idle": "2023-08-21T02:29:03.786132Z",
"shell.execute_reply": "2023-08-21T02:29:03.785881Z"
}
},
"outputs": [
@@ -530,7 +530,7 @@
},
{
"cell_type": "markdown",
"id": "983625b2",
"id": "f5092f1b",
"metadata": {},
"source": [
"In the CV example above, we used $K=n$, but of course we can also use $K<n$. The code is very similar\n",
@@ -541,13 +541,13 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "1627460d",
"id": "fb25fa70",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.679517Z",
"iopub.status.busy": "2023-08-07T00:18:39.679423Z",
"iopub.status.idle": "2023-08-07T00:18:39.701200Z",
"shell.execute_reply": "2023-08-07T00:18:39.700946Z"
"iopub.execute_input": "2023-08-21T02:29:03.787622Z",
"iopub.status.busy": "2023-08-21T02:29:03.787525Z",
"iopub.status.idle": "2023-08-21T02:29:03.809671Z",
"shell.execute_reply": "2023-08-21T02:29:03.809398Z"
},
"lines_to_next_cell": 0
},
@@ -580,7 +580,7 @@
},
{
"cell_type": "markdown",
"id": "32bf6662",
"id": "c4ec6afb",
"metadata": {},
"source": [
"Notice that the computation time is much shorter than that of LOOCV.\n",
@@ -595,7 +595,7 @@
},
{
"cell_type": "markdown",
"id": "1e89127b",
"id": "5edf407f",
"metadata": {},
"source": [
"The `cross_validate()` function is flexible and can take\n",
@@ -606,13 +606,13 @@
{
"cell_type": "code",
"execution_count": 13,
"id": "8a636468",
"id": "d78795cd",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.702802Z",
"iopub.status.busy": "2023-08-07T00:18:39.702718Z",
"iopub.status.idle": "2023-08-07T00:18:39.708140Z",
"shell.execute_reply": "2023-08-07T00:18:39.707865Z"
"iopub.execute_input": "2023-08-21T02:29:03.811123Z",
"iopub.status.busy": "2023-08-21T02:29:03.811046Z",
"iopub.status.idle": "2023-08-21T02:29:03.817840Z",
"shell.execute_reply": "2023-08-21T02:29:03.817582Z"
},
"lines_to_next_cell": 2
},
@@ -641,7 +641,7 @@
},
{
"cell_type": "markdown",
"id": "2c0fb0d5",
"id": "a081be63",
"metadata": {},
"source": [
"One can estimate the variability in the test error by running the following:"
@@ -650,13 +650,13 @@
{
"cell_type": "code",
"execution_count": 14,
"id": "746aeccd",
"id": "0407ad56",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.709627Z",
"iopub.status.busy": "2023-08-07T00:18:39.709548Z",
"iopub.status.idle": "2023-08-07T00:18:39.729721Z",
"shell.execute_reply": "2023-08-07T00:18:39.729428Z"
"iopub.execute_input": "2023-08-21T02:29:03.819308Z",
"iopub.status.busy": "2023-08-21T02:29:03.819228Z",
"iopub.status.idle": "2023-08-21T02:29:03.851921Z",
"shell.execute_reply": "2023-08-21T02:29:03.851658Z"
}
},
"outputs": [
@@ -684,7 +684,7 @@
},
{
"cell_type": "markdown",
"id": "3310fe80",
"id": "b66db3cb",
"metadata": {},
"source": [
"Note that this standard deviation is not a valid estimate of the\n",
@@ -724,13 +724,13 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "daa53d0c",
"id": "f04f15bd",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.731264Z",
"iopub.status.busy": "2023-08-07T00:18:39.731179Z",
"iopub.status.idle": "2023-08-07T00:18:39.734494Z",
"shell.execute_reply": "2023-08-07T00:18:39.734221Z"
"iopub.execute_input": "2023-08-21T02:29:03.853415Z",
"iopub.status.busy": "2023-08-21T02:29:03.853334Z",
"iopub.status.idle": "2023-08-21T02:29:03.857370Z",
"shell.execute_reply": "2023-08-21T02:29:03.857115Z"
},
"lines_to_next_cell": 0
},
@@ -745,7 +745,7 @@
},
{
"cell_type": "markdown",
"id": "fd439170",
"id": "c88bd6a4",
"metadata": {},
"source": [
"This function returns an estimate for $\\alpha$\n",
@@ -758,13 +758,13 @@
{
"cell_type": "code",
"execution_count": 16,
"id": "578c9564",
"id": "f98c0323",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.736147Z",
"iopub.status.busy": "2023-08-07T00:18:39.736062Z",
"iopub.status.idle": "2023-08-07T00:18:39.738776Z",
"shell.execute_reply": "2023-08-07T00:18:39.738545Z"
"iopub.execute_input": "2023-08-21T02:29:03.858828Z",
"iopub.status.busy": "2023-08-21T02:29:03.858753Z",
"iopub.status.idle": "2023-08-21T02:29:03.861443Z",
"shell.execute_reply": "2023-08-21T02:29:03.861198Z"
}
},
"outputs": [
@@ -785,7 +785,7 @@
},
{
"cell_type": "markdown",
"id": "cc18244c",
"id": "58a78f00",
"metadata": {},
"source": [
"Next we randomly select\n",
@@ -797,13 +797,13 @@
{
"cell_type": "code",
"execution_count": 17,
"id": "5754d6d5",
"id": "bcd40175",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.740183Z",
"iopub.status.busy": "2023-08-07T00:18:39.740108Z",
"iopub.status.idle": "2023-08-07T00:18:39.743599Z",
"shell.execute_reply": "2023-08-07T00:18:39.743267Z"
"iopub.execute_input": "2023-08-21T02:29:03.862933Z",
"iopub.status.busy": "2023-08-21T02:29:03.862830Z",
"iopub.status.idle": "2023-08-21T02:29:03.865766Z",
"shell.execute_reply": "2023-08-21T02:29:03.865514Z"
},
"lines_to_next_cell": 2
},
@@ -829,7 +829,7 @@
},
{
"cell_type": "markdown",
"id": "0e97e132",
"id": "e6058be4",
"metadata": {},
"source": [
"This process can be generalized to create a simple function `boot_SE()` for\n",
@@ -840,13 +840,13 @@
{
"cell_type": "code",
"execution_count": 18,
"id": "8320a49c",
"id": "ab6602cd",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.745013Z",
"iopub.status.busy": "2023-08-07T00:18:39.744924Z",
"iopub.status.idle": "2023-08-07T00:18:39.747163Z",
"shell.execute_reply": "2023-08-07T00:18:39.746928Z"
"iopub.execute_input": "2023-08-21T02:29:03.867170Z",
"iopub.status.busy": "2023-08-21T02:29:03.867072Z",
"iopub.status.idle": "2023-08-21T02:29:03.869326Z",
"shell.execute_reply": "2023-08-21T02:29:03.869094Z"
},
"lines_to_next_cell": 0
},
@@ -872,7 +872,7 @@
},
{
"cell_type": "markdown",
"id": "a1d25cfe",
"id": "d94d383e",
"metadata": {},
"source": [
"Notice the use of `_` as a loop variable in `for _ in range(B)`. This is often used if the value of the counter is\n",
@@ -885,13 +885,13 @@
{
"cell_type": "code",
"execution_count": 19,
"id": "e656aa1f",
"id": "4a323513",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:39.748642Z",
"iopub.status.busy": "2023-08-07T00:18:39.748543Z",
"iopub.status.idle": "2023-08-07T00:18:40.034488Z",
"shell.execute_reply": "2023-08-07T00:18:40.034215Z"
"iopub.execute_input": "2023-08-21T02:29:03.870755Z",
"iopub.status.busy": "2023-08-21T02:29:03.870664Z",
"iopub.status.idle": "2023-08-21T02:29:04.157907Z",
"shell.execute_reply": "2023-08-21T02:29:04.157623Z"
}
},
"outputs": [
@@ -916,7 +916,7 @@
},
{
"cell_type": "markdown",
"id": "258ccf67",
"id": "22343f53",
"metadata": {},
"source": [
"The final output shows that the bootstrap estimate for ${\\rm SE}(\\hat{\\alpha})$ is $0.0912$.\n",
@@ -951,13 +951,13 @@
{
"cell_type": "code",
"execution_count": 20,
"id": "c5d14195",
"id": "0220f3af",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:40.036061Z",
"iopub.status.busy": "2023-08-07T00:18:40.035977Z",
"iopub.status.idle": "2023-08-07T00:18:40.037907Z",
"shell.execute_reply": "2023-08-07T00:18:40.037662Z"
"iopub.execute_input": "2023-08-21T02:29:04.159500Z",
"iopub.status.busy": "2023-08-21T02:29:04.159419Z",
"iopub.status.idle": "2023-08-21T02:29:04.161332Z",
"shell.execute_reply": "2023-08-21T02:29:04.161073Z"
},
"lines_to_next_cell": 0
},
@@ -972,7 +972,7 @@
},
{
"cell_type": "markdown",
"id": "89a6fb3e",
"id": "df0c7f05",
"metadata": {},
"source": [
"This is not quite what is needed as the first argument to\n",
@@ -986,13 +986,13 @@
{
"cell_type": "code",
"execution_count": 21,
"id": "7e0523f0",
"id": "62037dcb",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:40.039299Z",
"iopub.status.busy": "2023-08-07T00:18:40.039208Z",
"iopub.status.idle": "2023-08-07T00:18:40.040837Z",
"shell.execute_reply": "2023-08-07T00:18:40.040599Z"
"iopub.execute_input": "2023-08-21T02:29:04.162950Z",
"iopub.status.busy": "2023-08-21T02:29:04.162849Z",
"iopub.status.idle": "2023-08-21T02:29:04.164486Z",
"shell.execute_reply": "2023-08-21T02:29:04.164241Z"
},
"lines_to_next_cell": 0
},
@@ -1003,7 +1003,7 @@
},
{
"cell_type": "markdown",
"id": "4d8f9f61",
"id": "61fbe248",
"metadata": {},
"source": [
"Typing `hp_func?` will show that it has two arguments `D`\n",
@@ -1019,13 +1019,13 @@
{
"cell_type": "code",
"execution_count": 22,
"id": "32836e93",
"id": "b8bdb7a4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:40.042164Z",
"iopub.status.busy": "2023-08-07T00:18:40.042091Z",
"iopub.status.idle": "2023-08-07T00:18:40.056730Z",
"shell.execute_reply": "2023-08-07T00:18:40.056480Z"
"iopub.execute_input": "2023-08-21T02:29:04.165879Z",
"iopub.status.busy": "2023-08-21T02:29:04.165798Z",
"iopub.status.idle": "2023-08-21T02:29:04.194029Z",
"shell.execute_reply": "2023-08-21T02:29:04.193764Z"
},
"lines_to_next_cell": 0
},
@@ -1060,7 +1060,7 @@
},
{
"cell_type": "markdown",
"id": "aa8cae71",
"id": "2a831036",
"metadata": {},
"source": [
"Next, we use the `boot_SE()` {} function to compute the standard\n",
@@ -1070,13 +1070,13 @@
{
"cell_type": "code",
"execution_count": 23,
"id": "14ce3afa",
"id": "36808258",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:40.058168Z",
"iopub.status.busy": "2023-08-07T00:18:40.058092Z",
"iopub.status.idle": "2023-08-07T00:18:41.197103Z",
"shell.execute_reply": "2023-08-07T00:18:41.196820Z"
"iopub.execute_input": "2023-08-21T02:29:04.195612Z",
"iopub.status.busy": "2023-08-21T02:29:04.195529Z",
"iopub.status.idle": "2023-08-21T02:29:06.747175Z",
"shell.execute_reply": "2023-08-21T02:29:06.746638Z"
},
"lines_to_next_cell": 2
},
@@ -1104,7 +1104,7 @@
},
{
"cell_type": "markdown",
"id": "1d0db4c6",
"id": "38c65fbf",
"metadata": {},
"source": [
"This indicates that the bootstrap estimate for ${\\rm SE}(\\hat{\\beta}_0)$ is\n",
@@ -1120,13 +1120,13 @@
{
"cell_type": "code",
"execution_count": 24,
"id": "6b1213ac",
"id": "c9aea297",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:41.198611Z",
"iopub.status.busy": "2023-08-07T00:18:41.198528Z",
"iopub.status.idle": "2023-08-07T00:18:41.257926Z",
"shell.execute_reply": "2023-08-07T00:18:41.257642Z"
"iopub.execute_input": "2023-08-21T02:29:06.749614Z",
"iopub.status.busy": "2023-08-21T02:29:06.749433Z",
"iopub.status.idle": "2023-08-21T02:29:06.812583Z",
"shell.execute_reply": "2023-08-21T02:29:06.812298Z"
},
"lines_to_next_cell": 2
},
@@ -1152,7 +1152,7 @@
},
{
"cell_type": "markdown",
"id": "2b158ef6",
"id": "d870ad6b",
"metadata": {},
"source": [
"The standard error estimates for $\\hat{\\beta}_0$ and $\\hat{\\beta}_1$\n",
@@ -1164,13 +1164,13 @@
"obtained using the bootstrap. Does this indicate a problem with the\n",
"bootstrap? In fact, it suggests the opposite. Recall that the\n",
"standard formulas given in\n",
" {Equation 3.8 on page 80}\n",
" {Equation 3.8 on page 82}\n",
"rely on certain assumptions. For example,\n",
"they depend on the unknown parameter $\\sigma^2$, the noise\n",
"variance. We then estimate $\\sigma^2$ using the RSS. Now although the\n",
"formula for the standard errors do not rely on the linear model being\n",
"correct, the estimate for $\\sigma^2$ does. We see\n",
" {in Figure 3.8 on page 106} that there is\n",
" {in Figure 3.8 on page 108} that there is\n",
"a non-linear relationship in the data, and so the residuals from a\n",
"linear fit will be inflated, and so will $\\hat{\\sigma}^2$. Secondly,\n",
"the standard formulas assume (somewhat unrealistically) that the $x_i$\n",
@@ -1192,13 +1192,13 @@
{
"cell_type": "code",
"execution_count": 25,
"id": "af99b778",
"id": "79c56529",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:41.259623Z",
"iopub.status.busy": "2023-08-07T00:18:41.259482Z",
"iopub.status.idle": "2023-08-07T00:18:43.037184Z",
"shell.execute_reply": "2023-08-07T00:18:43.036911Z"
"iopub.execute_input": "2023-08-21T02:29:06.814267Z",
"iopub.status.busy": "2023-08-21T02:29:06.814125Z",
"iopub.status.idle": "2023-08-21T02:29:10.162177Z",
"shell.execute_reply": "2023-08-21T02:29:10.161855Z"
}
},
"outputs": [
@@ -1226,7 +1226,7 @@
},
{
"cell_type": "markdown",
"id": "1badcfd1",
"id": "9fccbbbd",
"metadata": {},
"source": [
"We compare the results to the standard errors computed using `sm.OLS()`."
@@ -1235,13 +1235,13 @@
{
"cell_type": "code",
"execution_count": 26,
"id": "0206281e",
"id": "4d0b4edc",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:18:43.038778Z",
"iopub.status.busy": "2023-08-07T00:18:43.038680Z",
"iopub.status.idle": "2023-08-07T00:18:43.046810Z",
"shell.execute_reply": "2023-08-07T00:18:43.046545Z"
"iopub.execute_input": "2023-08-21T02:29:10.163852Z",
"iopub.status.busy": "2023-08-21T02:29:10.163742Z",
"iopub.status.idle": "2023-08-21T02:29:10.173834Z",
"shell.execute_reply": "2023-08-21T02:29:10.173578Z"
},
"lines_to_next_cell": 0
},
@@ -1268,7 +1268,7 @@
},
{
"cell_type": "markdown",
"id": "0c11a71f",
"id": "9a86ff6e",
"metadata": {},
"source": [
"\n",

View File

@@ -540,7 +540,7 @@ grid.best_params_['ridge__alpha']
grid.best_estimator_
```
Recall we set up the `kfold` object for 5-fold cross-validation on page 296. We now plot the cross-validated MSE as a function of $-\log(\lambda)$, which has shrinkage decreasing from left
Recall we set up the `kfold` object for 5-fold cross-validation on page 298. We now plot the cross-validated MSE as a function of $-\log(\lambda)$, which has shrinkage decreasing from left
to right.
```{python}
@@ -728,7 +728,7 @@ ax.set_ylabel('Standardized coefficiients', fontsize=20);
```
The smallest cross-validated error is lower than the test set MSE of the null model
and of least squares, and very similar to the test MSE of 115526.71 of ridge
regression (page 303) with $\lambda$ chosen by cross-validation.
regression (page 305) with $\lambda$ chosen by cross-validation.
```{python}
np.min(tuned_lasso.mse_path_.mean(1))

File diff suppressed because it is too large Load Diff

View File

@@ -83,7 +83,7 @@ for feature transformations such as `PCA()` seen in Section 6.5.3) which
allows for easy evaluation of the polynomial at new data points. Here `poly()` is referred to as a *helper* function, and sets up the transformation; `Poly()` is the actual workhorse that computes the transformation. See also
the
discussion of transformations on
page 127.
page 129.
In the code above, the first line executes the `fit()` method
using the dataframe
@@ -748,7 +748,6 @@ ANOVA, $\mathcal{M}_2$ is preferred.
We can repeat the same process for `age` as well. We see there is very clear evidence that
a non-linear term is required for `age`.
\newpage
```{python}
gam_0 = LinearGAM(year_term +

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "d45c6d2b",
"id": "30b873f3",
"metadata": {},
"source": [
"\n",
@@ -12,7 +12,7 @@
},
{
"cell_type": "markdown",
"id": "94d1e03c",
"id": "3e06bca8",
"metadata": {},
"source": [
"# Lab: Support Vector Machines\n",
@@ -25,13 +25,13 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "eeaa5be0",
"id": "3973b95f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:27.947789Z",
"iopub.status.busy": "2023-08-07T00:19:27.947634Z",
"iopub.status.idle": "2023-08-07T00:19:28.991210Z",
"shell.execute_reply": "2023-08-07T00:19:28.990616Z"
"iopub.execute_input": "2023-08-21T02:29:58.477582Z",
"iopub.status.busy": "2023-08-21T02:29:58.477467Z",
"iopub.status.idle": "2023-08-21T02:29:59.432527Z",
"shell.execute_reply": "2023-08-21T02:29:59.432225Z"
},
"lines_to_next_cell": 0
},
@@ -45,7 +45,7 @@
},
{
"cell_type": "markdown",
"id": "26ebd377",
"id": "631b7d3d",
"metadata": {},
"source": [
"We also collect the new imports\n",
@@ -55,13 +55,13 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "41a59634",
"id": "0161e55e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:28.993557Z",
"iopub.status.busy": "2023-08-07T00:19:28.993245Z",
"iopub.status.idle": "2023-08-07T00:19:29.028199Z",
"shell.execute_reply": "2023-08-07T00:19:29.027857Z"
"iopub.execute_input": "2023-08-21T02:29:59.434432Z",
"iopub.status.busy": "2023-08-21T02:29:59.434258Z",
"iopub.status.idle": "2023-08-21T02:29:59.466972Z",
"shell.execute_reply": "2023-08-21T02:29:59.466647Z"
}
},
"outputs": [],
@@ -73,7 +73,7 @@
},
{
"cell_type": "markdown",
"id": "f197b846",
"id": "b397af05",
"metadata": {},
"source": [
"We will use the function `RocCurveDisplay.from_estimator()` to\n",
@@ -83,13 +83,13 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "c9a175d7",
"id": "7661b056",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.030225Z",
"iopub.status.busy": "2023-08-07T00:19:29.030097Z",
"iopub.status.idle": "2023-08-07T00:19:29.032026Z",
"shell.execute_reply": "2023-08-07T00:19:29.031756Z"
"iopub.execute_input": "2023-08-21T02:29:59.469128Z",
"iopub.status.busy": "2023-08-21T02:29:59.468999Z",
"iopub.status.idle": "2023-08-21T02:29:59.470961Z",
"shell.execute_reply": "2023-08-21T02:29:59.470667Z"
}
},
"outputs": [],
@@ -99,7 +99,7 @@
},
{
"cell_type": "markdown",
"id": "f666c212",
"id": "38115984",
"metadata": {},
"source": [
"## Support Vector Classifier\n",
@@ -123,13 +123,13 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "a7216b47",
"id": "46e9ab84",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.033695Z",
"iopub.status.busy": "2023-08-07T00:19:29.033581Z",
"iopub.status.idle": "2023-08-07T00:19:29.207161Z",
"shell.execute_reply": "2023-08-07T00:19:29.205980Z"
"iopub.execute_input": "2023-08-21T02:29:59.472867Z",
"iopub.status.busy": "2023-08-21T02:29:59.472726Z",
"iopub.status.idle": "2023-08-21T02:29:59.583508Z",
"shell.execute_reply": "2023-08-21T02:29:59.583126Z"
},
"lines_to_next_cell": 0
},
@@ -159,7 +159,7 @@
},
{
"cell_type": "markdown",
"id": "7b4aff06",
"id": "a9766d18",
"metadata": {},
"source": [
"They are not. We now fit the classifier."
@@ -168,13 +168,13 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "ed329198",
"id": "605ffdc0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.211951Z",
"iopub.status.busy": "2023-08-07T00:19:29.211403Z",
"iopub.status.idle": "2023-08-07T00:19:29.220643Z",
"shell.execute_reply": "2023-08-07T00:19:29.219858Z"
"iopub.execute_input": "2023-08-21T02:29:59.585485Z",
"iopub.status.busy": "2023-08-21T02:29:59.585317Z",
"iopub.status.idle": "2023-08-21T02:29:59.590274Z",
"shell.execute_reply": "2023-08-21T02:29:59.589979Z"
},
"lines_to_next_cell": 2
},
@@ -200,7 +200,7 @@
},
{
"cell_type": "markdown",
"id": "5e6b4c79",
"id": "16215b77",
"metadata": {},
"source": [
"The support vector classifier with two features can\n",
@@ -212,13 +212,13 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "95494b8b",
"id": "302a49a1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.224179Z",
"iopub.status.busy": "2023-08-07T00:19:29.223775Z",
"iopub.status.idle": "2023-08-07T00:19:29.400927Z",
"shell.execute_reply": "2023-08-07T00:19:29.400620Z"
"iopub.execute_input": "2023-08-21T02:29:59.591976Z",
"iopub.status.busy": "2023-08-21T02:29:59.591865Z",
"iopub.status.idle": "2023-08-21T02:29:59.734225Z",
"shell.execute_reply": "2023-08-21T02:29:59.733936Z"
}
},
"outputs": [
@@ -243,7 +243,7 @@
},
{
"cell_type": "markdown",
"id": "f6ce1246",
"id": "6010e865",
"metadata": {},
"source": [
"The decision\n",
@@ -257,13 +257,13 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "98c2236f",
"id": "cc1d6a13",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.402894Z",
"iopub.status.busy": "2023-08-07T00:19:29.402744Z",
"iopub.status.idle": "2023-08-07T00:19:29.544636Z",
"shell.execute_reply": "2023-08-07T00:19:29.544249Z"
"iopub.execute_input": "2023-08-21T02:29:59.735943Z",
"iopub.status.busy": "2023-08-21T02:29:59.735816Z",
"iopub.status.idle": "2023-08-21T02:29:59.878335Z",
"shell.execute_reply": "2023-08-21T02:29:59.878032Z"
},
"lines_to_next_cell": 0
},
@@ -291,7 +291,7 @@
},
{
"cell_type": "markdown",
"id": "906f4bb8",
"id": "301d764d",
"metadata": {},
"source": [
"With a smaller value of the cost parameter, we\n",
@@ -303,13 +303,13 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "b498f594",
"id": "6133c846",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.546722Z",
"iopub.status.busy": "2023-08-07T00:19:29.546549Z",
"iopub.status.idle": "2023-08-07T00:19:29.549088Z",
"shell.execute_reply": "2023-08-07T00:19:29.548814Z"
"iopub.execute_input": "2023-08-21T02:29:59.880078Z",
"iopub.status.busy": "2023-08-21T02:29:59.879965Z",
"iopub.status.idle": "2023-08-21T02:29:59.882347Z",
"shell.execute_reply": "2023-08-21T02:29:59.882070Z"
},
"lines_to_next_cell": 2
},
@@ -331,7 +331,7 @@
},
{
"cell_type": "markdown",
"id": "90a0ee53",
"id": "0693b3eb",
"metadata": {},
"source": [
"Since the support vector machine is an estimator in `sklearn`, we\n",
@@ -341,13 +341,13 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "b65e80d6",
"id": "9adb3793",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.550593Z",
"iopub.status.busy": "2023-08-07T00:19:29.550485Z",
"iopub.status.idle": "2023-08-07T00:19:29.578952Z",
"shell.execute_reply": "2023-08-07T00:19:29.578657Z"
"iopub.execute_input": "2023-08-21T02:29:59.883852Z",
"iopub.status.busy": "2023-08-21T02:29:59.883749Z",
"iopub.status.idle": "2023-08-21T02:29:59.910535Z",
"shell.execute_reply": "2023-08-21T02:29:59.910272Z"
},
"lines_to_next_cell": 2
},
@@ -378,7 +378,7 @@
},
{
"cell_type": "markdown",
"id": "d390528c",
"id": "611e76a6",
"metadata": {},
"source": [
"We can easily access the cross-validation errors for each of these models\n",
@@ -389,13 +389,13 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "bba8fad7",
"id": "d3ab343e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.580977Z",
"iopub.status.busy": "2023-08-07T00:19:29.580845Z",
"iopub.status.idle": "2023-08-07T00:19:29.583558Z",
"shell.execute_reply": "2023-08-07T00:19:29.583239Z"
"iopub.execute_input": "2023-08-21T02:29:59.912005Z",
"iopub.status.busy": "2023-08-21T02:29:59.911925Z",
"iopub.status.idle": "2023-08-21T02:29:59.914189Z",
"shell.execute_reply": "2023-08-21T02:29:59.913943Z"
},
"lines_to_next_cell": 0
},
@@ -417,7 +417,7 @@
},
{
"cell_type": "markdown",
"id": "703e2d43",
"id": "41d85a2a",
"metadata": {},
"source": [
"We see that `C=1` results in the highest cross-validation\n",
@@ -430,13 +430,13 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "ad64269d",
"id": "6aba117e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.585087Z",
"iopub.status.busy": "2023-08-07T00:19:29.584981Z",
"iopub.status.idle": "2023-08-07T00:19:29.586995Z",
"shell.execute_reply": "2023-08-07T00:19:29.586714Z"
"iopub.execute_input": "2023-08-21T02:29:59.915563Z",
"iopub.status.busy": "2023-08-21T02:29:59.915487Z",
"iopub.status.idle": "2023-08-21T02:29:59.917323Z",
"shell.execute_reply": "2023-08-21T02:29:59.917078Z"
}
},
"outputs": [],
@@ -448,7 +448,7 @@
},
{
"cell_type": "markdown",
"id": "db41f5e2",
"id": "ddbda9de",
"metadata": {},
"source": [
"Now we predict the class labels of these test observations. Here we\n",
@@ -459,13 +459,13 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "5107fca1",
"id": "dbe7d737",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.588685Z",
"iopub.status.busy": "2023-08-07T00:19:29.588519Z",
"iopub.status.idle": "2023-08-07T00:19:29.595768Z",
"shell.execute_reply": "2023-08-07T00:19:29.595341Z"
"iopub.execute_input": "2023-08-21T02:29:59.918744Z",
"iopub.status.busy": "2023-08-21T02:29:59.918666Z",
"iopub.status.idle": "2023-08-21T02:29:59.925361Z",
"shell.execute_reply": "2023-08-21T02:29:59.925039Z"
}
},
"outputs": [
@@ -534,7 +534,7 @@
},
{
"cell_type": "markdown",
"id": "bbfc8005",
"id": "7f002ea6",
"metadata": {},
"source": [
"Thus, with this value of `C`,\n",
@@ -546,13 +546,13 @@
{
"cell_type": "code",
"execution_count": 13,
"id": "0320d9e0",
"id": "ab1697c2",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.597509Z",
"iopub.status.busy": "2023-08-07T00:19:29.597387Z",
"iopub.status.idle": "2023-08-07T00:19:29.602346Z",
"shell.execute_reply": "2023-08-07T00:19:29.601964Z"
"iopub.execute_input": "2023-08-21T02:29:59.927158Z",
"iopub.status.busy": "2023-08-21T02:29:59.927027Z",
"iopub.status.idle": "2023-08-21T02:29:59.931558Z",
"shell.execute_reply": "2023-08-21T02:29:59.931228Z"
}
},
"outputs": [
@@ -622,7 +622,7 @@
},
{
"cell_type": "markdown",
"id": "427d775f",
"id": "7574703a",
"metadata": {},
"source": [
"In this case 60% of test observations are correctly classified.\n",
@@ -637,13 +637,13 @@
{
"cell_type": "code",
"execution_count": 14,
"id": "84d7e778",
"id": "0fd42b1e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.604018Z",
"iopub.status.busy": "2023-08-07T00:19:29.603879Z",
"iopub.status.idle": "2023-08-07T00:19:29.734586Z",
"shell.execute_reply": "2023-08-07T00:19:29.734264Z"
"iopub.execute_input": "2023-08-21T02:29:59.933100Z",
"iopub.status.busy": "2023-08-21T02:29:59.933001Z",
"iopub.status.idle": "2023-08-21T02:30:00.054738Z",
"shell.execute_reply": "2023-08-21T02:30:00.054338Z"
}
},
"outputs": [
@@ -666,7 +666,7 @@
},
{
"cell_type": "markdown",
"id": "ff7bdad1",
"id": "4bdaf415",
"metadata": {},
"source": [
"Now the observations are just barely linearly separable."
@@ -675,13 +675,13 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "abb1f8be",
"id": "09c15299",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.736388Z",
"iopub.status.busy": "2023-08-07T00:19:29.736251Z",
"iopub.status.idle": "2023-08-07T00:19:29.741179Z",
"shell.execute_reply": "2023-08-07T00:19:29.740886Z"
"iopub.execute_input": "2023-08-21T02:30:00.056655Z",
"iopub.status.busy": "2023-08-21T02:30:00.056526Z",
"iopub.status.idle": "2023-08-21T02:30:00.061096Z",
"shell.execute_reply": "2023-08-21T02:30:00.060792Z"
}
},
"outputs": [
@@ -750,7 +750,7 @@
},
{
"cell_type": "markdown",
"id": "c44297cc",
"id": "d987eecc",
"metadata": {},
"source": [
"We fit the\n",
@@ -762,13 +762,13 @@
{
"cell_type": "code",
"execution_count": 16,
"id": "2e4ed2f5",
"id": "d5fd2ff9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.742864Z",
"iopub.status.busy": "2023-08-07T00:19:29.742750Z",
"iopub.status.idle": "2023-08-07T00:19:29.860686Z",
"shell.execute_reply": "2023-08-07T00:19:29.860305Z"
"iopub.execute_input": "2023-08-21T02:30:00.062673Z",
"iopub.status.busy": "2023-08-21T02:30:00.062585Z",
"iopub.status.idle": "2023-08-21T02:30:00.199860Z",
"shell.execute_reply": "2023-08-21T02:30:00.199129Z"
},
"lines_to_next_cell": 0
},
@@ -794,7 +794,7 @@
},
{
"cell_type": "markdown",
"id": "2836d70d",
"id": "0834d471",
"metadata": {},
"source": [
"Indeed no training errors were made and only three support vectors were used.\n",
@@ -807,13 +807,13 @@
{
"cell_type": "code",
"execution_count": 17,
"id": "164a611c",
"id": "39aff1b1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.862647Z",
"iopub.status.busy": "2023-08-07T00:19:29.862496Z",
"iopub.status.idle": "2023-08-07T00:19:29.867261Z",
"shell.execute_reply": "2023-08-07T00:19:29.866916Z"
"iopub.execute_input": "2023-08-21T02:30:00.202380Z",
"iopub.status.busy": "2023-08-21T02:30:00.202233Z",
"iopub.status.idle": "2023-08-21T02:30:00.207886Z",
"shell.execute_reply": "2023-08-21T02:30:00.207493Z"
}
},
"outputs": [
@@ -882,7 +882,7 @@
},
{
"cell_type": "markdown",
"id": "39a432d1",
"id": "d0684844",
"metadata": {},
"source": [
"Using `C=0.1`, we again do not misclassify any training observations, but we\n",
@@ -894,13 +894,13 @@
{
"cell_type": "code",
"execution_count": 18,
"id": "c67591a1",
"id": "63a9d752",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.868821Z",
"iopub.status.busy": "2023-08-07T00:19:29.868723Z",
"iopub.status.idle": "2023-08-07T00:19:29.990207Z",
"shell.execute_reply": "2023-08-07T00:19:29.989921Z"
"iopub.execute_input": "2023-08-21T02:30:00.209907Z",
"iopub.status.busy": "2023-08-21T02:30:00.209781Z",
"iopub.status.idle": "2023-08-21T02:30:00.340803Z",
"shell.execute_reply": "2023-08-21T02:30:00.340433Z"
},
"lines_to_next_cell": 2
},
@@ -926,7 +926,7 @@
},
{
"cell_type": "markdown",
"id": "25e61f65",
"id": "a70d84f4",
"metadata": {},
"source": [
"## Support Vector Machine\n",
@@ -947,13 +947,13 @@
{
"cell_type": "code",
"execution_count": 19,
"id": "322be574",
"id": "2fee8df5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.991910Z",
"iopub.status.busy": "2023-08-07T00:19:29.991799Z",
"iopub.status.idle": "2023-08-07T00:19:29.993907Z",
"shell.execute_reply": "2023-08-07T00:19:29.993635Z"
"iopub.execute_input": "2023-08-21T02:30:00.342773Z",
"iopub.status.busy": "2023-08-21T02:30:00.342626Z",
"iopub.status.idle": "2023-08-21T02:30:00.345094Z",
"shell.execute_reply": "2023-08-21T02:30:00.344774Z"
}
},
"outputs": [],
@@ -966,7 +966,7 @@
},
{
"cell_type": "markdown",
"id": "22fe2182",
"id": "d5c7545e",
"metadata": {},
"source": [
"Plotting the data makes it clear that the class boundary is indeed non-linear."
@@ -975,13 +975,13 @@
{
"cell_type": "code",
"execution_count": 20,
"id": "04fda182",
"id": "48f01abe",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:29.995558Z",
"iopub.status.busy": "2023-08-07T00:19:29.995406Z",
"iopub.status.idle": "2023-08-07T00:19:30.089596Z",
"shell.execute_reply": "2023-08-07T00:19:30.089130Z"
"iopub.execute_input": "2023-08-21T02:30:00.347053Z",
"iopub.status.busy": "2023-08-21T02:30:00.346902Z",
"iopub.status.idle": "2023-08-21T02:30:00.440453Z",
"shell.execute_reply": "2023-08-21T02:30:00.440153Z"
},
"lines_to_next_cell": 2
},
@@ -989,7 +989,7 @@
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x17f2b35d0>"
"<matplotlib.collections.PathCollection at 0x28b7c65d0>"
]
},
"execution_count": 20,
@@ -1017,7 +1017,7 @@
},
{
"cell_type": "markdown",
"id": "64913fe3",
"id": "7c0bc32b",
"metadata": {},
"source": [
"The data is randomly split into training and testing groups. We then\n",
@@ -1028,13 +1028,13 @@
{
"cell_type": "code",
"execution_count": 21,
"id": "0c2690d1",
"id": "4acc3246",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:30.091605Z",
"iopub.status.busy": "2023-08-07T00:19:30.091498Z",
"iopub.status.idle": "2023-08-07T00:19:30.095614Z",
"shell.execute_reply": "2023-08-07T00:19:30.095347Z"
"iopub.execute_input": "2023-08-21T02:30:00.442257Z",
"iopub.status.busy": "2023-08-21T02:30:00.442156Z",
"iopub.status.idle": "2023-08-21T02:30:00.446674Z",
"shell.execute_reply": "2023-08-21T02:30:00.446369Z"
}
},
"outputs": [
@@ -1066,7 +1066,7 @@
},
{
"cell_type": "markdown",
"id": "5da9efdb",
"id": "b2c7e95e",
"metadata": {},
"source": [
"The plot shows that the resulting SVM has a decidedly non-linear\n",
@@ -1076,13 +1076,13 @@
{
"cell_type": "code",
"execution_count": 22,
"id": "3eb171e8",
"id": "e9852a28",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:30.097178Z",
"iopub.status.busy": "2023-08-07T00:19:30.097088Z",
"iopub.status.idle": "2023-08-07T00:19:30.357131Z",
"shell.execute_reply": "2023-08-07T00:19:30.356847Z"
"iopub.execute_input": "2023-08-21T02:30:00.448268Z",
"iopub.status.busy": "2023-08-21T02:30:00.448160Z",
"iopub.status.idle": "2023-08-21T02:30:00.828511Z",
"shell.execute_reply": "2023-08-21T02:30:00.828128Z"
}
},
"outputs": [
@@ -1107,7 +1107,7 @@
},
{
"cell_type": "markdown",
"id": "ab5b1446",
"id": "acfa4bed",
"metadata": {},
"source": [
"We can see from the figure that there are a fair number of training\n",
@@ -1120,13 +1120,13 @@
{
"cell_type": "code",
"execution_count": 23,
"id": "9a6b905b",
"id": "01232fc9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:30.358811Z",
"iopub.status.busy": "2023-08-07T00:19:30.358698Z",
"iopub.status.idle": "2023-08-07T00:19:30.513702Z",
"shell.execute_reply": "2023-08-07T00:19:30.513395Z"
"iopub.execute_input": "2023-08-21T02:30:00.830365Z",
"iopub.status.busy": "2023-08-21T02:30:00.830226Z",
"iopub.status.idle": "2023-08-21T02:30:01.132677Z",
"shell.execute_reply": "2023-08-21T02:30:01.132224Z"
}
},
"outputs": [
@@ -1153,7 +1153,7 @@
},
{
"cell_type": "markdown",
"id": "300c1b8b",
"id": "5bc77e3f",
"metadata": {},
"source": [
"We can perform cross-validation using `skm.GridSearchCV()` to select the\n",
@@ -1164,13 +1164,13 @@
{
"cell_type": "code",
"execution_count": 24,
"id": "5ab01d6c",
"id": "bcbd15a4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:30.515803Z",
"iopub.status.busy": "2023-08-07T00:19:30.515668Z",
"iopub.status.idle": "2023-08-07T00:19:30.612245Z",
"shell.execute_reply": "2023-08-07T00:19:30.611940Z"
"iopub.execute_input": "2023-08-21T02:30:01.134616Z",
"iopub.status.busy": "2023-08-21T02:30:01.134486Z",
"iopub.status.idle": "2023-08-21T02:30:01.243519Z",
"shell.execute_reply": "2023-08-21T02:30:01.243203Z"
}
},
"outputs": [
@@ -1201,7 +1201,7 @@
},
{
"cell_type": "markdown",
"id": "1bb987ae",
"id": "997bbfbd",
"metadata": {},
"source": [
"The best choice of parameters under five-fold CV is achieved at `C=1`\n",
@@ -1212,13 +1212,13 @@
{
"cell_type": "code",
"execution_count": 25,
"id": "166a6acb",
"id": "28ca551e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:30.614152Z",
"iopub.status.busy": "2023-08-07T00:19:30.614029Z",
"iopub.status.idle": "2023-08-07T00:19:30.850984Z",
"shell.execute_reply": "2023-08-07T00:19:30.850653Z"
"iopub.execute_input": "2023-08-21T02:30:01.245550Z",
"iopub.status.busy": "2023-08-21T02:30:01.245377Z",
"iopub.status.idle": "2023-08-21T02:30:01.600896Z",
"shell.execute_reply": "2023-08-21T02:30:01.600574Z"
}
},
"outputs": [
@@ -1303,7 +1303,7 @@
},
{
"cell_type": "markdown",
"id": "39ee6f32",
"id": "48e671f4",
"metadata": {},
"source": [
"With these parameters, 12% of test\n",
@@ -1312,7 +1312,7 @@
},
{
"cell_type": "markdown",
"id": "f0ea699d",
"id": "eaed0a87",
"metadata": {},
"source": [
"## ROC Curves\n",
@@ -1346,13 +1346,13 @@
{
"cell_type": "code",
"execution_count": 26,
"id": "0607fc41",
"id": "68ac9421",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:30.853079Z",
"iopub.status.busy": "2023-08-07T00:19:30.852934Z",
"iopub.status.idle": "2023-08-07T00:19:30.948570Z",
"shell.execute_reply": "2023-08-07T00:19:30.948252Z"
"iopub.execute_input": "2023-08-21T02:30:01.602740Z",
"iopub.status.busy": "2023-08-21T02:30:01.602614Z",
"iopub.status.idle": "2023-08-21T02:30:01.698620Z",
"shell.execute_reply": "2023-08-21T02:30:01.698322Z"
},
"lines_to_next_cell": 0
},
@@ -1380,7 +1380,7 @@
},
{
"cell_type": "markdown",
"id": "54446e71",
"id": "0c35d32a",
"metadata": {},
"source": [
" In this example, the SVM appears to provide accurate predictions. By increasing\n",
@@ -1391,13 +1391,13 @@
{
"cell_type": "code",
"execution_count": 27,
"id": "5211a882",
"id": "f79a9e0a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:30.950213Z",
"iopub.status.busy": "2023-08-07T00:19:30.950106Z",
"iopub.status.idle": "2023-08-07T00:19:31.095103Z",
"shell.execute_reply": "2023-08-07T00:19:31.094737Z"
"iopub.execute_input": "2023-08-21T02:30:01.700479Z",
"iopub.status.busy": "2023-08-21T02:30:01.700347Z",
"iopub.status.idle": "2023-08-21T02:30:01.837479Z",
"shell.execute_reply": "2023-08-21T02:30:01.837102Z"
}
},
"outputs": [
@@ -1428,7 +1428,7 @@
},
{
"cell_type": "markdown",
"id": "de7e4be8",
"id": "7bd1a22b",
"metadata": {},
"source": [
"However, these ROC curves are all on the training data. We are really\n",
@@ -1440,13 +1440,13 @@
{
"cell_type": "code",
"execution_count": 28,
"id": "12acc4ff",
"id": "bdb9e503",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:31.096951Z",
"iopub.status.busy": "2023-08-07T00:19:31.096805Z",
"iopub.status.idle": "2023-08-07T00:19:31.101372Z",
"shell.execute_reply": "2023-08-07T00:19:31.101097Z"
"iopub.execute_input": "2023-08-21T02:30:01.839390Z",
"iopub.status.busy": "2023-08-21T02:30:01.839243Z",
"iopub.status.idle": "2023-08-21T02:30:01.843595Z",
"shell.execute_reply": "2023-08-21T02:30:01.843287Z"
}
},
"outputs": [],
@@ -1462,7 +1462,7 @@
},
{
"cell_type": "markdown",
"id": "eb5c8aeb",
"id": "8a42e924",
"metadata": {},
"source": [
"Lets look at our tuned SVM."
@@ -1471,13 +1471,13 @@
{
"cell_type": "code",
"execution_count": 29,
"id": "21c81913",
"id": "329f5d2c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:31.103089Z",
"iopub.status.busy": "2023-08-07T00:19:31.102993Z",
"iopub.status.idle": "2023-08-07T00:19:31.204133Z",
"shell.execute_reply": "2023-08-07T00:19:31.203835Z"
"iopub.execute_input": "2023-08-21T02:30:01.845300Z",
"iopub.status.busy": "2023-08-21T02:30:01.845201Z",
"iopub.status.idle": "2023-08-21T02:30:01.944073Z",
"shell.execute_reply": "2023-08-21T02:30:01.943763Z"
}
},
"outputs": [
@@ -1510,7 +1510,7 @@
},
{
"cell_type": "markdown",
"id": "b9fefe9f",
"id": "bac19279",
"metadata": {},
"source": [
"## SVM with Multiple Classes\n",
@@ -1526,13 +1526,13 @@
{
"cell_type": "code",
"execution_count": 30,
"id": "2fff4fa8",
"id": "267e113d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:31.205816Z",
"iopub.status.busy": "2023-08-07T00:19:31.205709Z",
"iopub.status.idle": "2023-08-07T00:19:31.294925Z",
"shell.execute_reply": "2023-08-07T00:19:31.294593Z"
"iopub.execute_input": "2023-08-21T02:30:01.945725Z",
"iopub.status.busy": "2023-08-21T02:30:01.945611Z",
"iopub.status.idle": "2023-08-21T02:30:02.034378Z",
"shell.execute_reply": "2023-08-21T02:30:02.034069Z"
}
},
"outputs": [
@@ -1558,7 +1558,7 @@
},
{
"cell_type": "markdown",
"id": "b7adc87d",
"id": "a9f4297c",
"metadata": {},
"source": [
"We now fit an SVM to the data:"
@@ -1567,13 +1567,13 @@
{
"cell_type": "code",
"execution_count": 31,
"id": "5396f2df",
"id": "64cbebd0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:31.296594Z",
"iopub.status.busy": "2023-08-07T00:19:31.296472Z",
"iopub.status.idle": "2023-08-07T00:19:31.880175Z",
"shell.execute_reply": "2023-08-07T00:19:31.879674Z"
"iopub.execute_input": "2023-08-21T02:30:02.036083Z",
"iopub.status.busy": "2023-08-21T02:30:02.035963Z",
"iopub.status.idle": "2023-08-21T02:30:03.015535Z",
"shell.execute_reply": "2023-08-21T02:30:03.014798Z"
},
"lines_to_next_cell": 0
},
@@ -1605,7 +1605,7 @@
},
{
"cell_type": "markdown",
"id": "837644f5",
"id": "62c5d16e",
"metadata": {},
"source": [
"The `sklearn.svm` library can also be used to perform support vector\n",
@@ -1614,7 +1614,7 @@
},
{
"cell_type": "markdown",
"id": "a6bc0cbc",
"id": "5c0824b6",
"metadata": {},
"source": [
"## Application to Gene Expression Data\n",
@@ -1631,13 +1631,13 @@
{
"cell_type": "code",
"execution_count": 32,
"id": "f63c575e",
"id": "b6e6f12b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:31.882095Z",
"iopub.status.busy": "2023-08-07T00:19:31.881962Z",
"iopub.status.idle": "2023-08-07T00:19:31.959079Z",
"shell.execute_reply": "2023-08-07T00:19:31.958769Z"
"iopub.execute_input": "2023-08-21T02:30:03.017430Z",
"iopub.status.busy": "2023-08-21T02:30:03.017293Z",
"iopub.status.idle": "2023-08-21T02:30:03.099156Z",
"shell.execute_reply": "2023-08-21T02:30:03.098760Z"
}
},
"outputs": [
@@ -1659,7 +1659,7 @@
},
{
"cell_type": "markdown",
"id": "bfd6492c",
"id": "e3fbaa58",
"metadata": {},
"source": [
"This data set consists of expression measurements for 2,308\n",
@@ -1677,13 +1677,13 @@
{
"cell_type": "code",
"execution_count": 33,
"id": "32091338",
"id": "273a10b2",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:31.960641Z",
"iopub.status.busy": "2023-08-07T00:19:31.960528Z",
"iopub.status.idle": "2023-08-07T00:19:31.990176Z",
"shell.execute_reply": "2023-08-07T00:19:31.989868Z"
"iopub.execute_input": "2023-08-21T02:30:03.101069Z",
"iopub.status.busy": "2023-08-21T02:30:03.100881Z",
"iopub.status.idle": "2023-08-21T02:30:03.130224Z",
"shell.execute_reply": "2023-08-21T02:30:03.129845Z"
}
},
"outputs": [
@@ -1777,7 +1777,7 @@
},
{
"cell_type": "markdown",
"id": "23043ab0",
"id": "31cad43a",
"metadata": {},
"source": [
"We see that there are *no* training\n",
@@ -1791,13 +1791,13 @@
{
"cell_type": "code",
"execution_count": 34,
"id": "d9058023",
"id": "bc3079a7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:31.991754Z",
"iopub.status.busy": "2023-08-07T00:19:31.991636Z",
"iopub.status.idle": "2023-08-07T00:19:32.002452Z",
"shell.execute_reply": "2023-08-07T00:19:32.002189Z"
"iopub.execute_input": "2023-08-21T02:30:03.132111Z",
"iopub.status.busy": "2023-08-21T02:30:03.131975Z",
"iopub.status.idle": "2023-08-21T02:30:03.143298Z",
"shell.execute_reply": "2023-08-21T02:30:03.142948Z"
}
},
"outputs": [
@@ -1889,7 +1889,7 @@
},
{
"cell_type": "markdown",
"id": "d0d5aba4",
"id": "0d059312",
"metadata": {},
"source": [
"We see that using `C=10` yields two test set errors on these data.\n",

View File

@@ -1,19 +1,3 @@
---
jupyter:
jupytext:
cell_metadata_filter: -all
formats: ipynb,Rmd
text_representation:
extension: .Rmd
format_name: rmarkdown
format_version: '1.2'
jupytext_version: 1.14.7
kernelspec:
display_name: Python 3 (ipykernel)
language: python
name: python3
---
# Chapter 10
@@ -872,7 +856,7 @@ for idx, (X_ ,Y_) in enumerate(cifar_dm.train_dataloader()):
Before we start, we look at some of the training images; similar code produced
Figure 10.5 on page 164. The example below also illustrates
Figure 10.5 on page 447. The example below also illustrates
that `TensorDataset` objects can be indexed with integers --- we are choosing
random images from the training data by indexing `cifar_train`. In order to display correctly,
we must reorder the dimensions by a call to `np.transpose()`.
@@ -1705,7 +1689,6 @@ early stopping, since then the test performance would be biased.
We form the training dataset similar to
our `Hitters` example.
```{python}
datasets = []

File diff suppressed because one or more lines are too long

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "c7f4eb5a",
"id": "62a1a218",
"metadata": {},
"source": [
"\n",
@@ -12,7 +12,7 @@
},
{
"cell_type": "markdown",
"id": "0ae03bc9",
"id": "9da89fbb",
"metadata": {},
"source": [
"# Lab: Survival Analysis\n",
@@ -31,13 +31,13 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "91ac40fd",
"id": "d2d71add",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:33.224953Z",
"iopub.status.busy": "2023-08-07T00:19:33.224846Z",
"iopub.status.idle": "2023-08-07T00:19:34.446999Z",
"shell.execute_reply": "2023-08-07T00:19:34.446629Z"
"iopub.execute_input": "2023-08-21T02:30:04.373618Z",
"iopub.status.busy": "2023-08-21T02:30:04.373522Z",
"iopub.status.idle": "2023-08-21T02:30:05.528375Z",
"shell.execute_reply": "2023-08-21T02:30:05.528065Z"
}
},
"outputs": [],
@@ -51,7 +51,7 @@
},
{
"cell_type": "markdown",
"id": "a3dbcbbf",
"id": "70fe80b5",
"metadata": {},
"source": [
"We also collect the new imports\n",
@@ -61,13 +61,13 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "99782418",
"id": "994efc94",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.448996Z",
"iopub.status.busy": "2023-08-07T00:19:34.448819Z",
"iopub.status.idle": "2023-08-07T00:19:34.539258Z",
"shell.execute_reply": "2023-08-07T00:19:34.538955Z"
"iopub.execute_input": "2023-08-21T02:30:05.530453Z",
"iopub.status.busy": "2023-08-21T02:30:05.530271Z",
"iopub.status.idle": "2023-08-21T02:30:05.593786Z",
"shell.execute_reply": "2023-08-21T02:30:05.593483Z"
}
},
"outputs": [],
@@ -83,7 +83,7 @@
},
{
"cell_type": "markdown",
"id": "2c538d28",
"id": "e65a4796",
"metadata": {},
"source": [
"## Brain Cancer Data\n",
@@ -94,13 +94,13 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "3137149a",
"id": "9d41ddea",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.541177Z",
"iopub.status.busy": "2023-08-07T00:19:34.541057Z",
"iopub.status.idle": "2023-08-07T00:19:34.547991Z",
"shell.execute_reply": "2023-08-07T00:19:34.547753Z"
"iopub.execute_input": "2023-08-21T02:30:05.595762Z",
"iopub.status.busy": "2023-08-21T02:30:05.595642Z",
"iopub.status.idle": "2023-08-21T02:30:05.602243Z",
"shell.execute_reply": "2023-08-21T02:30:05.601969Z"
}
},
"outputs": [
@@ -122,7 +122,7 @@
},
{
"cell_type": "markdown",
"id": "e798f172",
"id": "4ac65a33",
"metadata": {},
"source": [
"The rows index the 88 patients, while the 8 columns contain the predictors and outcome variables.\n",
@@ -132,13 +132,13 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "45963c92",
"id": "2bece782",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.549558Z",
"iopub.status.busy": "2023-08-07T00:19:34.549453Z",
"iopub.status.idle": "2023-08-07T00:19:34.552571Z",
"shell.execute_reply": "2023-08-07T00:19:34.552293Z"
"iopub.execute_input": "2023-08-21T02:30:05.603954Z",
"iopub.status.busy": "2023-08-21T02:30:05.603852Z",
"iopub.status.idle": "2023-08-21T02:30:05.607075Z",
"shell.execute_reply": "2023-08-21T02:30:05.606729Z"
},
"lines_to_next_cell": 2
},
@@ -163,13 +163,13 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "73be61f6",
"id": "9ca465e5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.553962Z",
"iopub.status.busy": "2023-08-07T00:19:34.553866Z",
"iopub.status.idle": "2023-08-07T00:19:34.556544Z",
"shell.execute_reply": "2023-08-07T00:19:34.556286Z"
"iopub.execute_input": "2023-08-21T02:30:05.608553Z",
"iopub.status.busy": "2023-08-21T02:30:05.608445Z",
"iopub.status.idle": "2023-08-21T02:30:05.611386Z",
"shell.execute_reply": "2023-08-21T02:30:05.611134Z"
},
"lines_to_next_cell": 2
},
@@ -196,13 +196,13 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "572f0b9e",
"id": "33bc4d3c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.557984Z",
"iopub.status.busy": "2023-08-07T00:19:34.557901Z",
"iopub.status.idle": "2023-08-07T00:19:34.560759Z",
"shell.execute_reply": "2023-08-07T00:19:34.560490Z"
"iopub.execute_input": "2023-08-21T02:30:05.612735Z",
"iopub.status.busy": "2023-08-21T02:30:05.612639Z",
"iopub.status.idle": "2023-08-21T02:30:05.615164Z",
"shell.execute_reply": "2023-08-21T02:30:05.614915Z"
},
"lines_to_next_cell": 2
},
@@ -226,7 +226,7 @@
},
{
"cell_type": "markdown",
"id": "fbd132de",
"id": "eb9c6d4f",
"metadata": {},
"source": [
"Before beginning an analysis, it is important to know how the\n",
@@ -252,13 +252,13 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "92c39707",
"id": "0b6dba70",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.562264Z",
"iopub.status.busy": "2023-08-07T00:19:34.562173Z",
"iopub.status.idle": "2023-08-07T00:19:34.764386Z",
"shell.execute_reply": "2023-08-07T00:19:34.763084Z"
"iopub.execute_input": "2023-08-21T02:30:05.616714Z",
"iopub.status.busy": "2023-08-21T02:30:05.616622Z",
"iopub.status.idle": "2023-08-21T02:30:05.728265Z",
"shell.execute_reply": "2023-08-21T02:30:05.727903Z"
}
},
"outputs": [
@@ -292,7 +292,7 @@
},
{
"cell_type": "markdown",
"id": "f037665b",
"id": "2cc511cd",
"metadata": {},
"source": [
"Next we create Kaplan-Meier survival curves that are stratified by\n",
@@ -318,13 +318,13 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "3fc7848c",
"id": "9e6f2e70",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.770269Z",
"iopub.status.busy": "2023-08-07T00:19:34.769500Z",
"iopub.status.idle": "2023-08-07T00:19:34.900514Z",
"shell.execute_reply": "2023-08-07T00:19:34.900203Z"
"iopub.execute_input": "2023-08-21T02:30:05.730200Z",
"iopub.status.busy": "2023-08-21T02:30:05.730056Z",
"iopub.status.idle": "2023-08-21T02:30:05.845830Z",
"shell.execute_reply": "2023-08-21T02:30:05.845506Z"
}
},
"outputs": [
@@ -350,7 +350,7 @@
},
{
"cell_type": "markdown",
"id": "c0c1a16a",
"id": "4d7efefb",
"metadata": {},
"source": [
"As discussed in Section 11.4, we can perform a\n",
@@ -363,13 +363,13 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "bf30d26f",
"id": "c135f7aa",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.902462Z",
"iopub.status.busy": "2023-08-07T00:19:34.902313Z",
"iopub.status.idle": "2023-08-07T00:19:34.956077Z",
"shell.execute_reply": "2023-08-07T00:19:34.955714Z"
"iopub.execute_input": "2023-08-21T02:30:05.847658Z",
"iopub.status.busy": "2023-08-21T02:30:05.847519Z",
"iopub.status.idle": "2023-08-21T02:30:05.901295Z",
"shell.execute_reply": "2023-08-21T02:30:05.900935Z"
},
"lines_to_next_cell": 2
},
@@ -462,7 +462,7 @@
},
{
"cell_type": "markdown",
"id": "e270649c",
"id": "bd14317d",
"metadata": {},
"source": [
"The resulting $p$-value is $0.23$, indicating no evidence of a\n",
@@ -476,13 +476,13 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "2ab78e07",
"id": "5f9303dd",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.957966Z",
"iopub.status.busy": "2023-08-07T00:19:34.957792Z",
"iopub.status.idle": "2023-08-07T00:19:34.984567Z",
"shell.execute_reply": "2023-08-07T00:19:34.984261Z"
"iopub.execute_input": "2023-08-21T02:30:05.903263Z",
"iopub.status.busy": "2023-08-21T02:30:05.903017Z",
"iopub.status.idle": "2023-08-21T02:30:05.930691Z",
"shell.execute_reply": "2023-08-21T02:30:05.930331Z"
}
},
"outputs": [
@@ -553,7 +553,7 @@
},
{
"cell_type": "markdown",
"id": "b58b93ae",
"id": "7e56e83e",
"metadata": {},
"source": [
"The first argument to `fit` should be a data frame containing\n",
@@ -569,13 +569,13 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "4716b7b0",
"id": "bcc8470c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.986336Z",
"iopub.status.busy": "2023-08-07T00:19:34.986193Z",
"iopub.status.idle": "2023-08-07T00:19:34.991518Z",
"shell.execute_reply": "2023-08-07T00:19:34.991252Z"
"iopub.execute_input": "2023-08-21T02:30:05.932434Z",
"iopub.status.busy": "2023-08-21T02:30:05.932285Z",
"iopub.status.idle": "2023-08-21T02:30:05.937796Z",
"shell.execute_reply": "2023-08-21T02:30:05.937549Z"
}
},
"outputs": [
@@ -659,7 +659,7 @@
},
{
"cell_type": "markdown",
"id": "2820f486",
"id": "2e6163ca",
"metadata": {},
"source": [
"Regardless of which test we use, we see that there is no clear\n",
@@ -675,13 +675,13 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "c2767d88",
"id": "c26a3499",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:34.993223Z",
"iopub.status.busy": "2023-08-07T00:19:34.993093Z",
"iopub.status.idle": "2023-08-07T00:19:35.028673Z",
"shell.execute_reply": "2023-08-07T00:19:35.028408Z"
"iopub.execute_input": "2023-08-21T02:30:05.939300Z",
"iopub.status.busy": "2023-08-21T02:30:05.939184Z",
"iopub.status.idle": "2023-08-21T02:30:05.979585Z",
"shell.execute_reply": "2023-08-21T02:30:05.979250Z"
}
},
"outputs": [
@@ -800,7 +800,7 @@
},
{
"cell_type": "markdown",
"id": "eee4ab1f",
"id": "a96e311c",
"metadata": {},
"source": [
" The `diagnosis` variable has been coded so that the baseline\n",
@@ -823,13 +823,13 @@
{
"cell_type": "code",
"execution_count": 13,
"id": "ede1d219",
"id": "8d999f26",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.030313Z",
"iopub.status.busy": "2023-08-07T00:19:35.030211Z",
"iopub.status.idle": "2023-08-07T00:19:35.034142Z",
"shell.execute_reply": "2023-08-07T00:19:35.033836Z"
"iopub.execute_input": "2023-08-21T02:30:05.981441Z",
"iopub.status.busy": "2023-08-21T02:30:05.981315Z",
"iopub.status.idle": "2023-08-21T02:30:05.986317Z",
"shell.execute_reply": "2023-08-21T02:30:05.985949Z"
}
},
"outputs": [],
@@ -845,7 +845,7 @@
},
{
"cell_type": "markdown",
"id": "e1c307ae",
"id": "bf628fd8",
"metadata": {},
"source": [
"We make four\n",
@@ -856,13 +856,13 @@
{
"cell_type": "code",
"execution_count": 14,
"id": "dc032a71",
"id": "a1f6b355",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.035583Z",
"iopub.status.busy": "2023-08-07T00:19:35.035483Z",
"iopub.status.idle": "2023-08-07T00:19:35.041790Z",
"shell.execute_reply": "2023-08-07T00:19:35.041394Z"
"iopub.execute_input": "2023-08-21T02:30:05.988012Z",
"iopub.status.busy": "2023-08-21T02:30:05.987898Z",
"iopub.status.idle": "2023-08-21T02:30:05.993889Z",
"shell.execute_reply": "2023-08-21T02:30:05.993534Z"
}
},
"outputs": [
@@ -974,7 +974,7 @@
},
{
"cell_type": "markdown",
"id": "84da2586",
"id": "3f6334e4",
"metadata": {},
"source": [
"We then construct the model matrix based on the model specification `all_MS` used to fit\n",
@@ -984,13 +984,13 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "e7c1fe43",
"id": "a1a9d5b3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.043454Z",
"iopub.status.busy": "2023-08-07T00:19:35.043346Z",
"iopub.status.idle": "2023-08-07T00:19:35.050931Z",
"shell.execute_reply": "2023-08-07T00:19:35.050643Z"
"iopub.execute_input": "2023-08-21T02:30:05.995682Z",
"iopub.status.busy": "2023-08-21T02:30:05.995549Z",
"iopub.status.idle": "2023-08-21T02:30:06.005479Z",
"shell.execute_reply": "2023-08-21T02:30:06.005089Z"
}
},
"outputs": [
@@ -1117,7 +1117,7 @@
},
{
"cell_type": "markdown",
"id": "3cfe1ec4",
"id": "3eaec7e6",
"metadata": {},
"source": [
"We can use the `predict_survival_function()` method to obtain the estimated survival function."
@@ -1126,13 +1126,13 @@
{
"cell_type": "code",
"execution_count": 16,
"id": "f89fbed7",
"id": "1a18b56a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.052472Z",
"iopub.status.busy": "2023-08-07T00:19:35.052367Z",
"iopub.status.idle": "2023-08-07T00:19:35.059232Z",
"shell.execute_reply": "2023-08-07T00:19:35.058922Z"
"iopub.execute_input": "2023-08-21T02:30:06.007172Z",
"iopub.status.busy": "2023-08-21T02:30:06.007049Z",
"iopub.status.idle": "2023-08-21T02:30:06.014185Z",
"shell.execute_reply": "2023-08-21T02:30:06.013870Z"
},
"lines_to_next_cell": 0
},
@@ -1276,7 +1276,7 @@
},
{
"cell_type": "markdown",
"id": "29afd641",
"id": "7d533f90",
"metadata": {},
"source": [
"This returns a data frame,\n",
@@ -1287,13 +1287,13 @@
{
"cell_type": "code",
"execution_count": 17,
"id": "8f0329b4",
"id": "ff3de29c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.061046Z",
"iopub.status.busy": "2023-08-07T00:19:35.060930Z",
"iopub.status.idle": "2023-08-07T00:19:35.167601Z",
"shell.execute_reply": "2023-08-07T00:19:35.167288Z"
"iopub.execute_input": "2023-08-21T02:30:06.015778Z",
"iopub.status.busy": "2023-08-21T02:30:06.015664Z",
"iopub.status.idle": "2023-08-21T02:30:06.124035Z",
"shell.execute_reply": "2023-08-21T02:30:06.123732Z"
},
"lines_to_next_cell": 2
},
@@ -1316,7 +1316,7 @@
},
{
"cell_type": "markdown",
"id": "12723ce5",
"id": "e660793e",
"metadata": {},
"source": [
"## Publication Data\n",
@@ -1330,13 +1330,13 @@
{
"cell_type": "code",
"execution_count": 18,
"id": "3045bfc0",
"id": "cd9060c1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.169251Z",
"iopub.status.busy": "2023-08-07T00:19:35.169133Z",
"iopub.status.idle": "2023-08-07T00:19:35.287186Z",
"shell.execute_reply": "2023-08-07T00:19:35.286859Z"
"iopub.execute_input": "2023-08-21T02:30:06.125714Z",
"iopub.status.busy": "2023-08-21T02:30:06.125592Z",
"iopub.status.idle": "2023-08-21T02:30:06.243701Z",
"shell.execute_reply": "2023-08-21T02:30:06.243300Z"
}
},
"outputs": [
@@ -1363,7 +1363,7 @@
},
{
"cell_type": "markdown",
"id": "6fcb22f7",
"id": "d8f0f687",
"metadata": {},
"source": [
"As discussed previously, the $p$-values from fitting Cox's\n",
@@ -1375,13 +1375,13 @@
{
"cell_type": "code",
"execution_count": 19,
"id": "d070f716",
"id": "6af7106e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.288887Z",
"iopub.status.busy": "2023-08-07T00:19:35.288769Z",
"iopub.status.idle": "2023-08-07T00:19:35.321428Z",
"shell.execute_reply": "2023-08-07T00:19:35.321128Z"
"iopub.execute_input": "2023-08-21T02:30:06.245493Z",
"iopub.status.busy": "2023-08-21T02:30:06.245357Z",
"iopub.status.idle": "2023-08-21T02:30:06.281521Z",
"shell.execute_reply": "2023-08-21T02:30:06.281138Z"
},
"lines_to_next_cell": 2
},
@@ -1453,7 +1453,7 @@
},
{
"cell_type": "markdown",
"id": "513a55b1",
"id": "8ef28b8b",
"metadata": {},
"source": [
"However, the results change dramatically when we include other\n",
@@ -1464,13 +1464,13 @@
{
"cell_type": "code",
"execution_count": 20,
"id": "2bbcdd0c",
"id": "b6ebefa7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.323119Z",
"iopub.status.busy": "2023-08-07T00:19:35.323003Z",
"iopub.status.idle": "2023-08-07T00:19:35.362910Z",
"shell.execute_reply": "2023-08-07T00:19:35.362438Z"
"iopub.execute_input": "2023-08-21T02:30:06.283282Z",
"iopub.status.busy": "2023-08-21T02:30:06.283123Z",
"iopub.status.idle": "2023-08-21T02:30:06.327003Z",
"shell.execute_reply": "2023-08-21T02:30:06.326646Z"
}
},
"outputs": [
@@ -1573,7 +1573,7 @@
},
{
"cell_type": "markdown",
"id": "75bb8aa6",
"id": "d463e623",
"metadata": {},
"source": [
"We see that there are a number of statistically significant variables,\n",
@@ -1583,7 +1583,7 @@
},
{
"cell_type": "markdown",
"id": "bfe236e5",
"id": "a23c38e0",
"metadata": {},
"source": [
"## Call Center Data\n",
@@ -1608,13 +1608,13 @@
{
"cell_type": "code",
"execution_count": 21,
"id": "b8ece43a",
"id": "098f42ea",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.364905Z",
"iopub.status.busy": "2023-08-07T00:19:35.364772Z",
"iopub.status.idle": "2023-08-07T00:19:35.368589Z",
"shell.execute_reply": "2023-08-07T00:19:35.368291Z"
"iopub.execute_input": "2023-08-21T02:30:06.329058Z",
"iopub.status.busy": "2023-08-21T02:30:06.328927Z",
"iopub.status.idle": "2023-08-21T02:30:06.332782Z",
"shell.execute_reply": "2023-08-21T02:30:06.332425Z"
}
},
"outputs": [],
@@ -1637,7 +1637,7 @@
},
{
"cell_type": "markdown",
"id": "c93e44f3",
"id": "2f54ed03",
"metadata": {},
"source": [
"We then build a model matrix (omitting the intercept)"
@@ -1646,13 +1646,13 @@
{
"cell_type": "code",
"execution_count": 22,
"id": "3e4f766f",
"id": "26d5d0d0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.370485Z",
"iopub.status.busy": "2023-08-07T00:19:35.370371Z",
"iopub.status.idle": "2023-08-07T00:19:35.377790Z",
"shell.execute_reply": "2023-08-07T00:19:35.377469Z"
"iopub.execute_input": "2023-08-21T02:30:06.334692Z",
"iopub.status.busy": "2023-08-21T02:30:06.334589Z",
"iopub.status.idle": "2023-08-21T02:30:06.344047Z",
"shell.execute_reply": "2023-08-21T02:30:06.343708Z"
}
},
"outputs": [],
@@ -1666,7 +1666,7 @@
},
{
"cell_type": "markdown",
"id": "cad1ed19",
"id": "a1a8f65d",
"metadata": {},
"source": [
"It is worthwhile to take a peek at the model matrix `X`, so\n",
@@ -1678,13 +1678,13 @@
{
"cell_type": "code",
"execution_count": 23,
"id": "72f42d14",
"id": "77500663",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.380244Z",
"iopub.status.busy": "2023-08-07T00:19:35.380068Z",
"iopub.status.idle": "2023-08-07T00:19:35.384542Z",
"shell.execute_reply": "2023-08-07T00:19:35.384259Z"
"iopub.execute_input": "2023-08-21T02:30:06.345660Z",
"iopub.status.busy": "2023-08-21T02:30:06.345575Z",
"iopub.status.idle": "2023-08-21T02:30:06.350086Z",
"shell.execute_reply": "2023-08-21T02:30:06.349797Z"
}
},
"outputs": [
@@ -1781,7 +1781,7 @@
},
{
"cell_type": "markdown",
"id": "38c40ae1",
"id": "fd7bd61c",
"metadata": {},
"source": [
"Next, we specify the coefficients and the hazard function."
@@ -1790,13 +1790,13 @@
{
"cell_type": "code",
"execution_count": 24,
"id": "8b921536",
"id": "74324a56",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.386034Z",
"iopub.status.busy": "2023-08-07T00:19:35.385942Z",
"iopub.status.idle": "2023-08-07T00:19:35.420461Z",
"shell.execute_reply": "2023-08-07T00:19:35.405608Z"
"iopub.execute_input": "2023-08-21T02:30:06.351738Z",
"iopub.status.busy": "2023-08-21T02:30:06.351549Z",
"iopub.status.idle": "2023-08-21T02:30:06.444268Z",
"shell.execute_reply": "2023-08-21T02:30:06.441484Z"
}
},
"outputs": [],
@@ -1808,7 +1808,7 @@
},
{
"cell_type": "markdown",
"id": "a0698ffd",
"id": "cfe879e6",
"metadata": {},
"source": [
"Here, we have set the coefficient associated with `Operators` to\n",
@@ -1837,13 +1837,13 @@
{
"cell_type": "code",
"execution_count": 25,
"id": "96ce0f99",
"id": "d4be10c2",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.484657Z",
"iopub.status.busy": "2023-08-07T00:19:35.484150Z",
"iopub.status.idle": "2023-08-07T00:19:35.509286Z",
"shell.execute_reply": "2023-08-07T00:19:35.508169Z"
"iopub.execute_input": "2023-08-21T02:30:06.449822Z",
"iopub.status.busy": "2023-08-21T02:30:06.449515Z",
"iopub.status.idle": "2023-08-21T02:30:06.458388Z",
"shell.execute_reply": "2023-08-21T02:30:06.457673Z"
},
"lines_to_next_cell": 0
},
@@ -1854,7 +1854,7 @@
},
{
"cell_type": "markdown",
"id": "1956e4c2",
"id": "6095cfc1",
"metadata": {},
"source": [
"We are now ready to generate data under the Cox proportional hazards\n",
@@ -1868,13 +1868,13 @@
{
"cell_type": "code",
"execution_count": 26,
"id": "63d78ff9",
"id": "c98d396f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.522637Z",
"iopub.status.busy": "2023-08-07T00:19:35.522260Z",
"iopub.status.idle": "2023-08-07T00:19:35.630452Z",
"shell.execute_reply": "2023-08-07T00:19:35.627960Z"
"iopub.execute_input": "2023-08-21T02:30:06.461931Z",
"iopub.status.busy": "2023-08-21T02:30:06.461787Z",
"iopub.status.idle": "2023-08-21T02:30:06.624349Z",
"shell.execute_reply": "2023-08-21T02:30:06.624026Z"
}
},
"outputs": [],
@@ -1886,7 +1886,7 @@
},
{
"cell_type": "markdown",
"id": "035e4ecf",
"id": "ed2e23ea",
"metadata": {},
"source": [
"We now simulate our censoring variable, for which we assume\n",
@@ -1897,13 +1897,13 @@
{
"cell_type": "code",
"execution_count": 27,
"id": "fe008dbf",
"id": "caf627bc",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.635844Z",
"iopub.status.busy": "2023-08-07T00:19:35.635469Z",
"iopub.status.idle": "2023-08-07T00:19:35.649527Z",
"shell.execute_reply": "2023-08-07T00:19:35.646191Z"
"iopub.execute_input": "2023-08-21T02:30:06.626165Z",
"iopub.status.busy": "2023-08-21T02:30:06.626054Z",
"iopub.status.idle": "2023-08-21T02:30:06.630808Z",
"shell.execute_reply": "2023-08-21T02:30:06.630542Z"
},
"lines_to_next_cell": 2
},
@@ -2005,13 +2005,13 @@
{
"cell_type": "code",
"execution_count": 28,
"id": "c3a2bec7",
"id": "e63242f9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.653001Z",
"iopub.status.busy": "2023-08-07T00:19:35.652338Z",
"iopub.status.idle": "2023-08-07T00:19:35.656830Z",
"shell.execute_reply": "2023-08-07T00:19:35.656500Z"
"iopub.execute_input": "2023-08-21T02:30:06.632357Z",
"iopub.status.busy": "2023-08-21T02:30:06.632261Z",
"iopub.status.idle": "2023-08-21T02:30:06.634630Z",
"shell.execute_reply": "2023-08-21T02:30:06.634305Z"
}
},
"outputs": [
@@ -2032,7 +2032,7 @@
},
{
"cell_type": "markdown",
"id": "207937e5",
"id": "5f345011",
"metadata": {},
"source": [
"We now plot Kaplan-Meier survival curves. First, we stratify by `Center`."
@@ -2041,13 +2041,13 @@
{
"cell_type": "code",
"execution_count": 29,
"id": "2b27af56",
"id": "338db71d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.658421Z",
"iopub.status.busy": "2023-08-07T00:19:35.658328Z",
"iopub.status.idle": "2023-08-07T00:19:35.811796Z",
"shell.execute_reply": "2023-08-07T00:19:35.811449Z"
"iopub.execute_input": "2023-08-21T02:30:06.636188Z",
"iopub.status.busy": "2023-08-21T02:30:06.636081Z",
"iopub.status.idle": "2023-08-21T02:30:06.791856Z",
"shell.execute_reply": "2023-08-21T02:30:06.791521Z"
}
},
"outputs": [
@@ -2084,7 +2084,7 @@
},
{
"cell_type": "markdown",
"id": "be6d37f7",
"id": "1b5a1230",
"metadata": {},
"source": [
"Next, we stratify by `Time`."
@@ -2093,13 +2093,13 @@
{
"cell_type": "code",
"execution_count": 30,
"id": "9625598d",
"id": "c1db6e15",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:35.813696Z",
"iopub.status.busy": "2023-08-07T00:19:35.813601Z",
"iopub.status.idle": "2023-08-07T00:19:36.041021Z",
"shell.execute_reply": "2023-08-07T00:19:36.040708Z"
"iopub.execute_input": "2023-08-21T02:30:06.793629Z",
"iopub.status.busy": "2023-08-21T02:30:06.793538Z",
"iopub.status.idle": "2023-08-21T02:30:06.992155Z",
"shell.execute_reply": "2023-08-21T02:30:06.991803Z"
}
},
"outputs": [
@@ -2136,7 +2136,7 @@
},
{
"cell_type": "markdown",
"id": "1408ebc0",
"id": "deb73d38",
"metadata": {},
"source": [
"It seems that calls at Call Center B take longer to be answered than\n",
@@ -2149,13 +2149,13 @@
{
"cell_type": "code",
"execution_count": 31,
"id": "75a744ef",
"id": "02ea4212",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:36.043079Z",
"iopub.status.busy": "2023-08-07T00:19:36.042900Z",
"iopub.status.idle": "2023-08-07T00:19:36.061936Z",
"shell.execute_reply": "2023-08-07T00:19:36.061630Z"
"iopub.execute_input": "2023-08-21T02:30:06.993929Z",
"iopub.status.busy": "2023-08-21T02:30:06.993819Z",
"iopub.status.idle": "2023-08-21T02:30:07.011557Z",
"shell.execute_reply": "2023-08-21T02:30:07.011276Z"
},
"lines_to_next_cell": 2
},
@@ -2247,7 +2247,7 @@
},
{
"cell_type": "markdown",
"id": "be5055e4",
"id": "db9cc6ee",
"metadata": {},
"source": [
"Next, we consider the effect of `Time`."
@@ -2256,13 +2256,13 @@
{
"cell_type": "code",
"execution_count": 32,
"id": "9badb3e3",
"id": "0ac610d5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:36.063627Z",
"iopub.status.busy": "2023-08-07T00:19:36.063519Z",
"iopub.status.idle": "2023-08-07T00:19:36.082451Z",
"shell.execute_reply": "2023-08-07T00:19:36.082161Z"
"iopub.execute_input": "2023-08-21T02:30:07.013331Z",
"iopub.status.busy": "2023-08-21T02:30:07.013187Z",
"iopub.status.idle": "2023-08-21T02:30:07.030401Z",
"shell.execute_reply": "2023-08-21T02:30:07.030073Z"
},
"lines_to_next_cell": 2
},
@@ -2354,7 +2354,7 @@
},
{
"cell_type": "markdown",
"id": "64b2bc33",
"id": "0946d3ef",
"metadata": {},
"source": [
"As in the case of a categorical variable with 2 levels, these\n",
@@ -2366,13 +2366,13 @@
{
"cell_type": "code",
"execution_count": 33,
"id": "026e9ff8",
"id": "107cedad",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:36.084076Z",
"iopub.status.busy": "2023-08-07T00:19:36.083964Z",
"iopub.status.idle": "2023-08-07T00:19:36.208409Z",
"shell.execute_reply": "2023-08-07T00:19:36.208076Z"
"iopub.execute_input": "2023-08-21T02:30:07.032008Z",
"iopub.status.busy": "2023-08-21T02:30:07.031887Z",
"iopub.status.idle": "2023-08-21T02:30:07.160931Z",
"shell.execute_reply": "2023-08-21T02:30:07.160639Z"
},
"lines_to_next_cell": 2
},
@@ -2462,7 +2462,7 @@
},
{
"cell_type": "markdown",
"id": "4ed54fe0",
"id": "10f2a0c1",
"metadata": {},
"source": [
"Next, we look at the results for `Time`."
@@ -2471,13 +2471,13 @@
{
"cell_type": "code",
"execution_count": 34,
"id": "7cab3789",
"id": "334eb331",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:36.210101Z",
"iopub.status.busy": "2023-08-07T00:19:36.209985Z",
"iopub.status.idle": "2023-08-07T00:19:36.334146Z",
"shell.execute_reply": "2023-08-07T00:19:36.333801Z"
"iopub.execute_input": "2023-08-21T02:30:07.162793Z",
"iopub.status.busy": "2023-08-21T02:30:07.162651Z",
"iopub.status.idle": "2023-08-21T02:30:07.291875Z",
"shell.execute_reply": "2023-08-21T02:30:07.291550Z"
},
"lines_to_next_cell": 2
},
@@ -2567,7 +2567,7 @@
},
{
"cell_type": "markdown",
"id": "2d250dc9",
"id": "774963d4",
"metadata": {},
"source": [
"We find that differences between centers are highly significant, as\n",
@@ -2579,13 +2579,13 @@
{
"cell_type": "code",
"execution_count": 35,
"id": "5cc4b898",
"id": "421811c5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:36.336025Z",
"iopub.status.busy": "2023-08-07T00:19:36.335898Z",
"iopub.status.idle": "2023-08-07T00:19:36.561174Z",
"shell.execute_reply": "2023-08-07T00:19:36.559597Z"
"iopub.execute_input": "2023-08-21T02:30:07.293545Z",
"iopub.status.busy": "2023-08-21T02:30:07.293433Z",
"iopub.status.idle": "2023-08-21T02:30:07.532213Z",
"shell.execute_reply": "2023-08-21T02:30:07.531293Z"
},
"lines_to_next_cell": 2
},
@@ -2684,7 +2684,7 @@
},
{
"cell_type": "markdown",
"id": "bec9d61d",
"id": "3c65063f",
"metadata": {},
"source": [
"The $p$-values for Center B and evening time\n",

View File

@@ -14,7 +14,7 @@ jupyter:
# Chapter 12
# Lab: Unsupervised Learning
# Lab: Unsupervised Learning
In this lab we demonstrate PCA and clustering on several datasets.
As in other labs, we import some of our libraries at this top
level. This makes the code more readable, as scanning the first few
@@ -819,7 +819,7 @@ linkage. Clearly cell lines within a single cancer type do tend to
cluster together, although the clustering is not perfect. We will use
complete linkage hierarchical clustering for the analysis that
follows.
We can cut the dendrogram at the height that will yield a particular
number of clusters, say four:

File diff suppressed because it is too large Load Diff

View File

@@ -121,7 +121,7 @@ amounts to quite a weak signal, and it resulted in a high number of
Type II errors. Let's instead simulate data with a stronger signal,
so that the ratio of the mean to the standard deviation for the false
null hypotheses equals $1$. We make only 10 Type II errors.
```{python}
true_mean = np.array([1]*50 + [0]*50)
@@ -244,6 +244,7 @@ mult_test(fund_mini_pvals, method = "holm", alpha=0.05)[:2]
As discussed previously, Manager One seems to perform particularly
well, whereas Manager Two has poor performance.
```{python}
fund_mini.mean()
@@ -575,5 +576,5 @@ ax.set_xlabel("Number of Rejections")
ax.set_ylabel("False Discovery Rate");
```

View File

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "75b2d75c",
"id": "687e9b80",
"metadata": {},
"source": [
"\n",
@@ -14,7 +14,7 @@
},
{
"cell_type": "markdown",
"id": "34e410a6",
"id": "9f594a41",
"metadata": {},
"source": [
"We include our usual imports seen in earlier labs."
@@ -23,13 +23,13 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "1f928b2d",
"id": "7cc4fbeb",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:48.489971Z",
"iopub.status.busy": "2023-08-07T00:19:48.489727Z",
"iopub.status.idle": "2023-08-07T00:19:50.216508Z",
"shell.execute_reply": "2023-08-07T00:19:50.215573Z"
"iopub.execute_input": "2023-08-21T02:29:16.417394Z",
"iopub.status.busy": "2023-08-21T02:29:16.417287Z",
"iopub.status.idle": "2023-08-21T02:29:17.613483Z",
"shell.execute_reply": "2023-08-21T02:29:17.613156Z"
}
},
"outputs": [],
@@ -43,7 +43,7 @@
},
{
"cell_type": "markdown",
"id": "12319e0a",
"id": "08ba7bed",
"metadata": {},
"source": [
"We also collect the new imports\n",
@@ -53,13 +53,13 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "eb4b32aa",
"id": "595efc18",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.219989Z",
"iopub.status.busy": "2023-08-07T00:19:50.219429Z",
"iopub.status.idle": "2023-08-07T00:19:50.223239Z",
"shell.execute_reply": "2023-08-07T00:19:50.222392Z"
"iopub.execute_input": "2023-08-21T02:29:17.615551Z",
"iopub.status.busy": "2023-08-21T02:29:17.615375Z",
"iopub.status.idle": "2023-08-21T02:29:17.617379Z",
"shell.execute_reply": "2023-08-21T02:29:17.617087Z"
},
"lines_to_next_cell": 2
},
@@ -78,7 +78,7 @@
},
{
"cell_type": "markdown",
"id": "a2747e58",
"id": "69e5023e",
"metadata": {},
"source": [
"## Review of Hypothesis Tests\n",
@@ -92,13 +92,13 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "e12ac0cd",
"id": "985d1d6e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.225454Z",
"iopub.status.busy": "2023-08-07T00:19:50.225335Z",
"iopub.status.idle": "2023-08-07T00:19:50.228651Z",
"shell.execute_reply": "2023-08-07T00:19:50.228301Z"
"iopub.execute_input": "2023-08-21T02:29:17.618995Z",
"iopub.status.busy": "2023-08-21T02:29:17.618887Z",
"iopub.status.idle": "2023-08-21T02:29:17.620921Z",
"shell.execute_reply": "2023-08-21T02:29:17.620629Z"
}
},
"outputs": [],
@@ -111,7 +111,7 @@
},
{
"cell_type": "markdown",
"id": "70d37233",
"id": "9ae561c4",
"metadata": {},
"source": [
"To begin, we use `ttest_1samp()` from the\n",
@@ -122,13 +122,13 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "04d0f49e",
"id": "753d612a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.230854Z",
"iopub.status.busy": "2023-08-07T00:19:50.230727Z",
"iopub.status.idle": "2023-08-07T00:19:50.236745Z",
"shell.execute_reply": "2023-08-07T00:19:50.236388Z"
"iopub.execute_input": "2023-08-21T02:29:17.622537Z",
"iopub.status.busy": "2023-08-21T02:29:17.622429Z",
"iopub.status.idle": "2023-08-21T02:29:17.626063Z",
"shell.execute_reply": "2023-08-21T02:29:17.625801Z"
}
},
"outputs": [
@@ -150,7 +150,7 @@
},
{
"cell_type": "markdown",
"id": "cf83426f",
"id": "5d9dc17f",
"metadata": {},
"source": [
"The $p$-value comes out to 0.931, which is not low enough to\n",
@@ -169,13 +169,13 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "d1f0c695",
"id": "facd6569",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.238822Z",
"iopub.status.busy": "2023-08-07T00:19:50.238685Z",
"iopub.status.idle": "2023-08-07T00:19:50.263876Z",
"shell.execute_reply": "2023-08-07T00:19:50.263508Z"
"iopub.execute_input": "2023-08-21T02:29:17.627714Z",
"iopub.status.busy": "2023-08-21T02:29:17.627617Z",
"iopub.status.idle": "2023-08-21T02:29:17.651726Z",
"shell.execute_reply": "2023-08-21T02:29:17.651448Z"
},
"lines_to_next_cell": 0
},
@@ -195,7 +195,7 @@
},
{
"cell_type": "markdown",
"id": "3d8e0d96",
"id": "4094daa7",
"metadata": {},
"source": [
"Since this is a simulated data set, we can create a $2 \\times 2$ table\n",
@@ -205,13 +205,13 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "7a9594a0",
"id": "e89ef3eb",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.266708Z",
"iopub.status.busy": "2023-08-07T00:19:50.266387Z",
"iopub.status.idle": "2023-08-07T00:19:50.280831Z",
"shell.execute_reply": "2023-08-07T00:19:50.280194Z"
"iopub.execute_input": "2023-08-21T02:29:17.653344Z",
"iopub.status.busy": "2023-08-21T02:29:17.653256Z",
"iopub.status.idle": "2023-08-21T02:29:17.662644Z",
"shell.execute_reply": "2023-08-21T02:29:17.662346Z"
},
"lines_to_next_cell": 0
},
@@ -282,7 +282,7 @@
},
{
"cell_type": "markdown",
"id": "9610c817",
"id": "a10ba423",
"metadata": {},
"source": [
"Therefore, at level $\\alpha=0.05$, we reject 15 of the 50 false\n",
@@ -299,19 +299,20 @@
"amounts to quite a weak signal, and it resulted in a high number of\n",
"Type II errors. Lets instead simulate data with a stronger signal,\n",
"so that the ratio of the mean to the standard deviation for the false\n",
"null hypotheses equals $1$. We make only 10 Type II errors.\n"
"null hypotheses equals $1$. We make only 10 Type II errors.\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "25f7fc5d",
"id": "ae184aaf",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.283336Z",
"iopub.status.busy": "2023-08-07T00:19:50.283198Z",
"iopub.status.idle": "2023-08-07T00:19:50.317664Z",
"shell.execute_reply": "2023-08-07T00:19:50.317356Z"
"iopub.execute_input": "2023-08-21T02:29:17.664327Z",
"iopub.status.busy": "2023-08-21T02:29:17.664213Z",
"iopub.status.idle": "2023-08-21T02:29:17.690928Z",
"shell.execute_reply": "2023-08-21T02:29:17.690657Z"
},
"lines_to_next_cell": 0
},
@@ -394,7 +395,7 @@
},
{
"cell_type": "markdown",
"id": "bb70c597",
"id": "7ca15d3f",
"metadata": {},
"source": [
" "
@@ -402,7 +403,7 @@
},
{
"cell_type": "markdown",
"id": "f6953d33",
"id": "9e9f5977",
"metadata": {},
"source": [
"## Family-Wise Error Rate\n",
@@ -417,13 +418,13 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "369b5bd3",
"id": "0295fe68",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.319399Z",
"iopub.status.busy": "2023-08-07T00:19:50.319268Z",
"iopub.status.idle": "2023-08-07T00:19:50.674219Z",
"shell.execute_reply": "2023-08-07T00:19:50.673887Z"
"iopub.execute_input": "2023-08-21T02:29:17.692568Z",
"iopub.status.busy": "2023-08-21T02:29:17.692459Z",
"iopub.status.idle": "2023-08-21T02:29:17.899403Z",
"shell.execute_reply": "2023-08-21T02:29:17.899081Z"
}
},
"outputs": [
@@ -454,7 +455,7 @@
},
{
"cell_type": "markdown",
"id": "3a81479e",
"id": "fecaca9e",
"metadata": {},
"source": [
"As discussed previously, even for moderate values of $m$ such as $50$,\n",
@@ -473,13 +474,13 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "9ce7a19f",
"id": "406e59a8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.676029Z",
"iopub.status.busy": "2023-08-07T00:19:50.675909Z",
"iopub.status.idle": "2023-08-07T00:19:50.715451Z",
"shell.execute_reply": "2023-08-07T00:19:50.715145Z"
"iopub.execute_input": "2023-08-21T02:29:17.901146Z",
"iopub.status.busy": "2023-08-21T02:29:17.901041Z",
"iopub.status.idle": "2023-08-21T02:29:17.939312Z",
"shell.execute_reply": "2023-08-21T02:29:17.939019Z"
}
},
"outputs": [
@@ -505,7 +506,7 @@
},
{
"cell_type": "markdown",
"id": "7561e3a3",
"id": "87bab88b",
"metadata": {},
"source": [
"The $p$-values are low for Managers One and Three, and high for the\n",
@@ -530,7 +531,7 @@
},
{
"cell_type": "markdown",
"id": "5b608e46",
"id": "d0c7a2a0",
"metadata": {},
"source": [
"The `mult_test()` function takes $p$-values and a `method` argument, as well as an optional\n",
@@ -541,13 +542,13 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "de6cffed",
"id": "d4f6a247",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.717476Z",
"iopub.status.busy": "2023-08-07T00:19:50.717258Z",
"iopub.status.idle": "2023-08-07T00:19:50.719841Z",
"shell.execute_reply": "2023-08-07T00:19:50.719577Z"
"iopub.execute_input": "2023-08-21T02:29:17.941032Z",
"iopub.status.busy": "2023-08-21T02:29:17.940919Z",
"iopub.status.idle": "2023-08-21T02:29:17.943369Z",
"shell.execute_reply": "2023-08-21T02:29:17.943081Z"
},
"lines_to_next_cell": 2
},
@@ -570,7 +571,7 @@
},
{
"cell_type": "markdown",
"id": "5135c6b9",
"id": "4d5bc7e7",
"metadata": {},
"source": [
"The $p$-values `bonf` are simply the `fund_mini_pvalues` multiplied by 5 and truncated to be less than\n",
@@ -580,13 +581,13 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "0de71500",
"id": "01a29d71",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.721450Z",
"iopub.status.busy": "2023-08-07T00:19:50.721342Z",
"iopub.status.idle": "2023-08-07T00:19:50.723962Z",
"shell.execute_reply": "2023-08-07T00:19:50.723691Z"
"iopub.execute_input": "2023-08-21T02:29:17.944859Z",
"iopub.status.busy": "2023-08-21T02:29:17.944760Z",
"iopub.status.idle": "2023-08-21T02:29:17.946888Z",
"shell.execute_reply": "2023-08-21T02:29:17.946639Z"
}
},
"outputs": [
@@ -608,7 +609,7 @@
},
{
"cell_type": "markdown",
"id": "1f0bc112",
"id": "e68c9051",
"metadata": {},
"source": [
"Therefore, using Bonferronis method, we are able to reject the null hypothesis only for Manager\n",
@@ -622,13 +623,13 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "f7e87bdb",
"id": "95454eb4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.725450Z",
"iopub.status.busy": "2023-08-07T00:19:50.725349Z",
"iopub.status.idle": "2023-08-07T00:19:50.777055Z",
"shell.execute_reply": "2023-08-07T00:19:50.776612Z"
"iopub.execute_input": "2023-08-21T02:29:17.948474Z",
"iopub.status.busy": "2023-08-21T02:29:17.948372Z",
"iopub.status.idle": "2023-08-21T02:29:17.990740Z",
"shell.execute_reply": "2023-08-21T02:29:17.990464Z"
},
"lines_to_next_cell": 2
},
@@ -651,23 +652,24 @@
},
{
"cell_type": "markdown",
"id": "f762fecd",
"id": "587b5b48",
"metadata": {},
"source": [
"As discussed previously, Manager One seems to perform particularly\n",
"well, whereas Manager Two has poor performance."
"well, whereas Manager Two has poor performance.\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "e88be376",
"id": "1f1ac764",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.779763Z",
"iopub.status.busy": "2023-08-07T00:19:50.779554Z",
"iopub.status.idle": "2023-08-07T00:19:50.783798Z",
"shell.execute_reply": "2023-08-07T00:19:50.783418Z"
"iopub.execute_input": "2023-08-21T02:29:17.992261Z",
"iopub.status.busy": "2023-08-21T02:29:17.992149Z",
"iopub.status.idle": "2023-08-21T02:29:17.995141Z",
"shell.execute_reply": "2023-08-21T02:29:17.994894Z"
},
"lines_to_next_cell": 2
},
@@ -694,7 +696,7 @@
},
{
"cell_type": "markdown",
"id": "88dbf0a6",
"id": "e3c4b716",
"metadata": {},
"source": [
"Is there evidence of a meaningful difference in performance between\n",
@@ -705,13 +707,13 @@
{
"cell_type": "code",
"execution_count": 14,
"id": "41149af6",
"id": "298d975d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.786752Z",
"iopub.status.busy": "2023-08-07T00:19:50.786580Z",
"iopub.status.idle": "2023-08-07T00:19:50.791095Z",
"shell.execute_reply": "2023-08-07T00:19:50.790607Z"
"iopub.execute_input": "2023-08-21T02:29:17.996686Z",
"iopub.status.busy": "2023-08-21T02:29:17.996590Z",
"iopub.status.idle": "2023-08-21T02:29:17.999332Z",
"shell.execute_reply": "2023-08-21T02:29:17.999076Z"
}
},
"outputs": [
@@ -733,7 +735,7 @@
},
{
"cell_type": "markdown",
"id": "1aca6122",
"id": "3908d7d2",
"metadata": {},
"source": [
"The test results in a $p$-value of 0.038,\n",
@@ -757,13 +759,13 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "61aabda7",
"id": "be117713",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:50.793577Z",
"iopub.status.busy": "2023-08-07T00:19:50.793394Z",
"iopub.status.idle": "2023-08-07T00:19:51.318999Z",
"shell.execute_reply": "2023-08-07T00:19:51.318071Z"
"iopub.execute_input": "2023-08-21T02:29:18.000853Z",
"iopub.status.busy": "2023-08-21T02:29:18.000747Z",
"iopub.status.idle": "2023-08-21T02:29:18.487357Z",
"shell.execute_reply": "2023-08-21T02:29:18.487078Z"
},
"lines_to_next_cell": 2
},
@@ -799,7 +801,7 @@
},
{
"cell_type": "markdown",
"id": "e0084fc5",
"id": "0fdf963f",
"metadata": {},
"source": [
"The `pairwise_tukeyhsd()` function provides confidence intervals\n",
@@ -817,13 +819,13 @@
{
"cell_type": "code",
"execution_count": 16,
"id": "cbcad4de",
"id": "537c4ea8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:51.321200Z",
"iopub.status.busy": "2023-08-07T00:19:51.321046Z",
"iopub.status.idle": "2023-08-07T00:19:51.411142Z",
"shell.execute_reply": "2023-08-07T00:19:51.410682Z"
"iopub.execute_input": "2023-08-21T02:29:18.489069Z",
"iopub.status.busy": "2023-08-21T02:29:18.488949Z",
"iopub.status.idle": "2023-08-21T02:29:18.570869Z",
"shell.execute_reply": "2023-08-21T02:29:18.570427Z"
}
},
"outputs": [
@@ -845,7 +847,7 @@
},
{
"cell_type": "markdown",
"id": "6278d13c",
"id": "8590f246",
"metadata": {},
"source": [
"## False Discovery Rate\n",
@@ -858,13 +860,13 @@
{
"cell_type": "code",
"execution_count": 17,
"id": "b5842190",
"id": "2c88ec87",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:51.413331Z",
"iopub.status.busy": "2023-08-07T00:19:51.413176Z",
"iopub.status.idle": "2023-08-07T00:19:51.848427Z",
"shell.execute_reply": "2023-08-07T00:19:51.847956Z"
"iopub.execute_input": "2023-08-21T02:29:18.572454Z",
"iopub.status.busy": "2023-08-21T02:29:18.572341Z",
"iopub.status.idle": "2023-08-21T02:29:19.005707Z",
"shell.execute_reply": "2023-08-21T02:29:19.005387Z"
}
},
"outputs": [],
@@ -876,7 +878,7 @@
},
{
"cell_type": "markdown",
"id": "80fc2fcc",
"id": "80e77fab",
"metadata": {},
"source": [
"There are far too many managers to consider trying to control the FWER.\n",
@@ -887,13 +889,13 @@
{
"cell_type": "code",
"execution_count": 18,
"id": "7c9d8bed",
"id": "b6d56819",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:51.850663Z",
"iopub.status.busy": "2023-08-07T00:19:51.850523Z",
"iopub.status.idle": "2023-08-07T00:19:51.854777Z",
"shell.execute_reply": "2023-08-07T00:19:51.854196Z"
"iopub.execute_input": "2023-08-21T02:29:19.007847Z",
"iopub.status.busy": "2023-08-21T02:29:19.007564Z",
"iopub.status.idle": "2023-08-21T02:29:19.010742Z",
"shell.execute_reply": "2023-08-21T02:29:19.010371Z"
}
},
"outputs": [
@@ -916,7 +918,7 @@
},
{
"cell_type": "markdown",
"id": "4f73096d",
"id": "b4662444",
"metadata": {},
"source": [
"The *q-values* output by the\n",
@@ -932,13 +934,13 @@
{
"cell_type": "code",
"execution_count": 19,
"id": "bfa39f7c",
"id": "b00da3a1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:51.856795Z",
"iopub.status.busy": "2023-08-07T00:19:51.856678Z",
"iopub.status.idle": "2023-08-07T00:19:51.859719Z",
"shell.execute_reply": "2023-08-07T00:19:51.859327Z"
"iopub.execute_input": "2023-08-21T02:29:19.012400Z",
"iopub.status.busy": "2023-08-21T02:29:19.012298Z",
"iopub.status.idle": "2023-08-21T02:29:19.015314Z",
"shell.execute_reply": "2023-08-21T02:29:19.014978Z"
},
"lines_to_next_cell": 0
},
@@ -960,7 +962,7 @@
},
{
"cell_type": "markdown",
"id": "ccb44c8d",
"id": "fdccb808",
"metadata": {},
"source": [
"We find that 146 of the 2,000 fund managers have a $q$-value below\n",
@@ -976,13 +978,13 @@
{
"cell_type": "code",
"execution_count": 20,
"id": "70b69b47",
"id": "1c230117",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:51.861924Z",
"iopub.status.busy": "2023-08-07T00:19:51.861522Z",
"iopub.status.idle": "2023-08-07T00:19:51.864394Z",
"shell.execute_reply": "2023-08-07T00:19:51.863987Z"
"iopub.execute_input": "2023-08-21T02:29:19.016857Z",
"iopub.status.busy": "2023-08-21T02:29:19.016769Z",
"iopub.status.idle": "2023-08-21T02:29:19.019332Z",
"shell.execute_reply": "2023-08-21T02:29:19.019032Z"
},
"lines_to_next_cell": 2
},
@@ -1004,7 +1006,7 @@
},
{
"cell_type": "markdown",
"id": "c8a969f4",
"id": "6112239d",
"metadata": {},
"source": [
"Figure 13.6 displays the ordered\n",
@@ -1026,13 +1028,13 @@
{
"cell_type": "code",
"execution_count": 21,
"id": "4c0ddea1",
"id": "62289650",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:51.865978Z",
"iopub.status.busy": "2023-08-07T00:19:51.865869Z",
"iopub.status.idle": "2023-08-07T00:19:51.868792Z",
"shell.execute_reply": "2023-08-07T00:19:51.868357Z"
"iopub.execute_input": "2023-08-21T02:29:19.021112Z",
"iopub.status.busy": "2023-08-21T02:29:19.020904Z",
"iopub.status.idle": "2023-08-21T02:29:19.023622Z",
"shell.execute_reply": "2023-08-21T02:29:19.023338Z"
}
},
"outputs": [],
@@ -1051,7 +1053,7 @@
},
{
"cell_type": "markdown",
"id": "ddeb3900",
"id": "c36b13b7",
"metadata": {},
"source": [
"We now reproduce the middle panel of Figure 13.6."
@@ -1060,13 +1062,13 @@
{
"cell_type": "code",
"execution_count": 22,
"id": "0314eac9",
"id": "18b3c0ed",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:51.871473Z",
"iopub.status.busy": "2023-08-07T00:19:51.871214Z",
"iopub.status.idle": "2023-08-07T00:19:52.126671Z",
"shell.execute_reply": "2023-08-07T00:19:52.126261Z"
"iopub.execute_input": "2023-08-21T02:29:19.025191Z",
"iopub.status.busy": "2023-08-21T02:29:19.025074Z",
"iopub.status.idle": "2023-08-21T02:29:19.262207Z",
"shell.execute_reply": "2023-08-21T02:29:19.261823Z"
},
"lines_to_next_cell": 2
},
@@ -1096,7 +1098,7 @@
},
{
"cell_type": "markdown",
"id": "83416f4a",
"id": "d87198e4",
"metadata": {},
"source": [
"## A Re-Sampling Approach\n",
@@ -1110,13 +1112,13 @@
{
"cell_type": "code",
"execution_count": 23,
"id": "b59b8137",
"id": "eb79e606",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:52.129167Z",
"iopub.status.busy": "2023-08-07T00:19:52.128842Z",
"iopub.status.idle": "2023-08-07T00:19:52.208320Z",
"shell.execute_reply": "2023-08-07T00:19:52.207936Z"
"iopub.execute_input": "2023-08-21T02:29:19.264174Z",
"iopub.status.busy": "2023-08-21T02:29:19.264030Z",
"iopub.status.idle": "2023-08-21T02:29:19.339232Z",
"shell.execute_reply": "2023-08-21T02:29:19.338912Z"
},
"lines_to_next_cell": 2
},
@@ -1145,7 +1147,7 @@
},
{
"cell_type": "markdown",
"id": "5534c8d4",
"id": "659ee2b8",
"metadata": {},
"source": [
"There are four classes of cancer. For each gene, we compare the mean\n",
@@ -1161,13 +1163,13 @@
{
"cell_type": "code",
"execution_count": 24,
"id": "96fb2f61",
"id": "1afbcf47",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:52.210235Z",
"iopub.status.busy": "2023-08-07T00:19:52.210101Z",
"iopub.status.idle": "2023-08-07T00:19:52.215004Z",
"shell.execute_reply": "2023-08-07T00:19:52.214604Z"
"iopub.execute_input": "2023-08-21T02:29:19.341009Z",
"iopub.status.busy": "2023-08-21T02:29:19.340889Z",
"iopub.status.idle": "2023-08-21T02:29:19.344670Z",
"shell.execute_reply": "2023-08-21T02:29:19.344391Z"
},
"lines_to_next_cell": 2
},
@@ -1195,7 +1197,7 @@
},
{
"cell_type": "markdown",
"id": "3131124e",
"id": "61f24919",
"metadata": {},
"source": [
"However, this $p$-value relies on the assumption that under the null\n",
@@ -1214,13 +1216,13 @@
{
"cell_type": "code",
"execution_count": 25,
"id": "fdc229fa",
"id": "f73f4c6d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:52.217206Z",
"iopub.status.busy": "2023-08-07T00:19:52.217085Z",
"iopub.status.idle": "2023-08-07T00:19:54.776066Z",
"shell.execute_reply": "2023-08-07T00:19:54.775642Z"
"iopub.execute_input": "2023-08-21T02:29:19.346368Z",
"iopub.status.busy": "2023-08-21T02:29:19.346227Z",
"iopub.status.idle": "2023-08-21T02:29:21.776569Z",
"shell.execute_reply": "2023-08-21T02:29:21.776267Z"
},
"lines_to_next_cell": 2
},
@@ -1253,7 +1255,7 @@
},
{
"cell_type": "markdown",
"id": "c7fc4557",
"id": "a97f74af",
"metadata": {},
"source": [
"This fraction, 0.0398,\n",
@@ -1265,13 +1267,13 @@
{
"cell_type": "code",
"execution_count": 26,
"id": "e3894695",
"id": "062daf19",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:54.778563Z",
"iopub.status.busy": "2023-08-07T00:19:54.778388Z",
"iopub.status.idle": "2023-08-07T00:19:55.017161Z",
"shell.execute_reply": "2023-08-07T00:19:55.016821Z"
"iopub.execute_input": "2023-08-21T02:29:21.778366Z",
"iopub.status.busy": "2023-08-21T02:29:21.778242Z",
"iopub.status.idle": "2023-08-21T02:29:21.990476Z",
"shell.execute_reply": "2023-08-21T02:29:21.989965Z"
},
"lines_to_next_cell": 0
},
@@ -1307,7 +1309,7 @@
},
{
"cell_type": "markdown",
"id": "3bd21158",
"id": "e81b939b",
"metadata": {},
"source": [
"The re-sampling-based null distribution is almost identical to the theoretical null distribution, which is displayed in red.\n",
@@ -1325,13 +1327,13 @@
{
"cell_type": "code",
"execution_count": 27,
"id": "3b7392cb",
"id": "6d14fcad",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:19:55.019036Z",
"iopub.status.busy": "2023-08-07T00:19:55.018920Z",
"iopub.status.idle": "2023-08-07T00:39:19.291005Z",
"shell.execute_reply": "2023-08-07T00:39:19.287314Z"
"iopub.execute_input": "2023-08-21T02:29:21.992665Z",
"iopub.status.busy": "2023-08-21T02:29:21.992515Z",
"iopub.status.idle": "2023-08-21T02:34:05.930300Z",
"shell.execute_reply": "2023-08-21T02:34:05.929181Z"
}
},
"outputs": [],
@@ -1358,7 +1360,7 @@
},
{
"cell_type": "markdown",
"id": "1b92df1b",
"id": "06286699",
"metadata": {},
"source": [
"Next, we compute the number of rejected null hypotheses $R$, the\n",
@@ -1371,13 +1373,13 @@
{
"cell_type": "code",
"execution_count": 28,
"id": "cac15616",
"id": "8f0ec909",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:39:19.314420Z",
"iopub.status.busy": "2023-08-07T00:39:19.313452Z",
"iopub.status.idle": "2023-08-07T00:39:19.544251Z",
"shell.execute_reply": "2023-08-07T00:39:19.543932Z"
"iopub.execute_input": "2023-08-21T02:34:05.935513Z",
"iopub.status.busy": "2023-08-21T02:34:05.935323Z",
"iopub.status.idle": "2023-08-21T02:34:06.118079Z",
"shell.execute_reply": "2023-08-21T02:34:06.117633Z"
}
},
"outputs": [],
@@ -1394,7 +1396,7 @@
},
{
"cell_type": "markdown",
"id": "f6779ea0",
"id": "e26b64c6",
"metadata": {},
"source": [
"Now, for any given FDR, we can find the genes that will be\n",
@@ -1412,13 +1414,13 @@
{
"cell_type": "code",
"execution_count": 29,
"id": "9661eb10",
"id": "f11339e5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:39:19.546693Z",
"iopub.status.busy": "2023-08-07T00:39:19.546543Z",
"iopub.status.idle": "2023-08-07T00:39:19.549970Z",
"shell.execute_reply": "2023-08-07T00:39:19.549697Z"
"iopub.execute_input": "2023-08-21T02:34:06.120138Z",
"iopub.status.busy": "2023-08-21T02:34:06.119994Z",
"iopub.status.idle": "2023-08-21T02:34:06.123846Z",
"shell.execute_reply": "2023-08-21T02:34:06.123478Z"
}
},
"outputs": [
@@ -1457,7 +1459,7 @@
},
{
"cell_type": "markdown",
"id": "001e3fc1",
"id": "e145621b",
"metadata": {},
"source": [
"At an FDR threshold of 0.2, more genes are selected, at the cost of having a higher expected\n",
@@ -1467,13 +1469,13 @@
{
"cell_type": "code",
"execution_count": 30,
"id": "18ad4900",
"id": "d2600773",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:39:19.552090Z",
"iopub.status.busy": "2023-08-07T00:39:19.552004Z",
"iopub.status.idle": "2023-08-07T00:39:19.554743Z",
"shell.execute_reply": "2023-08-07T00:39:19.554473Z"
"iopub.execute_input": "2023-08-21T02:34:06.126460Z",
"iopub.status.busy": "2023-08-21T02:34:06.126346Z",
"iopub.status.idle": "2023-08-21T02:34:06.129561Z",
"shell.execute_reply": "2023-08-21T02:34:06.129124Z"
}
},
"outputs": [
@@ -1524,7 +1526,7 @@
},
{
"cell_type": "markdown",
"id": "8767f70c",
"id": "32e600ff",
"metadata": {},
"source": [
"The next line generates Figure 13.11, which is similar\n",
@@ -1535,13 +1537,13 @@
{
"cell_type": "code",
"execution_count": 31,
"id": "28c276b6",
"id": "924b7705",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-07T00:39:19.556715Z",
"iopub.status.busy": "2023-08-07T00:39:19.556515Z",
"iopub.status.idle": "2023-08-07T00:39:19.650514Z",
"shell.execute_reply": "2023-08-07T00:39:19.650181Z"
"iopub.execute_input": "2023-08-21T02:34:06.131323Z",
"iopub.status.busy": "2023-08-21T02:34:06.131207Z",
"iopub.status.idle": "2023-08-21T02:34:06.216626Z",
"shell.execute_reply": "2023-08-21T02:34:06.216270Z"
},
"lines_to_next_cell": 0
},
@@ -1566,10 +1568,10 @@
},
{
"cell_type": "markdown",
"id": "e4b5d621",
"id": "b9f54695",
"metadata": {},
"source": [
"\n"
" \n"
]
}
],

View File

@@ -1,25 +1,23 @@
# ISLP_labs
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/intro-stat-learning/ISLP_labs/v2)
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/intro-stat-learning/ISLP_labs/v2.1)
Up-to-date version of labs for ISLP.
This repo will track labs for ISLP as their source code changes. The
intent is that building a conda environment with
`requirements.txt` will reproduce
the results in this repo.
`requirements.txt` will reproduce the results in this repo.
To install the current version of the requirements run
```
pip install -r https://raw.githubusercontent.com/intro-stat-learning/ISLP_labs/v2/requirements.txt;
pip install -r https://raw.githubusercontent.com/intro-stat-learning/ISLP_labs/v2.1/requirements.txt;
```
The labs can now be run from this directory:
```
cd notebook;
jupyter lab Ch02-statlearn-lab.ipynb
```

View File

@@ -1,17 +1,16 @@
numpy>=1.7.1
scipy>=0.9
pandas>=0.20
pandas<=1.9
lxml # pandas needs this for html
scikit-learn>=1.2
joblib
statsmodels>=0.13
lifelines
pygam # for GAM in Ch7
l0bnb==1.0.0 # for bestsubsets
ISLP
torch
pytorch_lightning
torchmetrics
torchvision
torchinfo
numpy==1.24.2
scipy==1.11.1
pandas==1.5.3
lxml==4.9.3
scikit-learn==1.3.0
joblib==1.3.1
statsmodels==0.14.0
lifelines==0.27.7
pygam==0.9.0
l0bnb==1.0.0
torch==2.0.1
torchvision==0.15.2
pytorch-lightning==2.0.6
torchinfo==1.8.0
torchmetrics==1.0.1
ISLP==0.3.19