v2 version of labs

This commit is contained in:
Jonathan Taylor
2023-08-06 10:59:09 -07:00
parent a066a9e696
commit 53fc0b42d2
11 changed files with 4413 additions and 3824 deletions

View File

@@ -5,7 +5,9 @@
"id": "c7f4eb5a",
"metadata": {},
"source": [
"# Chapter 11"
"\n",
"# Chapter 11\n",
"\n"
]
},
{
@@ -32,10 +34,10 @@
"id": "91ac40fd",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:36.910686Z",
"iopub.status.busy": "2023-07-31T02:15:36.910197Z",
"iopub.status.idle": "2023-07-31T02:15:38.043363Z",
"shell.execute_reply": "2023-07-31T02:15:38.043019Z"
"iopub.execute_input": "2023-08-06T17:35:46.303006Z",
"iopub.status.busy": "2023-08-06T17:35:46.302426Z",
"iopub.status.idle": "2023-08-06T17:35:47.388674Z",
"shell.execute_reply": "2023-08-06T17:35:47.388193Z"
}
},
"outputs": [],
@@ -44,7 +46,7 @@
"import numpy as np\n",
"import pandas as pd\n",
"from ISLP.models import ModelSpec as MS\n",
"from ISLP import load_data"
"from ISLP import load_data\n"
]
},
{
@@ -62,10 +64,10 @@
"id": "99782418",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.045310Z",
"iopub.status.busy": "2023-07-31T02:15:38.045152Z",
"iopub.status.idle": "2023-07-31T02:15:38.137179Z",
"shell.execute_reply": "2023-07-31T02:15:38.136883Z"
"iopub.execute_input": "2023-08-06T17:35:47.390741Z",
"iopub.status.busy": "2023-08-06T17:35:47.390555Z",
"iopub.status.idle": "2023-08-06T17:35:47.484777Z",
"shell.execute_reply": "2023-08-06T17:35:47.484422Z"
}
},
"outputs": [],
@@ -76,7 +78,7 @@
"from lifelines.statistics import \\\n",
" (logrank_test,\n",
" multivariate_logrank_test)\n",
"from ISLP.survival import sim_time"
"from ISLP.survival import sim_time\n"
]
},
{
@@ -95,10 +97,10 @@
"id": "3137149a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.138950Z",
"iopub.status.busy": "2023-07-31T02:15:38.138844Z",
"iopub.status.idle": "2023-07-31T02:15:38.145075Z",
"shell.execute_reply": "2023-07-31T02:15:38.144817Z"
"iopub.execute_input": "2023-08-06T17:35:47.486501Z",
"iopub.status.busy": "2023-08-06T17:35:47.486385Z",
"iopub.status.idle": "2023-08-06T17:35:47.493362Z",
"shell.execute_reply": "2023-08-06T17:35:47.493058Z"
}
},
"outputs": [
@@ -115,7 +117,7 @@
],
"source": [
"BrainCancer = load_data('BrainCancer')\n",
"BrainCancer.columns"
"BrainCancer.columns\n"
]
},
{
@@ -133,11 +135,12 @@
"id": "45963c92",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.146568Z",
"iopub.status.busy": "2023-07-31T02:15:38.146458Z",
"iopub.status.idle": "2023-07-31T02:15:38.149482Z",
"shell.execute_reply": "2023-07-31T02:15:38.149235Z"
}
"iopub.execute_input": "2023-08-06T17:35:47.494963Z",
"iopub.status.busy": "2023-08-06T17:35:47.494863Z",
"iopub.status.idle": "2023-08-06T17:35:47.497996Z",
"shell.execute_reply": "2023-08-06T17:35:47.497741Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -154,7 +157,7 @@
}
],
"source": [
"BrainCancer['sex'].value_counts()"
"BrainCancer['sex'].value_counts()\n"
]
},
{
@@ -163,11 +166,12 @@
"id": "73be61f6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.151583Z",
"iopub.status.busy": "2023-07-31T02:15:38.151488Z",
"iopub.status.idle": "2023-07-31T02:15:38.153999Z",
"shell.execute_reply": "2023-07-31T02:15:38.153712Z"
}
"iopub.execute_input": "2023-08-06T17:35:47.499414Z",
"iopub.status.busy": "2023-08-06T17:35:47.499312Z",
"iopub.status.idle": "2023-08-06T17:35:47.502029Z",
"shell.execute_reply": "2023-08-06T17:35:47.501779Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -186,7 +190,7 @@
}
],
"source": [
"BrainCancer['diagnosis'].value_counts()"
"BrainCancer['diagnosis'].value_counts()\n"
]
},
{
@@ -195,11 +199,12 @@
"id": "572f0b9e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.155561Z",
"iopub.status.busy": "2023-07-31T02:15:38.155460Z",
"iopub.status.idle": "2023-07-31T02:15:38.158411Z",
"shell.execute_reply": "2023-07-31T02:15:38.158146Z"
}
"iopub.execute_input": "2023-08-06T17:35:47.503331Z",
"iopub.status.busy": "2023-08-06T17:35:47.503251Z",
"iopub.status.idle": "2023-08-06T17:35:47.506059Z",
"shell.execute_reply": "2023-08-06T17:35:47.505826Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -216,7 +221,7 @@
}
],
"source": [
"BrainCancer['status'].value_counts()"
"BrainCancer['status'].value_counts()\n"
]
},
{
@@ -250,10 +255,10 @@
"id": "92c39707",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.159867Z",
"iopub.status.busy": "2023-07-31T02:15:38.159768Z",
"iopub.status.idle": "2023-07-31T02:15:38.465840Z",
"shell.execute_reply": "2023-07-31T02:15:38.465244Z"
"iopub.execute_input": "2023-08-06T17:35:47.507415Z",
"iopub.status.busy": "2023-08-06T17:35:47.507329Z",
"iopub.status.idle": "2023-08-06T17:35:47.692016Z",
"shell.execute_reply": "2023-08-06T17:35:47.690384Z"
}
},
"outputs": [
@@ -282,7 +287,7 @@
"fig, ax = subplots(figsize=(8,8))\n",
"km = KaplanMeierFitter()\n",
"km_brain = km.fit(BrainCancer['time'], BrainCancer['status'])\n",
"km_brain.plot(label='Kaplan Meier estimate', ax=ax)"
"km_brain.plot(label='Kaplan Meier estimate', ax=ax)\n"
]
},
{
@@ -316,10 +321,10 @@
"id": "3fc7848c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.468449Z",
"iopub.status.busy": "2023-07-31T02:15:38.468262Z",
"iopub.status.idle": "2023-07-31T02:15:38.604720Z",
"shell.execute_reply": "2023-07-31T02:15:38.604313Z"
"iopub.execute_input": "2023-08-06T17:35:47.696231Z",
"iopub.status.busy": "2023-08-06T17:35:47.695950Z",
"iopub.status.idle": "2023-08-06T17:35:47.857113Z",
"shell.execute_reply": "2023-08-06T17:35:47.856731Z"
}
},
"outputs": [
@@ -340,7 +345,7 @@
"for sex, df in BrainCancer.groupby('sex'):\n",
" by_sex[sex] = df\n",
" km_sex = km.fit(df['time'], df['status'])\n",
" km_sex.plot(label='Sex=%s' % sex, ax=ax)"
" km_sex.plot(label='Sex=%s' % sex, ax=ax)\n"
]
},
{
@@ -361,11 +366,12 @@
"id": "bf30d26f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.606730Z",
"iopub.status.busy": "2023-07-31T02:15:38.606598Z",
"iopub.status.idle": "2023-07-31T02:15:38.666348Z",
"shell.execute_reply": "2023-07-31T02:15:38.665926Z"
}
"iopub.execute_input": "2023-08-06T17:35:47.858891Z",
"iopub.status.busy": "2023-08-06T17:35:47.858766Z",
"iopub.status.idle": "2023-08-06T17:35:47.913319Z",
"shell.execute_reply": "2023-08-06T17:35:47.913028Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -451,7 +457,7 @@
"logrank_test(by_sex['Male']['time'],\n",
" by_sex['Female']['time'],\n",
" by_sex['Male']['status'],\n",
" by_sex['Female']['status'])"
" by_sex['Female']['status'])\n"
]
},
{
@@ -473,10 +479,10 @@
"id": "2ab78e07",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.668086Z",
"iopub.status.busy": "2023-07-31T02:15:38.667893Z",
"iopub.status.idle": "2023-07-31T02:15:38.695653Z",
"shell.execute_reply": "2023-07-31T02:15:38.695352Z"
"iopub.execute_input": "2023-08-06T17:35:47.914969Z",
"iopub.status.busy": "2023-08-06T17:35:47.914826Z",
"iopub.status.idle": "2023-08-06T17:35:47.941528Z",
"shell.execute_reply": "2023-08-06T17:35:47.941277Z"
}
},
"outputs": [
@@ -542,7 +548,7 @@
"cox_fit = coxph().fit(model_df,\n",
" 'time',\n",
" 'status')\n",
"cox_fit.summary[['coef', 'se(coef)', 'p']]"
"cox_fit.summary[['coef', 'se(coef)', 'p']]\n"
]
},
{
@@ -566,10 +572,10 @@
"id": "4716b7b0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.697533Z",
"iopub.status.busy": "2023-07-31T02:15:38.697411Z",
"iopub.status.idle": "2023-07-31T02:15:38.703099Z",
"shell.execute_reply": "2023-07-31T02:15:38.702785Z"
"iopub.execute_input": "2023-08-06T17:35:47.943061Z",
"iopub.status.busy": "2023-08-06T17:35:47.942963Z",
"iopub.status.idle": "2023-08-06T17:35:47.948065Z",
"shell.execute_reply": "2023-08-06T17:35:47.947785Z"
}
},
"outputs": [
@@ -648,7 +654,7 @@
}
],
"source": [
"cox_fit.log_likelihood_ratio_test()"
"cox_fit.log_likelihood_ratio_test()\n"
]
},
{
@@ -672,10 +678,10 @@
"id": "c2767d88",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.704759Z",
"iopub.status.busy": "2023-07-31T02:15:38.704654Z",
"iopub.status.idle": "2023-07-31T02:15:38.742754Z",
"shell.execute_reply": "2023-07-31T02:15:38.742458Z"
"iopub.execute_input": "2023-08-06T17:35:47.949725Z",
"iopub.status.busy": "2023-08-06T17:35:47.949641Z",
"iopub.status.idle": "2023-08-06T17:35:47.982167Z",
"shell.execute_reply": "2023-08-06T17:35:47.981821Z"
}
},
"outputs": [
@@ -789,7 +795,7 @@
"fit_all = coxph().fit(all_df,\n",
" 'time',\n",
" 'status')\n",
"fit_all.summary[['coef', 'se(coef)', 'p']]"
"fit_all.summary[['coef', 'se(coef)', 'p']]\n"
]
},
{
@@ -820,10 +826,10 @@
"id": "ede1d219",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.744507Z",
"iopub.status.busy": "2023-07-31T02:15:38.744382Z",
"iopub.status.idle": "2023-07-31T02:15:38.748245Z",
"shell.execute_reply": "2023-07-31T02:15:38.747960Z"
"iopub.execute_input": "2023-08-06T17:35:47.983958Z",
"iopub.status.busy": "2023-08-06T17:35:47.983832Z",
"iopub.status.idle": "2023-08-06T17:35:47.989895Z",
"shell.execute_reply": "2023-08-06T17:35:47.989591Z"
}
},
"outputs": [],
@@ -834,7 +840,7 @@
" return pd.Series.mode(series)\n",
" else:\n",
" return series.mean()\n",
"modal_data = cleaned.apply(representative, axis=0)"
"modal_data = cleaned.apply(representative, axis=0)\n"
]
},
{
@@ -853,10 +859,10 @@
"id": "dc032a71",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.749853Z",
"iopub.status.busy": "2023-07-31T02:15:38.749753Z",
"iopub.status.idle": "2023-07-31T02:15:38.755760Z",
"shell.execute_reply": "2023-07-31T02:15:38.755479Z"
"iopub.execute_input": "2023-08-06T17:35:47.991841Z",
"iopub.status.busy": "2023-08-06T17:35:47.991705Z",
"iopub.status.idle": "2023-08-06T17:35:47.997910Z",
"shell.execute_reply": "2023-08-06T17:35:47.997622Z"
}
},
"outputs": [
@@ -963,7 +969,7 @@
"modal_df = pd.DataFrame(\n",
" [modal_data.iloc[0] for _ in range(len(levels))])\n",
"modal_df['diagnosis'] = levels\n",
"modal_df"
"modal_df\n"
]
},
{
@@ -981,10 +987,10 @@
"id": "e7c1fe43",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.757256Z",
"iopub.status.busy": "2023-07-31T02:15:38.757143Z",
"iopub.status.idle": "2023-07-31T02:15:38.764838Z",
"shell.execute_reply": "2023-07-31T02:15:38.764420Z"
"iopub.execute_input": "2023-08-06T17:35:47.999542Z",
"iopub.status.busy": "2023-08-06T17:35:47.999430Z",
"iopub.status.idle": "2023-08-06T17:35:48.007263Z",
"shell.execute_reply": "2023-08-06T17:35:48.006958Z"
}
},
"outputs": [
@@ -1106,7 +1112,7 @@
"source": [
"modal_X = all_MS.transform(modal_df)\n",
"modal_X.index = levels\n",
"modal_X"
"modal_X\n"
]
},
{
@@ -1123,11 +1129,12 @@
"id": "f89fbed7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.766834Z",
"iopub.status.busy": "2023-07-31T02:15:38.766698Z",
"iopub.status.idle": "2023-07-31T02:15:38.774286Z",
"shell.execute_reply": "2023-07-31T02:15:38.774004Z"
}
"iopub.execute_input": "2023-08-06T17:35:48.008740Z",
"iopub.status.busy": "2023-08-06T17:35:48.008640Z",
"iopub.status.idle": "2023-08-06T17:35:48.015006Z",
"shell.execute_reply": "2023-08-06T17:35:48.014745Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1264,7 +1271,7 @@
],
"source": [
"predicted_survival = fit_all.predict_survival_function(modal_X)\n",
"predicted_survival"
"predicted_survival\n"
]
},
{
@@ -1283,11 +1290,12 @@
"id": "8f0329b4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.776165Z",
"iopub.status.busy": "2023-07-31T02:15:38.776021Z",
"iopub.status.idle": "2023-07-31T02:15:38.882750Z",
"shell.execute_reply": "2023-07-31T02:15:38.882417Z"
}
"iopub.execute_input": "2023-08-06T17:35:48.016512Z",
"iopub.status.busy": "2023-08-06T17:35:48.016391Z",
"iopub.status.idle": "2023-08-06T17:35:48.128436Z",
"shell.execute_reply": "2023-08-06T17:35:48.127998Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1303,7 +1311,7 @@
],
"source": [
"fig, ax = subplots(figsize=(8, 8))\n",
"predicted_survival.plot(ax=ax);"
"predicted_survival.plot(ax=ax);\n"
]
},
{
@@ -1325,10 +1333,10 @@
"id": "3045bfc0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:38.884377Z",
"iopub.status.busy": "2023-07-31T02:15:38.884272Z",
"iopub.status.idle": "2023-07-31T02:15:38.999104Z",
"shell.execute_reply": "2023-07-31T02:15:38.998778Z"
"iopub.execute_input": "2023-08-06T17:35:48.130335Z",
"iopub.status.busy": "2023-08-06T17:35:48.130198Z",
"iopub.status.idle": "2023-08-06T17:35:48.248098Z",
"shell.execute_reply": "2023-08-06T17:35:48.247765Z"
}
},
"outputs": [
@@ -1350,7 +1358,7 @@
"for result, df in Publication.groupby('posres'):\n",
" by_result[result] = df\n",
" km_result = km.fit(df['time'], df['status'])\n",
" km_result.plot(label='Result=%d' % result, ax=ax)"
" km_result.plot(label='Result=%d' % result, ax=ax)\n"
]
},
{
@@ -1370,11 +1378,12 @@
"id": "d070f716",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.000963Z",
"iopub.status.busy": "2023-07-31T02:15:39.000831Z",
"iopub.status.idle": "2023-07-31T02:15:39.031731Z",
"shell.execute_reply": "2023-07-31T02:15:39.031439Z"
}
"iopub.execute_input": "2023-08-06T17:35:48.249785Z",
"iopub.status.busy": "2023-08-06T17:35:48.249668Z",
"iopub.status.idle": "2023-08-06T17:35:48.282954Z",
"shell.execute_reply": "2023-08-06T17:35:48.282630Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1439,7 +1448,7 @@
"posres_fit = coxph().fit(posres_df,\n",
" 'time',\n",
" 'status')\n",
"posres_fit.summary[['coef', 'se(coef)', 'p']]"
"posres_fit.summary[['coef', 'se(coef)', 'p']]\n"
]
},
{
@@ -1458,10 +1467,10 @@
"id": "2bbcdd0c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.033405Z",
"iopub.status.busy": "2023-07-31T02:15:39.033297Z",
"iopub.status.idle": "2023-07-31T02:15:39.071871Z",
"shell.execute_reply": "2023-07-31T02:15:39.071602Z"
"iopub.execute_input": "2023-08-06T17:35:48.284714Z",
"iopub.status.busy": "2023-08-06T17:35:48.284593Z",
"iopub.status.idle": "2023-08-06T17:35:48.323890Z",
"shell.execute_reply": "2023-08-06T17:35:48.323545Z"
}
},
"outputs": [
@@ -1559,7 +1568,7 @@
" intercept=False)\n",
"coxph().fit(model.fit_transform(Publication),\n",
" 'time',\n",
" 'status').summary[['coef', 'se(coef)', 'p']]"
" 'status').summary[['coef', 'se(coef)', 'p']]\n"
]
},
{
@@ -1593,7 +1602,7 @@
"`Time` of day (Morning, Afternoon, or Evening). We generate data\n",
"for these covariates so that all possibilities are equally likely: for\n",
"instance, morning, afternoon and evening calls are equally likely, and\n",
"any number of operators from $5$ to $15$ is equally likely."
"any number of operators from $5$ to $15$ is equally likely. "
]
},
{
@@ -1602,10 +1611,10 @@
"id": "b8ece43a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.073478Z",
"iopub.status.busy": "2023-07-31T02:15:39.073379Z",
"iopub.status.idle": "2023-07-31T02:15:39.076907Z",
"shell.execute_reply": "2023-07-31T02:15:39.076640Z"
"iopub.execute_input": "2023-08-06T17:35:48.325634Z",
"iopub.status.busy": "2023-08-06T17:35:48.325517Z",
"iopub.status.idle": "2023-08-06T17:35:48.329272Z",
"shell.execute_reply": "2023-08-06T17:35:48.328978Z"
}
},
"outputs": [],
@@ -1640,10 +1649,10 @@
"id": "3e4f766f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.078347Z",
"iopub.status.busy": "2023-07-31T02:15:39.078266Z",
"iopub.status.idle": "2023-07-31T02:15:39.085129Z",
"shell.execute_reply": "2023-07-31T02:15:39.084865Z"
"iopub.execute_input": "2023-08-06T17:35:48.330871Z",
"iopub.status.busy": "2023-08-06T17:35:48.330782Z",
"iopub.status.idle": "2023-08-06T17:35:48.337958Z",
"shell.execute_reply": "2023-08-06T17:35:48.337672Z"
}
},
"outputs": [],
@@ -1672,10 +1681,10 @@
"id": "72f42d14",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.086703Z",
"iopub.status.busy": "2023-07-31T02:15:39.086584Z",
"iopub.status.idle": "2023-07-31T02:15:39.090844Z",
"shell.execute_reply": "2023-07-31T02:15:39.090578Z"
"iopub.execute_input": "2023-08-06T17:35:48.339669Z",
"iopub.status.busy": "2023-08-06T17:35:48.339578Z",
"iopub.status.idle": "2023-08-06T17:35:48.343948Z",
"shell.execute_reply": "2023-08-06T17:35:48.343688Z"
}
},
"outputs": [
@@ -1767,7 +1776,7 @@
}
],
"source": [
"X[:5]"
"X[:5]\n"
]
},
{
@@ -1784,17 +1793,17 @@
"id": "8b921536",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.092615Z",
"iopub.status.busy": "2023-07-31T02:15:39.092336Z",
"iopub.status.idle": "2023-07-31T02:15:39.103571Z",
"shell.execute_reply": "2023-07-31T02:15:39.101990Z"
"iopub.execute_input": "2023-08-06T17:35:48.345389Z",
"iopub.status.busy": "2023-08-06T17:35:48.345291Z",
"iopub.status.idle": "2023-08-06T17:35:48.350596Z",
"shell.execute_reply": "2023-08-06T17:35:48.349892Z"
}
},
"outputs": [],
"source": [
"true_beta = np.array([0.04, -0.3, 0, 0.2, -0.2])\n",
"true_linpred = X.dot(true_beta)\n",
"hazard = lambda t: 1e-5 * t"
"hazard = lambda t: 1e-5 * t\n"
]
},
{
@@ -1831,15 +1840,16 @@
"id": "96ce0f99",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.107799Z",
"iopub.status.busy": "2023-07-31T02:15:39.107513Z",
"iopub.status.idle": "2023-07-31T02:15:39.111944Z",
"shell.execute_reply": "2023-07-31T02:15:39.110837Z"
}
"iopub.execute_input": "2023-08-06T17:35:48.356110Z",
"iopub.status.busy": "2023-08-06T17:35:48.355787Z",
"iopub.status.idle": "2023-08-06T17:35:48.360120Z",
"shell.execute_reply": "2023-08-06T17:35:48.358812Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
"cum_hazard = lambda t: 1e-5 * t**2 / 2"
"cum_hazard = lambda t: 1e-5 * t**2 / 2\n"
]
},
{
@@ -1861,17 +1871,17 @@
"id": "63d78ff9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.116637Z",
"iopub.status.busy": "2023-07-31T02:15:39.116086Z",
"iopub.status.idle": "2023-07-31T02:15:39.297425Z",
"shell.execute_reply": "2023-07-31T02:15:39.293868Z"
"iopub.execute_input": "2023-08-06T17:35:48.363547Z",
"iopub.status.busy": "2023-08-06T17:35:48.363232Z",
"iopub.status.idle": "2023-08-06T17:35:48.547724Z",
"shell.execute_reply": "2023-08-06T17:35:48.547018Z"
}
},
"outputs": [],
"source": [
"W = np.array([sim_time(l, cum_hazard, rng)\n",
" for l in true_linpred])\n",
"D['Wait time'] = np.clip(W, 0, 1000)"
"D['Wait time'] = np.clip(W, 0, 1000)\n"
]
},
{
@@ -1890,11 +1900,12 @@
"id": "fe008dbf",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.312187Z",
"iopub.status.busy": "2023-07-31T02:15:39.311513Z",
"iopub.status.idle": "2023-07-31T02:15:39.327004Z",
"shell.execute_reply": "2023-07-31T02:15:39.326346Z"
}
"iopub.execute_input": "2023-08-06T17:35:48.552971Z",
"iopub.status.busy": "2023-08-06T17:35:48.552635Z",
"iopub.status.idle": "2023-08-06T17:35:48.584979Z",
"shell.execute_reply": "2023-08-06T17:35:48.563731Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1988,7 +1999,7 @@
"D['Failed'] = rng.choice([1, 0],\n",
" N,\n",
" p=[0.9, 0.1])\n",
"D[:5]"
"D[:5]\n"
]
},
{
@@ -1997,10 +2008,10 @@
"id": "c3a2bec7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.330721Z",
"iopub.status.busy": "2023-07-31T02:15:39.330474Z",
"iopub.status.idle": "2023-07-31T02:15:39.363217Z",
"shell.execute_reply": "2023-07-31T02:15:39.362899Z"
"iopub.execute_input": "2023-08-06T17:35:48.615161Z",
"iopub.status.busy": "2023-08-06T17:35:48.614999Z",
"iopub.status.idle": "2023-08-06T17:35:48.618097Z",
"shell.execute_reply": "2023-08-06T17:35:48.617615Z"
}
},
"outputs": [
@@ -2016,7 +2027,7 @@
}
],
"source": [
"D['Failed'].mean()"
"D['Failed'].mean()\n"
]
},
{
@@ -2033,10 +2044,10 @@
"id": "2b27af56",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.364912Z",
"iopub.status.busy": "2023-07-31T02:15:39.364808Z",
"iopub.status.idle": "2023-07-31T02:15:39.567228Z",
"shell.execute_reply": "2023-07-31T02:15:39.566838Z"
"iopub.execute_input": "2023-08-06T17:35:48.620507Z",
"iopub.status.busy": "2023-08-06T17:35:48.620371Z",
"iopub.status.idle": "2023-08-06T17:35:48.790525Z",
"shell.execute_reply": "2023-08-06T17:35:48.790116Z"
}
},
"outputs": [
@@ -2068,7 +2079,7 @@
" by_center[center] = df\n",
" km_center = km.fit(df['Wait time'], df['Failed'])\n",
" km_center.plot(label='Center=%s' % center, ax=ax)\n",
"ax.set_title(\"Probability of Still Being on Hold\")"
"ax.set_title(\"Probability of Still Being on Hold\")\n"
]
},
{
@@ -2085,10 +2096,10 @@
"id": "9625598d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.569040Z",
"iopub.status.busy": "2023-07-31T02:15:39.568920Z",
"iopub.status.idle": "2023-07-31T02:15:39.823091Z",
"shell.execute_reply": "2023-07-31T02:15:39.822627Z"
"iopub.execute_input": "2023-08-06T17:35:48.792256Z",
"iopub.status.busy": "2023-08-06T17:35:48.792159Z",
"iopub.status.idle": "2023-08-06T17:35:49.004599Z",
"shell.execute_reply": "2023-08-06T17:35:49.004246Z"
}
},
"outputs": [
@@ -2120,7 +2131,7 @@
" by_time[time] = df\n",
" km_time = km.fit(df['Wait time'], df['Failed'])\n",
" km_time.plot(label='Time=%s' % time, ax=ax)\n",
"ax.set_title(\"Probability of Still Being on Hold\")"
"ax.set_title(\"Probability of Still Being on Hold\")\n"
]
},
{
@@ -2141,11 +2152,12 @@
"id": "75a744ef",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.825758Z",
"iopub.status.busy": "2023-07-31T02:15:39.825581Z",
"iopub.status.idle": "2023-07-31T02:15:39.847974Z",
"shell.execute_reply": "2023-07-31T02:15:39.847624Z"
}
"iopub.execute_input": "2023-08-06T17:35:49.006368Z",
"iopub.status.busy": "2023-08-06T17:35:49.006251Z",
"iopub.status.idle": "2023-08-06T17:35:49.026122Z",
"shell.execute_reply": "2023-08-06T17:35:49.025786Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -2230,7 +2242,7 @@
"source": [
"multivariate_logrank_test(D['Wait time'],\n",
" D['Center'],\n",
" D['Failed'])"
" D['Failed'])\n"
]
},
{
@@ -2247,11 +2259,12 @@
"id": "9badb3e3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.850725Z",
"iopub.status.busy": "2023-07-31T02:15:39.850538Z",
"iopub.status.idle": "2023-07-31T02:15:39.873327Z",
"shell.execute_reply": "2023-07-31T02:15:39.873038Z"
}
"iopub.execute_input": "2023-08-06T17:35:49.027909Z",
"iopub.status.busy": "2023-08-06T17:35:49.027782Z",
"iopub.status.idle": "2023-08-06T17:35:49.046955Z",
"shell.execute_reply": "2023-08-06T17:35:49.046606Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -2336,7 +2349,7 @@
"source": [
"multivariate_logrank_test(D['Wait time'],\n",
" D['Time'],\n",
" D['Failed'])"
" D['Failed'])\n"
]
},
{
@@ -2356,11 +2369,12 @@
"id": "026e9ff8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:39.875394Z",
"iopub.status.busy": "2023-07-31T02:15:39.875265Z",
"iopub.status.idle": "2023-07-31T02:15:40.004667Z",
"shell.execute_reply": "2023-07-31T02:15:40.004293Z"
}
"iopub.execute_input": "2023-08-06T17:35:49.048485Z",
"iopub.status.busy": "2023-08-06T17:35:49.048378Z",
"iopub.status.idle": "2023-08-06T17:35:49.175538Z",
"shell.execute_reply": "2023-08-06T17:35:49.175268Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -2443,7 +2457,7 @@
" 'Center'],\n",
" intercept=False).fit_transform(D)\n",
"F = coxph().fit(X, 'Wait time', 'Failed')\n",
"F.log_likelihood_ratio_test()"
"F.log_likelihood_ratio_test()\n"
]
},
{
@@ -2460,11 +2474,12 @@
"id": "7cab3789",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:40.006886Z",
"iopub.status.busy": "2023-07-31T02:15:40.006736Z",
"iopub.status.idle": "2023-07-31T02:15:40.134843Z",
"shell.execute_reply": "2023-07-31T02:15:40.134522Z"
}
"iopub.execute_input": "2023-08-06T17:35:49.177180Z",
"iopub.status.busy": "2023-08-06T17:35:49.177056Z",
"iopub.status.idle": "2023-08-06T17:35:49.306158Z",
"shell.execute_reply": "2023-08-06T17:35:49.305806Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -2547,7 +2562,7 @@
" 'Time'],\n",
" intercept=False).fit_transform(D)\n",
"F = coxph().fit(X, 'Wait time', 'Failed')\n",
"F.log_likelihood_ratio_test()"
"F.log_likelihood_ratio_test()\n"
]
},
{
@@ -2567,11 +2582,12 @@
"id": "5cc4b898",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:40.136769Z",
"iopub.status.busy": "2023-07-31T02:15:40.136659Z",
"iopub.status.idle": "2023-07-31T02:15:40.462485Z",
"shell.execute_reply": "2023-07-31T02:15:40.457034Z"
}
"iopub.execute_input": "2023-08-06T17:35:49.308130Z",
"iopub.status.busy": "2023-08-06T17:35:49.307985Z",
"iopub.status.idle": "2023-08-06T17:35:49.596821Z",
"shell.execute_reply": "2023-08-06T17:35:49.585317Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -2663,7 +2679,7 @@
" X,\n",
" 'Wait time',\n",
" 'Failed')\n",
"fit_queuing.summary[['coef', 'se(coef)', 'p']]"
"fit_queuing.summary[['coef', 'se(coef)', 'p']]\n"
]
},
{
@@ -2679,15 +2695,16 @@
" `Operators`, `Center = B`, `Center = C`, \n",
"`Time = Even.` and `Time = Morn.` are $0.04$, $-0.3$,\n",
"$0$, $0.2$, and $-0.2$, respectively. The coefficient estimates\n",
"from the fitted Cox model are fairly accurate."
"from the fitted Cox model are fairly accurate.\n",
"\n"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"formats": "ipynb,md:myst",
"main_language": "python"
"main_language": "python",
"notebook_metadata_filter": "-all"
},
"language_info": {
"codemirror_mode": {

File diff suppressed because it is too large Load Diff

View File

@@ -5,9 +5,11 @@
"id": "75b2d75c",
"metadata": {},
"source": [
"\n",
"# Chapter 13\n",
"\n",
"# Lab: Multiple Testing"
"# Lab: Multiple Testing\n",
" "
]
},
{
@@ -24,10 +26,10 @@
"id": "1f928b2d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:50.715151Z",
"iopub.status.busy": "2023-07-31T02:15:50.714766Z",
"iopub.status.idle": "2023-07-31T02:15:51.778071Z",
"shell.execute_reply": "2023-07-31T02:15:51.777549Z"
"iopub.execute_input": "2023-08-06T17:36:00.152825Z",
"iopub.status.busy": "2023-08-06T17:36:00.151975Z",
"iopub.status.idle": "2023-08-06T17:36:01.254245Z",
"shell.execute_reply": "2023-08-06T17:36:01.253710Z"
}
},
"outputs": [],
@@ -36,7 +38,7 @@
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import statsmodels.api as sm\n",
"from ISLP import load_data"
"from ISLP import load_data\n"
]
},
{
@@ -54,11 +56,12 @@
"id": "eb4b32aa",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:51.780386Z",
"iopub.status.busy": "2023-07-31T02:15:51.780091Z",
"iopub.status.idle": "2023-07-31T02:15:51.782579Z",
"shell.execute_reply": "2023-07-31T02:15:51.782295Z"
}
"iopub.execute_input": "2023-08-06T17:36:01.256495Z",
"iopub.status.busy": "2023-08-06T17:36:01.256216Z",
"iopub.status.idle": "2023-08-06T17:36:01.258709Z",
"shell.execute_reply": "2023-08-06T17:36:01.258467Z"
},
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
@@ -70,7 +73,7 @@
"from statsmodels.stats.multicomp import \\\n",
" pairwise_tukeyhsd\n",
"from statsmodels.stats.multitest import \\\n",
" multipletests as mult_test"
" multipletests as mult_test\n"
]
},
{
@@ -92,10 +95,10 @@
"id": "e12ac0cd",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:51.784074Z",
"iopub.status.busy": "2023-07-31T02:15:51.783969Z",
"iopub.status.idle": "2023-07-31T02:15:51.786285Z",
"shell.execute_reply": "2023-07-31T02:15:51.786035Z"
"iopub.execute_input": "2023-08-06T17:36:01.260260Z",
"iopub.status.busy": "2023-08-06T17:36:01.260153Z",
"iopub.status.idle": "2023-08-06T17:36:01.262414Z",
"shell.execute_reply": "2023-08-06T17:36:01.262174Z"
}
},
"outputs": [],
@@ -103,7 +106,7 @@
"rng = np.random.default_rng(12)\n",
"X = rng.standard_normal((10, 100))\n",
"true_mean = np.array([0.5]*50 + [0]*50)\n",
"X += true_mean[None,:]"
"X += true_mean[None,:]\n"
]
},
{
@@ -122,10 +125,10 @@
"id": "04d0f49e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:51.787724Z",
"iopub.status.busy": "2023-07-31T02:15:51.787627Z",
"iopub.status.idle": "2023-07-31T02:15:51.791755Z",
"shell.execute_reply": "2023-07-31T02:15:51.791495Z"
"iopub.execute_input": "2023-08-06T17:36:01.263887Z",
"iopub.status.busy": "2023-08-06T17:36:01.263792Z",
"iopub.status.idle": "2023-08-06T17:36:01.267718Z",
"shell.execute_reply": "2023-08-06T17:36:01.267462Z"
}
},
"outputs": [
@@ -142,7 +145,7 @@
],
"source": [
"result = ttest_1samp(X[:,0], 0)\n",
"result.pvalue"
"result.pvalue\n"
]
},
{
@@ -169,11 +172,12 @@
"id": "d1f0c695",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:51.793305Z",
"iopub.status.busy": "2023-07-31T02:15:51.793226Z",
"iopub.status.idle": "2023-07-31T02:15:51.815977Z",
"shell.execute_reply": "2023-07-31T02:15:51.815697Z"
}
"iopub.execute_input": "2023-08-06T17:36:01.269204Z",
"iopub.status.busy": "2023-08-06T17:36:01.269116Z",
"iopub.status.idle": "2023-08-06T17:36:01.292380Z",
"shell.execute_reply": "2023-08-06T17:36:01.292124Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
@@ -186,7 +190,7 @@
" 'Do not reject H0'])\n",
"truth = pd.Categorical(true_mean == 0,\n",
" categories=[True, False],\n",
" ordered=True)"
" ordered=True)\n"
]
},
{
@@ -204,11 +208,12 @@
"id": "7a9594a0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:51.817571Z",
"iopub.status.busy": "2023-07-31T02:15:51.817485Z",
"iopub.status.idle": "2023-07-31T02:15:51.826998Z",
"shell.execute_reply": "2023-07-31T02:15:51.826728Z"
}
"iopub.execute_input": "2023-08-06T17:36:01.293910Z",
"iopub.status.busy": "2023-08-06T17:36:01.293823Z",
"iopub.status.idle": "2023-08-06T17:36:01.302891Z",
"shell.execute_reply": "2023-08-06T17:36:01.302612Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -272,7 +277,7 @@
"pd.crosstab(decision,\n",
" truth,\n",
" rownames=['Decision'],\n",
" colnames=['H0'])"
" colnames=['H0'])\n"
]
},
{
@@ -294,7 +299,7 @@
"amounts to quite a weak signal, and it resulted in a high number of\n",
"Type II errors. Let's instead simulate data with a stronger signal,\n",
"so that the ratio of the mean to the standard deviation for the false\n",
"null hypotheses equals $1$. We make only 10 Type II errors."
"null hypotheses equals $1$. We make only 10 Type II errors.\n"
]
},
{
@@ -303,11 +308,12 @@
"id": "25f7fc5d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:51.828557Z",
"iopub.status.busy": "2023-07-31T02:15:51.828471Z",
"iopub.status.idle": "2023-07-31T02:15:51.857144Z",
"shell.execute_reply": "2023-07-31T02:15:51.856863Z"
}
"iopub.execute_input": "2023-08-06T17:36:01.304398Z",
"iopub.status.busy": "2023-08-06T17:36:01.304317Z",
"iopub.status.idle": "2023-08-06T17:36:01.331987Z",
"shell.execute_reply": "2023-08-06T17:36:01.331720Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -383,7 +389,15 @@
"pd.crosstab(decision,\n",
" truth,\n",
" rownames=['Decision'],\n",
" colnames=['H0'])"
" colnames=['H0'])\n"
]
},
{
"cell_type": "markdown",
"id": "bb70c597",
"metadata": {},
"source": [
" "
]
},
{
@@ -406,10 +420,10 @@
"id": "369b5bd3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:51.858641Z",
"iopub.status.busy": "2023-07-31T02:15:51.858551Z",
"iopub.status.idle": "2023-07-31T02:15:52.158944Z",
"shell.execute_reply": "2023-07-31T02:15:52.158640Z"
"iopub.execute_input": "2023-08-06T17:36:01.333446Z",
"iopub.status.busy": "2023-08-06T17:36:01.333362Z",
"iopub.status.idle": "2023-08-06T17:36:01.583878Z",
"shell.execute_reply": "2023-08-06T17:36:01.583084Z"
}
},
"outputs": [
@@ -435,7 +449,7 @@
"ax.set_xlabel('Number of Hypotheses')\n",
"ax.set_ylabel('Family-Wise Error Rate')\n",
"ax.legend()\n",
"ax.axhline(0.05, c='k', ls='--');"
"ax.axhline(0.05, c='k', ls='--');\n"
]
},
{
@@ -462,10 +476,10 @@
"id": "9ce7a19f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.160608Z",
"iopub.status.busy": "2023-07-31T02:15:52.160488Z",
"iopub.status.idle": "2023-07-31T02:15:52.198900Z",
"shell.execute_reply": "2023-07-31T02:15:52.198590Z"
"iopub.execute_input": "2023-08-06T17:36:01.592359Z",
"iopub.status.busy": "2023-08-06T17:36:01.591524Z",
"iopub.status.idle": "2023-08-06T17:36:01.636450Z",
"shell.execute_reply": "2023-08-06T17:36:01.636160Z"
}
},
"outputs": [
@@ -486,7 +500,7 @@
"fund_mini_pvals = np.empty(5)\n",
"for i in range(5):\n",
" fund_mini_pvals[i] = ttest_1samp(fund_mini.iloc[:,i], 0).pvalue\n",
"fund_mini_pvals"
"fund_mini_pvals\n"
]
},
{
@@ -530,11 +544,12 @@
"id": "de6cffed",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.200735Z",
"iopub.status.busy": "2023-07-31T02:15:52.200617Z",
"iopub.status.idle": "2023-07-31T02:15:52.202992Z",
"shell.execute_reply": "2023-07-31T02:15:52.202738Z"
}
"iopub.execute_input": "2023-08-06T17:36:01.638004Z",
"iopub.status.busy": "2023-08-06T17:36:01.637923Z",
"iopub.status.idle": "2023-08-06T17:36:01.640151Z",
"shell.execute_reply": "2023-08-06T17:36:01.639891Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -550,7 +565,7 @@
],
"source": [
"reject, bonf = mult_test(fund_mini_pvals, method = \"bonferroni\")[:2]\n",
"reject"
"reject\n"
]
},
{
@@ -568,10 +583,10 @@
"id": "0de71500",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.204499Z",
"iopub.status.busy": "2023-07-31T02:15:52.204407Z",
"iopub.status.idle": "2023-07-31T02:15:52.206753Z",
"shell.execute_reply": "2023-07-31T02:15:52.206498Z"
"iopub.execute_input": "2023-08-06T17:36:01.641646Z",
"iopub.status.busy": "2023-08-06T17:36:01.641554Z",
"iopub.status.idle": "2023-08-06T17:36:01.643766Z",
"shell.execute_reply": "2023-08-06T17:36:01.643529Z"
}
},
"outputs": [
@@ -588,7 +603,7 @@
}
],
"source": [
"bonf, np.minimum(fund_mini_pvals * 5, 1)"
"bonf, np.minimum(fund_mini_pvals * 5, 1)\n"
]
},
{
@@ -610,11 +625,12 @@
"id": "f7e87bdb",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.208134Z",
"iopub.status.busy": "2023-07-31T02:15:52.208036Z",
"iopub.status.idle": "2023-07-31T02:15:52.255948Z",
"shell.execute_reply": "2023-07-31T02:15:52.255602Z"
}
"iopub.execute_input": "2023-08-06T17:36:01.645254Z",
"iopub.status.busy": "2023-08-06T17:36:01.645162Z",
"iopub.status.idle": "2023-08-06T17:36:01.687110Z",
"shell.execute_reply": "2023-08-06T17:36:01.686827Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -630,7 +646,7 @@
}
],
"source": [
"mult_test(fund_mini_pvals, method = \"holm\", alpha=0.05)[:2]"
"mult_test(fund_mini_pvals, method = \"holm\", alpha=0.05)[:2]\n"
]
},
{
@@ -648,11 +664,12 @@
"id": "e88be376",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.257656Z",
"iopub.status.busy": "2023-07-31T02:15:52.257540Z",
"iopub.status.idle": "2023-07-31T02:15:52.260759Z",
"shell.execute_reply": "2023-07-31T02:15:52.260443Z"
}
"iopub.execute_input": "2023-08-06T17:36:01.688627Z",
"iopub.status.busy": "2023-08-06T17:36:01.688527Z",
"iopub.status.idle": "2023-08-06T17:36:01.691393Z",
"shell.execute_reply": "2023-08-06T17:36:01.691161Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -672,7 +689,7 @@
}
],
"source": [
"fund_mini.mean()"
"fund_mini.mean()\n"
]
},
{
@@ -691,10 +708,10 @@
"id": "41149af6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.262161Z",
"iopub.status.busy": "2023-07-31T02:15:52.262063Z",
"iopub.status.idle": "2023-07-31T02:15:52.264821Z",
"shell.execute_reply": "2023-07-31T02:15:52.264573Z"
"iopub.execute_input": "2023-08-06T17:36:01.692844Z",
"iopub.status.busy": "2023-08-06T17:36:01.692751Z",
"iopub.status.idle": "2023-08-06T17:36:01.695119Z",
"shell.execute_reply": "2023-08-06T17:36:01.694881Z"
}
},
"outputs": [
@@ -711,7 +728,7 @@
],
"source": [
"ttest_rel(fund_mini['Manager1'],\n",
" fund_mini['Manager2']).pvalue"
" fund_mini['Manager2']).pvalue\n"
]
},
{
@@ -743,11 +760,12 @@
"id": "61aabda7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.266290Z",
"iopub.status.busy": "2023-07-31T02:15:52.266197Z",
"iopub.status.idle": "2023-07-31T02:15:52.746435Z",
"shell.execute_reply": "2023-07-31T02:15:52.746118Z"
}
"iopub.execute_input": "2023-08-06T17:36:01.696563Z",
"iopub.status.busy": "2023-08-06T17:36:01.696465Z",
"iopub.status.idle": "2023-08-06T17:36:02.177873Z",
"shell.execute_reply": "2023-08-06T17:36:02.177587Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -776,7 +794,7 @@
"returns = np.hstack([fund_mini.iloc[:,i] for i in range(5)])\n",
"managers = np.hstack([[i+1]*50 for i in range(5)])\n",
"tukey = pairwise_tukeyhsd(returns, managers)\n",
"print(tukey.summary())"
"print(tukey.summary())\n"
]
},
{
@@ -802,10 +820,10 @@
"id": "cbcad4de",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.748078Z",
"iopub.status.busy": "2023-07-31T02:15:52.747975Z",
"iopub.status.idle": "2023-07-31T02:15:52.829340Z",
"shell.execute_reply": "2023-07-31T02:15:52.829068Z"
"iopub.execute_input": "2023-08-06T17:36:02.179444Z",
"iopub.status.busy": "2023-08-06T17:36:02.179343Z",
"iopub.status.idle": "2023-08-06T17:36:02.260168Z",
"shell.execute_reply": "2023-08-06T17:36:02.259865Z"
}
},
"outputs": [
@@ -822,7 +840,7 @@
],
"source": [
"fig, ax = plt.subplots(figsize=(8,8))\n",
"tukey.plot_simultaneous(ax=ax);"
"tukey.plot_simultaneous(ax=ax);\n"
]
},
{
@@ -843,17 +861,17 @@
"id": "b5842190",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:52.831064Z",
"iopub.status.busy": "2023-07-31T02:15:52.830953Z",
"iopub.status.idle": "2023-07-31T02:15:53.237615Z",
"shell.execute_reply": "2023-07-31T02:15:53.237218Z"
"iopub.execute_input": "2023-08-06T17:36:02.261922Z",
"iopub.status.busy": "2023-08-06T17:36:02.261817Z",
"iopub.status.idle": "2023-08-06T17:36:02.675908Z",
"shell.execute_reply": "2023-08-06T17:36:02.675505Z"
}
},
"outputs": [],
"source": [
"fund_pvalues = np.empty(2000)\n",
"for i, manager in enumerate(Fund.columns):\n",
" fund_pvalues[i] = ttest_1samp(Fund[manager], 0).pvalue"
" fund_pvalues[i] = ttest_1samp(Fund[manager], 0).pvalue\n"
]
},
{
@@ -872,10 +890,10 @@
"id": "7c9d8bed",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:53.239456Z",
"iopub.status.busy": "2023-07-31T02:15:53.239345Z",
"iopub.status.idle": "2023-07-31T02:15:53.242247Z",
"shell.execute_reply": "2023-07-31T02:15:53.241963Z"
"iopub.execute_input": "2023-08-06T17:36:02.677787Z",
"iopub.status.busy": "2023-08-06T17:36:02.677666Z",
"iopub.status.idle": "2023-08-06T17:36:02.680351Z",
"shell.execute_reply": "2023-08-06T17:36:02.680097Z"
}
},
"outputs": [
@@ -893,7 +911,7 @@
],
"source": [
"fund_qvalues = mult_test(fund_pvalues, method = \"fdr_bh\")[1]\n",
"fund_qvalues[:10]"
"fund_qvalues[:10]\n"
]
},
{
@@ -917,11 +935,12 @@
"id": "bfa39f7c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:53.243699Z",
"iopub.status.busy": "2023-07-31T02:15:53.243594Z",
"iopub.status.idle": "2023-07-31T02:15:53.245817Z",
"shell.execute_reply": "2023-07-31T02:15:53.245569Z"
}
"iopub.execute_input": "2023-08-06T17:36:02.681878Z",
"iopub.status.busy": "2023-08-06T17:36:02.681776Z",
"iopub.status.idle": "2023-08-06T17:36:02.684078Z",
"shell.execute_reply": "2023-08-06T17:36:02.683782Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -936,7 +955,7 @@
}
],
"source": [
"(fund_qvalues <= 0.1).sum()"
"(fund_qvalues <= 0.1).sum()\n"
]
},
{
@@ -960,11 +979,12 @@
"id": "70b69b47",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:53.247278Z",
"iopub.status.busy": "2023-07-31T02:15:53.247171Z",
"iopub.status.idle": "2023-07-31T02:15:53.249292Z",
"shell.execute_reply": "2023-07-31T02:15:53.249050Z"
}
"iopub.execute_input": "2023-08-06T17:36:02.685580Z",
"iopub.status.busy": "2023-08-06T17:36:02.685487Z",
"iopub.status.idle": "2023-08-06T17:36:02.687581Z",
"shell.execute_reply": "2023-08-06T17:36:02.687313Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -979,7 +999,7 @@
}
],
"source": [
"(fund_pvalues <= 0.1 / 2000).sum()"
"(fund_pvalues <= 0.1 / 2000).sum()\n"
]
},
{
@@ -1009,10 +1029,10 @@
"id": "4c0ddea1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:53.250720Z",
"iopub.status.busy": "2023-07-31T02:15:53.250626Z",
"iopub.status.idle": "2023-07-31T02:15:53.253039Z",
"shell.execute_reply": "2023-07-31T02:15:53.252775Z"
"iopub.execute_input": "2023-08-06T17:36:02.689041Z",
"iopub.status.busy": "2023-08-06T17:36:02.688941Z",
"iopub.status.idle": "2023-08-06T17:36:02.691386Z",
"shell.execute_reply": "2023-08-06T17:36:02.691129Z"
}
},
"outputs": [],
@@ -1026,7 +1046,7 @@
" sorted_set_ = np.arange(sorted_set_.max())\n",
"else:\n",
" selected_ = []\n",
" sorted_set_ = []"
" sorted_set_ = []\n"
]
},
{
@@ -1043,11 +1063,12 @@
"id": "0314eac9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:53.254539Z",
"iopub.status.busy": "2023-07-31T02:15:53.254447Z",
"iopub.status.idle": "2023-07-31T02:15:53.487410Z",
"shell.execute_reply": "2023-07-31T02:15:53.487100Z"
}
"iopub.execute_input": "2023-08-06T17:36:02.692825Z",
"iopub.status.busy": "2023-08-06T17:36:02.692729Z",
"iopub.status.idle": "2023-08-06T17:36:02.922587Z",
"shell.execute_reply": "2023-08-06T17:36:02.922278Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1070,7 +1091,7 @@
"ax.set_ylabel('P-Value')\n",
"ax.set_xlabel('Index')\n",
"ax.scatter(sorted_set_+1, sorted_[sorted_set_], c='r', s=20)\n",
"ax.axline((0, 0), (1,q/m), c='k', ls='--', linewidth=3);"
"ax.axline((0, 0), (1,q/m), c='k', ls='--', linewidth=3);\n"
]
},
{
@@ -1092,11 +1113,12 @@
"id": "b59b8137",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:53.489154Z",
"iopub.status.busy": "2023-07-31T02:15:53.489039Z",
"iopub.status.idle": "2023-07-31T02:15:53.563916Z",
"shell.execute_reply": "2023-07-31T02:15:53.563610Z"
}
"iopub.execute_input": "2023-08-06T17:36:02.924316Z",
"iopub.status.busy": "2023-08-06T17:36:02.924196Z",
"iopub.status.idle": "2023-08-06T17:36:02.997644Z",
"shell.execute_reply": "2023-08-06T17:36:02.997332Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1118,7 +1140,7 @@
"Khan = load_data('Khan') \n",
"D = pd.concat([Khan['xtrain'], Khan['xtest']])\n",
"D['Y'] = pd.concat([Khan['ytrain'], Khan['ytest']])\n",
"D['Y'].value_counts()"
"D['Y'].value_counts()\n"
]
},
{
@@ -1142,11 +1164,12 @@
"id": "96fb2f61",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:53.565515Z",
"iopub.status.busy": "2023-07-31T02:15:53.565402Z",
"iopub.status.idle": "2023-07-31T02:15:53.569018Z",
"shell.execute_reply": "2023-07-31T02:15:53.568748Z"
}
"iopub.execute_input": "2023-08-06T17:36:02.999309Z",
"iopub.status.busy": "2023-08-06T17:36:02.999199Z",
"iopub.status.idle": "2023-08-06T17:36:03.003203Z",
"shell.execute_reply": "2023-08-06T17:36:03.002963Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1167,7 +1190,7 @@
"observedT, pvalue = ttest_ind(D2[gene_11],\n",
" D4[gene_11],\n",
" equal_var=True)\n",
"observedT, pvalue"
"observedT, pvalue\n"
]
},
{
@@ -1194,11 +1217,12 @@
"id": "fdc229fa",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:53.570437Z",
"iopub.status.busy": "2023-07-31T02:15:53.570359Z",
"iopub.status.idle": "2023-07-31T02:15:55.953846Z",
"shell.execute_reply": "2023-07-31T02:15:55.953572Z"
}
"iopub.execute_input": "2023-08-06T17:36:03.004671Z",
"iopub.status.busy": "2023-08-06T17:36:03.004588Z",
"iopub.status.idle": "2023-08-06T17:36:05.379699Z",
"shell.execute_reply": "2023-08-06T17:36:05.379380Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1224,7 +1248,7 @@
" D_null[n_:],\n",
" equal_var=True)\n",
" Tnull[b] = ttest_.statistic\n",
"(np.abs(Tnull) > np.abs(observedT)).mean()"
"(np.abs(Tnull) > np.abs(observedT)).mean()\n"
]
},
{
@@ -1244,11 +1268,12 @@
"id": "e3894695",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:55.955682Z",
"iopub.status.busy": "2023-07-31T02:15:55.955570Z",
"iopub.status.idle": "2023-07-31T02:15:56.168609Z",
"shell.execute_reply": "2023-07-31T02:15:56.168280Z"
}
"iopub.execute_input": "2023-08-06T17:36:05.381564Z",
"iopub.status.busy": "2023-08-06T17:36:05.381435Z",
"iopub.status.idle": "2023-08-06T17:36:05.597223Z",
"shell.execute_reply": "2023-08-06T17:36:05.596880Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1277,7 +1302,7 @@
" c='b',\n",
" label='Observed')\n",
"ax.legend()\n",
"ax.set_xlabel(\"Null Distribution of Test Statistic\");"
"ax.set_xlabel(\"Null Distribution of Test Statistic\");\n"
]
},
{
@@ -1303,10 +1328,10 @@
"id": "3b7392cb",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:56.170411Z",
"iopub.status.busy": "2023-07-31T02:15:56.170304Z",
"iopub.status.idle": "2023-07-31T02:19:52.282774Z",
"shell.execute_reply": "2023-07-31T02:19:52.282319Z"
"iopub.execute_input": "2023-08-06T17:36:05.599334Z",
"iopub.status.busy": "2023-08-06T17:36:05.599205Z",
"iopub.status.idle": "2023-08-06T17:40:01.929123Z",
"shell.execute_reply": "2023-08-06T17:40:01.928341Z"
}
},
"outputs": [],
@@ -1328,7 +1353,7 @@
" ttest_ = ttest_ind(D_null[:n_],\n",
" D_null[n_:],\n",
" equal_var=True)\n",
" Tnull_vals[j,b] = ttest_.statistic"
" Tnull_vals[j,b] = ttest_.statistic\n"
]
},
{
@@ -1349,10 +1374,10 @@
"id": "cac15616",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:19:52.285179Z",
"iopub.status.busy": "2023-07-31T02:19:52.285030Z",
"iopub.status.idle": "2023-07-31T02:19:52.430820Z",
"shell.execute_reply": "2023-07-31T02:19:52.430483Z"
"iopub.execute_input": "2023-08-06T17:40:01.931393Z",
"iopub.status.busy": "2023-08-06T17:40:01.931250Z",
"iopub.status.idle": "2023-08-06T17:40:02.050525Z",
"shell.execute_reply": "2023-08-06T17:40:02.050215Z"
}
},
"outputs": [],
@@ -1364,7 +1389,7 @@
" V = np.sum(np.abs(Tnull_vals) >= cutoffs[j]) / B\n",
" Rs[j] = R\n",
" Vs[j] = V\n",
" FDRs[j] = V / R"
" FDRs[j] = V / R\n"
]
},
{
@@ -1390,10 +1415,10 @@
"id": "9661eb10",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:19:52.432747Z",
"iopub.status.busy": "2023-07-31T02:19:52.432615Z",
"iopub.status.idle": "2023-07-31T02:19:52.435385Z",
"shell.execute_reply": "2023-07-31T02:19:52.435090Z"
"iopub.execute_input": "2023-08-06T17:40:02.052324Z",
"iopub.status.busy": "2023-08-06T17:40:02.052224Z",
"iopub.status.idle": "2023-08-06T17:40:02.054968Z",
"shell.execute_reply": "2023-08-06T17:40:02.054729Z"
}
},
"outputs": [
@@ -1427,7 +1452,7 @@
}
],
"source": [
"sorted(idx[np.abs(T_vals) >= cutoffs[FDRs < 0.1].min()])"
"sorted(idx[np.abs(T_vals) >= cutoffs[FDRs < 0.1].min()])\n"
]
},
{
@@ -1445,10 +1470,10 @@
"id": "18ad4900",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:19:52.436985Z",
"iopub.status.busy": "2023-07-31T02:19:52.436868Z",
"iopub.status.idle": "2023-07-31T02:19:52.439478Z",
"shell.execute_reply": "2023-07-31T02:19:52.439213Z"
"iopub.execute_input": "2023-08-06T17:40:02.056480Z",
"iopub.status.busy": "2023-08-06T17:40:02.056382Z",
"iopub.status.idle": "2023-08-06T17:40:02.058766Z",
"shell.execute_reply": "2023-08-06T17:40:02.058503Z"
}
},
"outputs": [
@@ -1494,7 +1519,7 @@
}
],
"source": [
"sorted(idx[np.abs(T_vals) >= cutoffs[FDRs < 0.2].min()])"
"sorted(idx[np.abs(T_vals) >= cutoffs[FDRs < 0.2].min()])\n"
]
},
{
@@ -1513,11 +1538,12 @@
"id": "28c276b6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:19:52.441008Z",
"iopub.status.busy": "2023-07-31T02:19:52.440893Z",
"iopub.status.idle": "2023-07-31T02:19:52.515423Z",
"shell.execute_reply": "2023-07-31T02:19:52.515126Z"
}
"iopub.execute_input": "2023-08-06T17:40:02.060234Z",
"iopub.status.busy": "2023-08-06T17:40:02.060135Z",
"iopub.status.idle": "2023-08-06T17:40:02.135583Z",
"shell.execute_reply": "2023-08-06T17:40:02.135228Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1535,15 +1561,23 @@
"fig, ax = plt.subplots()\n",
"ax.plot(Rs, FDRs, 'b', linewidth=3)\n",
"ax.set_xlabel(\"Number of Rejections\")\n",
"ax.set_ylabel(\"False Discovery Rate\");"
"ax.set_ylabel(\"False Discovery Rate\");\n"
]
},
{
"cell_type": "markdown",
"id": "e4b5d621",
"metadata": {},
"source": [
"\n"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"formats": "ipynb,md:myst",
"main_language": "python"
"main_language": "python",
"notebook_metadata_filter": "-all"
},
"language_info": {
"codemirror_mode": {

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,9 @@
"id": "82bce88a",
"metadata": {},
"source": [
"# Chapter 3"
"\n",
"# Chapter 3\n",
"\n"
]
},
{
@@ -26,17 +28,18 @@
"id": "ca5277a6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:37.098059Z",
"iopub.status.busy": "2023-07-31T02:14:37.097629Z",
"iopub.status.idle": "2023-07-31T02:14:37.599285Z",
"shell.execute_reply": "2023-07-31T02:14:37.598959Z"
}
"iopub.execute_input": "2023-08-06T17:34:45.279319Z",
"iopub.status.busy": "2023-08-06T17:34:45.279082Z",
"iopub.status.idle": "2023-08-06T17:34:45.953848Z",
"shell.execute_reply": "2023-08-06T17:34:45.953518Z"
},
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from matplotlib.pyplot import subplots"
"from matplotlib.pyplot import subplots\n"
]
},
{
@@ -58,15 +61,16 @@
"id": "675f24e6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:37.601365Z",
"iopub.status.busy": "2023-07-31T02:14:37.601115Z",
"iopub.status.idle": "2023-07-31T02:14:38.189274Z",
"shell.execute_reply": "2023-07-31T02:14:38.188923Z"
}
"iopub.execute_input": "2023-08-06T17:34:45.955884Z",
"iopub.status.busy": "2023-08-06T17:34:45.955666Z",
"iopub.status.idle": "2023-08-06T17:34:46.765820Z",
"shell.execute_reply": "2023-08-06T17:34:46.765525Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
"import statsmodels.api as sm"
"import statsmodels.api as sm\n"
]
},
{
@@ -90,17 +94,17 @@
"id": "a0ee23c2",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.191201Z",
"iopub.status.busy": "2023-07-31T02:14:38.191048Z",
"iopub.status.idle": "2023-07-31T02:14:38.194262Z",
"shell.execute_reply": "2023-07-31T02:14:38.194012Z"
"iopub.execute_input": "2023-08-06T17:34:46.767689Z",
"iopub.status.busy": "2023-08-06T17:34:46.767547Z",
"iopub.status.idle": "2023-08-06T17:34:46.770326Z",
"shell.execute_reply": "2023-08-06T17:34:46.770048Z"
}
},
"outputs": [],
"source": [
"from statsmodels.stats.outliers_influence \\\n",
" import variance_inflation_factor as VIF\n",
"from statsmodels.stats.anova import anova_lm"
"from statsmodels.stats.anova import anova_lm\n"
]
},
{
@@ -121,10 +125,10 @@
"id": "b35eb887",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.195737Z",
"iopub.status.busy": "2023-07-31T02:14:38.195636Z",
"iopub.status.idle": "2023-07-31T02:14:38.348861Z",
"shell.execute_reply": "2023-07-31T02:14:38.348443Z"
"iopub.execute_input": "2023-08-06T17:34:46.771834Z",
"iopub.status.busy": "2023-08-06T17:34:46.771733Z",
"iopub.status.idle": "2023-08-06T17:34:46.958904Z",
"shell.execute_reply": "2023-08-06T17:34:46.958467Z"
}
},
"outputs": [],
@@ -132,7 +136,7 @@
"from ISLP import load_data\n",
"from ISLP.models import (ModelSpec as MS,\n",
" summarize,\n",
" poly)"
" poly)\n"
]
},
{
@@ -153,11 +157,12 @@
"id": "961908f7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.351011Z",
"iopub.status.busy": "2023-07-31T02:14:38.350696Z",
"iopub.status.idle": "2023-07-31T02:14:38.354455Z",
"shell.execute_reply": "2023-07-31T02:14:38.354198Z"
}
"iopub.execute_input": "2023-08-06T17:34:46.960948Z",
"iopub.status.busy": "2023-08-06T17:34:46.960687Z",
"iopub.status.idle": "2023-08-06T17:34:46.964347Z",
"shell.execute_reply": "2023-08-06T17:34:46.964073Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -208,7 +213,7 @@
}
],
"source": [
"dir()"
"dir()\n"
]
},
{
@@ -233,11 +238,12 @@
"id": "662caa15",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.355929Z",
"iopub.status.busy": "2023-07-31T02:14:38.355825Z",
"iopub.status.idle": "2023-07-31T02:14:38.358768Z",
"shell.execute_reply": "2023-07-31T02:14:38.358500Z"
}
"iopub.execute_input": "2023-08-06T17:34:46.966063Z",
"iopub.status.busy": "2023-08-06T17:34:46.965960Z",
"iopub.status.idle": "2023-08-06T17:34:46.968939Z",
"shell.execute_reply": "2023-08-06T17:34:46.968662Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -418,7 +424,7 @@
],
"source": [
"A = np.array([3,5,11])\n",
"dir(A)"
"dir(A)\n"
]
},
{
@@ -436,11 +442,12 @@
"id": "ebb7d126",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.360173Z",
"iopub.status.busy": "2023-07-31T02:14:38.360095Z",
"iopub.status.idle": "2023-07-31T02:14:38.362221Z",
"shell.execute_reply": "2023-07-31T02:14:38.361978Z"
}
"iopub.execute_input": "2023-08-06T17:34:46.970359Z",
"iopub.status.busy": "2023-08-06T17:34:46.970263Z",
"iopub.status.idle": "2023-08-06T17:34:46.972364Z",
"shell.execute_reply": "2023-08-06T17:34:46.972124Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -455,7 +462,15 @@
}
],
"source": [
"A.sum()"
"A.sum()\n"
]
},
{
"cell_type": "markdown",
"id": "3b9db985",
"metadata": {},
"source": [
" "
]
},
{
@@ -485,10 +500,10 @@
"id": "1ea46cee",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.363717Z",
"iopub.status.busy": "2023-07-31T02:14:38.363629Z",
"iopub.status.idle": "2023-07-31T02:14:38.368806Z",
"shell.execute_reply": "2023-07-31T02:14:38.368543Z"
"iopub.execute_input": "2023-08-06T17:34:46.973818Z",
"iopub.status.busy": "2023-08-06T17:34:46.973741Z",
"iopub.status.idle": "2023-08-06T17:34:46.982452Z",
"shell.execute_reply": "2023-08-06T17:34:46.982201Z"
}
},
"outputs": [
@@ -507,7 +522,7 @@
],
"source": [
"Boston = load_data(\"Boston\")\n",
"Boston.columns"
"Boston.columns\n"
]
},
{
@@ -520,7 +535,7 @@
"We start by using the `sm.OLS()` function to fit a\n",
"simple linear regression model. Our response will be\n",
" `medv` and `lstat` will be the single predictor.\n",
"For this model, we can create the model matrix by hand."
"For this model, we can create the model matrix by hand.\n"
]
},
{
@@ -529,10 +544,10 @@
"id": "26c0ba88",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.370249Z",
"iopub.status.busy": "2023-07-31T02:14:38.370153Z",
"iopub.status.idle": "2023-07-31T02:14:38.375404Z",
"shell.execute_reply": "2023-07-31T02:14:38.375134Z"
"iopub.execute_input": "2023-08-06T17:34:46.983850Z",
"iopub.status.busy": "2023-08-06T17:34:46.983745Z",
"iopub.status.idle": "2023-08-06T17:34:46.990247Z",
"shell.execute_reply": "2023-08-06T17:34:46.990002Z"
}
},
"outputs": [
@@ -602,7 +617,7 @@
"source": [
"X = pd.DataFrame({'intercept': np.ones(Boston.shape[0]),\n",
" 'lstat': Boston['lstat']})\n",
"X[:4]"
"X[:4]\n"
]
},
{
@@ -619,17 +634,18 @@
"id": "d4dd511b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.376828Z",
"iopub.status.busy": "2023-07-31T02:14:38.376741Z",
"iopub.status.idle": "2023-07-31T02:14:38.379010Z",
"shell.execute_reply": "2023-07-31T02:14:38.378764Z"
}
"iopub.execute_input": "2023-08-06T17:34:46.991664Z",
"iopub.status.busy": "2023-08-06T17:34:46.991561Z",
"iopub.status.idle": "2023-08-06T17:34:46.994290Z",
"shell.execute_reply": "2023-08-06T17:34:46.994049Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
"y = Boston['medv']\n",
"model = sm.OLS(y, X)\n",
"results = model.fit()"
"results = model.fit()\n"
]
},
{
@@ -653,11 +669,12 @@
"id": "eef9f8e3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.380359Z",
"iopub.status.busy": "2023-07-31T02:14:38.380275Z",
"iopub.status.idle": "2023-07-31T02:14:38.437623Z",
"shell.execute_reply": "2023-07-31T02:14:38.437333Z"
}
"iopub.execute_input": "2023-08-06T17:34:46.995620Z",
"iopub.status.busy": "2023-08-06T17:34:46.995530Z",
"iopub.status.idle": "2023-08-06T17:34:47.057569Z",
"shell.execute_reply": "2023-08-06T17:34:47.057305Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -718,7 +735,7 @@
}
],
"source": [
"summarize(results)"
"summarize(results)\n"
]
},
{
@@ -754,7 +771,7 @@
"initial computations on it, as specified in the transform object.\n",
"For example, it may compute means and standard deviations for centering and scaling.\n",
"The `transform()` \n",
"method applies the fitted transformation to the array of data, and produces the model matrix."
"method applies the fitted transformation to the array of data, and produces the model matrix.\n"
]
},
{
@@ -763,11 +780,12 @@
"id": "557170d4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.439217Z",
"iopub.status.busy": "2023-07-31T02:14:38.439073Z",
"iopub.status.idle": "2023-07-31T02:14:38.443761Z",
"shell.execute_reply": "2023-07-31T02:14:38.443507Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.059196Z",
"iopub.status.busy": "2023-08-06T17:34:47.059050Z",
"iopub.status.idle": "2023-08-06T17:34:47.063793Z",
"shell.execute_reply": "2023-08-06T17:34:47.063548Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -857,11 +875,12 @@
"id": "b83ec097",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.445159Z",
"iopub.status.busy": "2023-07-31T02:14:38.445084Z",
"iopub.status.idle": "2023-07-31T02:14:38.449274Z",
"shell.execute_reply": "2023-07-31T02:14:38.449028Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.065239Z",
"iopub.status.busy": "2023-08-06T17:34:47.065152Z",
"iopub.status.idle": "2023-08-06T17:34:47.069286Z",
"shell.execute_reply": "2023-08-06T17:34:47.068998Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -960,10 +979,10 @@
"id": "d4dce5f6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.450699Z",
"iopub.status.busy": "2023-07-31T02:14:38.450620Z",
"iopub.status.idle": "2023-07-31T02:14:38.459848Z",
"shell.execute_reply": "2023-07-31T02:14:38.459594Z"
"iopub.execute_input": "2023-08-06T17:34:47.070742Z",
"iopub.status.busy": "2023-08-06T17:34:47.070666Z",
"iopub.status.idle": "2023-08-06T17:34:47.079919Z",
"shell.execute_reply": "2023-08-06T17:34:47.079672Z"
}
},
"outputs": [
@@ -982,10 +1001,10 @@
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 601.6</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Sun, 30 Jul 2023</td> <th> Prob (F-statistic):</th> <td>5.08e-88</td>\n",
" <th>Date:</th> <td>Sun, 06 Aug 2023</td> <th> Prob (F-statistic):</th> <td>5.08e-88</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>22:14:38</td> <th> Log-Likelihood: </th> <td> -1641.5</td>\n",
" <th>Time:</th> <td>10:34:47</td> <th> Log-Likelihood: </th> <td> -1641.5</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 506</td> <th> AIC: </th> <td> 3287.</td>\n",
@@ -1033,8 +1052,8 @@
"\\textbf{Dep. Variable:} & medv & \\textbf{ R-squared: } & 0.544 \\\\\n",
"\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.543 \\\\\n",
"\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 601.6 \\\\\n",
"\\textbf{Date:} & Sun, 30 Jul 2023 & \\textbf{ Prob (F-statistic):} & 5.08e-88 \\\\\n",
"\\textbf{Time:} & 22:14:38 & \\textbf{ Log-Likelihood: } & -1641.5 \\\\\n",
"\\textbf{Date:} & Sun, 06 Aug 2023 & \\textbf{ Prob (F-statistic):} & 5.08e-88 \\\\\n",
"\\textbf{Time:} & 10:34:47 & \\textbf{ Log-Likelihood: } & -1641.5 \\\\\n",
"\\textbf{No. Observations:} & 506 & \\textbf{ AIC: } & 3287. \\\\\n",
"\\textbf{Df Residuals:} & 504 & \\textbf{ BIC: } & 3295. \\\\\n",
"\\textbf{Df Model:} & 1 & \\textbf{ } & \\\\\n",
@@ -1069,8 +1088,8 @@
"Dep. Variable: medv R-squared: 0.544\n",
"Model: OLS Adj. R-squared: 0.543\n",
"Method: Least Squares F-statistic: 601.6\n",
"Date: Sun, 30 Jul 2023 Prob (F-statistic): 5.08e-88\n",
"Time: 22:14:38 Log-Likelihood: -1641.5\n",
"Date: Sun, 06 Aug 2023 Prob (F-statistic): 5.08e-88\n",
"Time: 10:34:47 Log-Likelihood: -1641.5\n",
"No. Observations: 506 AIC: 3287.\n",
"Df Residuals: 504 BIC: 3295.\n",
"Df Model: 1 \n",
@@ -1098,7 +1117,7 @@
}
],
"source": [
"results.summary()"
"results.summary()\n"
]
},
{
@@ -1116,11 +1135,12 @@
"id": "a0edf555",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.461298Z",
"iopub.status.busy": "2023-07-31T02:14:38.461215Z",
"iopub.status.idle": "2023-07-31T02:14:38.463809Z",
"shell.execute_reply": "2023-07-31T02:14:38.463563Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.081336Z",
"iopub.status.busy": "2023-08-06T17:34:47.081257Z",
"iopub.status.idle": "2023-08-06T17:34:47.083680Z",
"shell.execute_reply": "2023-08-06T17:34:47.083425Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1137,7 +1157,7 @@
}
],
"source": [
"results.params"
"results.params\n"
]
},
{
@@ -1158,10 +1178,10 @@
"id": "fdc5a3f3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.465231Z",
"iopub.status.busy": "2023-07-31T02:14:38.465150Z",
"iopub.status.idle": "2023-07-31T02:14:38.468821Z",
"shell.execute_reply": "2023-07-31T02:14:38.468569Z"
"iopub.execute_input": "2023-08-06T17:34:47.085093Z",
"iopub.status.busy": "2023-08-06T17:34:47.085015Z",
"iopub.status.idle": "2023-08-06T17:34:47.088625Z",
"shell.execute_reply": "2023-08-06T17:34:47.088392Z"
}
},
"outputs": [
@@ -1225,7 +1245,7 @@
"source": [
"new_df = pd.DataFrame({'lstat':[5, 10, 15]})\n",
"newX = design.transform(new_df)\n",
"newX"
"newX\n"
]
},
{
@@ -1242,11 +1262,12 @@
"id": "2c6acbf0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.470242Z",
"iopub.status.busy": "2023-07-31T02:14:38.470162Z",
"iopub.status.idle": "2023-07-31T02:14:38.472450Z",
"shell.execute_reply": "2023-07-31T02:14:38.472199Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.090091Z",
"iopub.status.busy": "2023-08-06T17:34:47.090008Z",
"iopub.status.idle": "2023-08-06T17:34:47.092428Z",
"shell.execute_reply": "2023-08-06T17:34:47.092183Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1262,7 +1283,7 @@
],
"source": [
"new_predictions = results.get_prediction(newX);\n",
"new_predictions.predicted_mean"
"new_predictions.predicted_mean\n"
]
},
{
@@ -1279,11 +1300,12 @@
"id": "c472ef33",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.473835Z",
"iopub.status.busy": "2023-07-31T02:14:38.473762Z",
"iopub.status.idle": "2023-07-31T02:14:38.476010Z",
"shell.execute_reply": "2023-07-31T02:14:38.475756Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.093785Z",
"iopub.status.busy": "2023-08-06T17:34:47.093688Z",
"iopub.status.idle": "2023-08-06T17:34:47.096010Z",
"shell.execute_reply": "2023-08-06T17:34:47.095781Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1300,7 +1322,7 @@
}
],
"source": [
"new_predictions.conf_int(alpha=0.05)"
"new_predictions.conf_int(alpha=0.05)\n"
]
},
{
@@ -1317,11 +1339,12 @@
"id": "3e2ffc7a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.477420Z",
"iopub.status.busy": "2023-07-31T02:14:38.477344Z",
"iopub.status.idle": "2023-07-31T02:14:38.479639Z",
"shell.execute_reply": "2023-07-31T02:14:38.479371Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.097324Z",
"iopub.status.busy": "2023-08-06T17:34:47.097234Z",
"iopub.status.idle": "2023-08-06T17:34:47.099513Z",
"shell.execute_reply": "2023-08-06T17:34:47.099275Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1338,7 +1361,7 @@
}
],
"source": [
"new_predictions.conf_int(obs=True, alpha=0.05)"
"new_predictions.conf_int(obs=True, alpha=0.05)\n"
]
},
{
@@ -1376,11 +1399,12 @@
"id": "4e56a1d3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.481027Z",
"iopub.status.busy": "2023-07-31T02:14:38.480949Z",
"iopub.status.idle": "2023-07-31T02:14:38.482803Z",
"shell.execute_reply": "2023-07-31T02:14:38.482553Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.100972Z",
"iopub.status.busy": "2023-08-06T17:34:47.100892Z",
"iopub.status.idle": "2023-08-06T17:34:47.102793Z",
"shell.execute_reply": "2023-08-06T17:34:47.102549Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
@@ -1388,7 +1412,7 @@
" \"Add a line with slope m and intercept b to ax\"\n",
" xlim = ax.get_xlim()\n",
" ylim = [m * xlim[0] + b, m * xlim[1] + b]\n",
" ax.plot(xlim, ylim)"
" ax.plot(xlim, ylim)\n"
]
},
{
@@ -1409,11 +1433,12 @@
"id": "7f43ffe7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.484281Z",
"iopub.status.busy": "2023-07-31T02:14:38.484202Z",
"iopub.status.idle": "2023-07-31T02:14:38.486090Z",
"shell.execute_reply": "2023-07-31T02:14:38.485818Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.104148Z",
"iopub.status.busy": "2023-08-06T17:34:47.104079Z",
"iopub.status.idle": "2023-08-06T17:34:47.106062Z",
"shell.execute_reply": "2023-08-06T17:34:47.105836Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
@@ -1421,7 +1446,7 @@
" \"Add a line with slope m and intercept b to ax\"\n",
" xlim = ax.get_xlim()\n",
" ylim = [m * xlim[0] + b, m * xlim[1] + b]\n",
" ax.plot(xlim, ylim, *args, **kwargs)"
" ax.plot(xlim, ylim, *args, **kwargs)\n"
]
},
{
@@ -1448,11 +1473,12 @@
"id": "3f7b67c9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.487491Z",
"iopub.status.busy": "2023-07-31T02:14:38.487422Z",
"iopub.status.idle": "2023-07-31T02:14:38.594015Z",
"shell.execute_reply": "2023-07-31T02:14:38.593271Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.107460Z",
"iopub.status.busy": "2023-08-06T17:34:47.107386Z",
"iopub.status.idle": "2023-08-06T17:34:47.254700Z",
"shell.execute_reply": "2023-08-06T17:34:47.253915Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1472,7 +1498,7 @@
" results.params[0],\n",
" results.params[1],\n",
" 'r--',\n",
" linewidth=3)"
" linewidth=3)\n"
]
},
{
@@ -1485,7 +1511,8 @@
"an argument to make it of width 3.\n",
"There is some evidence for non-linearity in the relationship between `lstat` and `medv`. We will explore this issue later in this lab.\n",
"\n",
"As mentioned above, there is an existing function to add a line to a plot --- `ax.axline()` --- but knowing how to write such functions empowers us to create more expressive displays."
"As mentioned above, there is an existing function to add a line to a plot --- `ax.axline()` --- but knowing how to write such functions empowers us to create more expressive displays.\n",
"\n"
]
},
{
@@ -1510,11 +1537,12 @@
"id": "b35a2fd3",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.597846Z",
"iopub.status.busy": "2023-07-31T02:14:38.597587Z",
"iopub.status.idle": "2023-07-31T02:14:38.716373Z",
"shell.execute_reply": "2023-07-31T02:14:38.714325Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.259557Z",
"iopub.status.busy": "2023-08-06T17:34:47.259229Z",
"iopub.status.idle": "2023-08-06T17:34:47.402403Z",
"shell.execute_reply": "2023-08-06T17:34:47.402135Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1533,7 +1561,7 @@
"ax.scatter(results.fittedvalues, results.resid)\n",
"ax.set_xlabel('Fitted value')\n",
"ax.set_ylabel('Residual')\n",
"ax.axhline(0, c='k', ls='--');"
"ax.axhline(0, c='k', ls='--');\n"
]
},
{
@@ -1557,11 +1585,12 @@
"id": "82673b80",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.720789Z",
"iopub.status.busy": "2023-07-31T02:14:38.720353Z",
"iopub.status.idle": "2023-07-31T02:14:38.829000Z",
"shell.execute_reply": "2023-07-31T02:14:38.828696Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.404190Z",
"iopub.status.busy": "2023-08-06T17:34:47.404052Z",
"iopub.status.idle": "2023-08-06T17:34:47.494230Z",
"shell.execute_reply": "2023-08-06T17:34:47.493905Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1591,7 +1620,7 @@
"ax.scatter(np.arange(X.shape[0]), infl.hat_matrix_diag)\n",
"ax.set_xlabel('Index')\n",
"ax.set_ylabel('Leverage')\n",
"np.argmax(infl.hat_matrix_diag)"
"np.argmax(infl.hat_matrix_diag)\n"
]
},
{
@@ -1624,11 +1653,12 @@
"id": "54596dc4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.830833Z",
"iopub.status.busy": "2023-07-31T02:14:38.830723Z",
"iopub.status.idle": "2023-07-31T02:14:38.841593Z",
"shell.execute_reply": "2023-07-31T02:14:38.841314Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.495999Z",
"iopub.status.busy": "2023-08-06T17:34:47.495872Z",
"iopub.status.idle": "2023-08-06T17:34:47.506251Z",
"shell.execute_reply": "2023-08-06T17:34:47.505979Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1721,10 +1751,10 @@
"id": "75c78238",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.843301Z",
"iopub.status.busy": "2023-07-31T02:14:38.843182Z",
"iopub.status.idle": "2023-07-31T02:14:38.845817Z",
"shell.execute_reply": "2023-07-31T02:14:38.845544Z"
"iopub.execute_input": "2023-08-06T17:34:47.508238Z",
"iopub.status.busy": "2023-08-06T17:34:47.508087Z",
"iopub.status.idle": "2023-08-06T17:34:47.510826Z",
"shell.execute_reply": "2023-08-06T17:34:47.510491Z"
}
},
"outputs": [
@@ -1743,7 +1773,7 @@
],
"source": [
"terms = Boston.columns.drop('medv')\n",
"terms"
"terms\n"
]
},
{
@@ -1761,10 +1791,10 @@
"id": "f14b9e1a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.847324Z",
"iopub.status.busy": "2023-07-31T02:14:38.847229Z",
"iopub.status.idle": "2023-07-31T02:14:38.861947Z",
"shell.execute_reply": "2023-07-31T02:14:38.861682Z"
"iopub.execute_input": "2023-08-06T17:34:47.512561Z",
"iopub.status.busy": "2023-08-06T17:34:47.512448Z",
"iopub.status.idle": "2023-08-06T17:34:47.526980Z",
"shell.execute_reply": "2023-08-06T17:34:47.526612Z"
}
},
"outputs": [
@@ -1917,7 +1947,7 @@
"X = MS(terms).fit_transform(Boston)\n",
"model = sm.OLS(y, X)\n",
"results = model.fit()\n",
"summarize(results)"
"summarize(results)\n"
]
},
{
@@ -1937,10 +1967,10 @@
"id": "0a2714b1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.863470Z",
"iopub.status.busy": "2023-07-31T02:14:38.863382Z",
"iopub.status.idle": "2023-07-31T02:14:38.877283Z",
"shell.execute_reply": "2023-07-31T02:14:38.877001Z"
"iopub.execute_input": "2023-08-06T17:34:47.528637Z",
"iopub.status.busy": "2023-08-06T17:34:47.528526Z",
"iopub.status.idle": "2023-08-06T17:34:47.542270Z",
"shell.execute_reply": "2023-08-06T17:34:47.541976Z"
}
},
"outputs": [
@@ -2085,7 +2115,7 @@
"minus_age = Boston.columns.drop(['medv', 'age']) \n",
"Xma = MS(minus_age).fit_transform(Boston)\n",
"model1 = sm.OLS(y, Xma)\n",
"summarize(model1.fit())"
"summarize(model1.fit())\n"
]
},
{
@@ -2116,7 +2146,7 @@
"lists of `Python` objects. The language also supports\n",
"dictionary and *generator* comprehension, though these are\n",
"beyond our scope here. Let's look at an example. We compute the VIF for each of the variables\n",
"in the model matrix `X`, using the function `variance_inflation_factor()`."
"in the model matrix `X`, using the function `variance_inflation_factor()`.\n"
]
},
{
@@ -2125,11 +2155,12 @@
"id": "961c9128",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.878849Z",
"iopub.status.busy": "2023-07-31T02:14:38.878736Z",
"iopub.status.idle": "2023-07-31T02:14:38.885983Z",
"shell.execute_reply": "2023-07-31T02:14:38.885709Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.543839Z",
"iopub.status.busy": "2023-08-06T17:34:47.543752Z",
"iopub.status.idle": "2023-08-06T17:34:47.551040Z",
"shell.execute_reply": "2023-08-06T17:34:47.550770Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -2235,7 +2266,7 @@
" for i in range(1, X.shape[1])]\n",
"vif = pd.DataFrame({'vif':vals},\n",
" index=X.columns[1:])\n",
"vif"
"vif\n"
]
},
{
@@ -2256,17 +2287,18 @@
"id": "4886f9e9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.887620Z",
"iopub.status.busy": "2023-07-31T02:14:38.887509Z",
"iopub.status.idle": "2023-07-31T02:14:38.892708Z",
"shell.execute_reply": "2023-07-31T02:14:38.892460Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.552596Z",
"iopub.status.busy": "2023-08-06T17:34:47.552485Z",
"iopub.status.idle": "2023-08-06T17:34:47.557775Z",
"shell.execute_reply": "2023-08-06T17:34:47.557510Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
"vals = []\n",
"for i in range(1, X.values.shape[1]):\n",
" vals.append(VIF(X.values, i))"
" vals.append(VIF(X.values, i))\n"
]
},
{
@@ -2288,11 +2320,12 @@
"id": "b54d2da1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.894293Z",
"iopub.status.busy": "2023-07-31T02:14:38.894204Z",
"iopub.status.idle": "2023-07-31T02:14:38.904859Z",
"shell.execute_reply": "2023-07-31T02:14:38.904591Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.559401Z",
"iopub.status.busy": "2023-08-06T17:34:47.559315Z",
"iopub.status.idle": "2023-08-06T17:34:47.570264Z",
"shell.execute_reply": "2023-08-06T17:34:47.570004Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -2373,7 +2406,7 @@
" 'age',\n",
" ('lstat', 'age')]).fit_transform(Boston)\n",
"model2 = sm.OLS(y, X)\n",
"summarize(model2.fit())"
"summarize(model2.fit())\n"
]
},
{
@@ -2395,11 +2428,12 @@
"id": "1b71633a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.906973Z",
"iopub.status.busy": "2023-07-31T02:14:38.906845Z",
"iopub.status.idle": "2023-07-31T02:14:38.918896Z",
"shell.execute_reply": "2023-07-31T02:14:38.918604Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.571718Z",
"iopub.status.busy": "2023-08-06T17:34:47.571635Z",
"iopub.status.idle": "2023-08-06T17:34:47.583621Z",
"shell.execute_reply": "2023-08-06T17:34:47.583342Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -2479,7 +2513,7 @@
"X = MS([poly('lstat', degree=2), 'age']).fit_transform(Boston)\n",
"model3 = sm.OLS(y, X)\n",
"results3 = model3.fit()\n",
"summarize(results3)"
"summarize(results3)\n"
]
},
{
@@ -2512,11 +2546,12 @@
"id": "6d30a306",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.920531Z",
"iopub.status.busy": "2023-07-31T02:14:38.920425Z",
"iopub.status.idle": "2023-07-31T02:14:38.926296Z",
"shell.execute_reply": "2023-07-31T02:14:38.926036Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.585282Z",
"iopub.status.busy": "2023-08-06T17:34:47.585169Z",
"iopub.status.idle": "2023-08-06T17:34:47.591106Z",
"shell.execute_reply": "2023-08-06T17:34:47.590768Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -2583,7 +2618,7 @@
}
],
"source": [
"anova_lm(results1, results3)"
"anova_lm(results1, results3)\n"
]
},
{
@@ -2612,7 +2647,7 @@
"The function `anova_lm()` can take more than two nested models\n",
"as input, in which case it compares every successive pair of models.\n",
"That also explains why there are `NaN`s in the first row above, since\n",
"there is no previous model with which to compare the first."
"there is no previous model with which to compare the first.\n"
]
},
{
@@ -2621,17 +2656,18 @@
"id": "9a5ec13f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:38.927819Z",
"iopub.status.busy": "2023-07-31T02:14:38.927721Z",
"iopub.status.idle": "2023-07-31T02:14:39.027635Z",
"shell.execute_reply": "2023-07-31T02:14:39.027301Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.592639Z",
"iopub.status.busy": "2023-08-06T17:34:47.592536Z",
"iopub.status.idle": "2023-08-06T17:34:47.692002Z",
"shell.execute_reply": "2023-08-06T17:34:47.691684Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.lines.Line2D at 0x154a82f50>"
"<matplotlib.lines.Line2D at 0x15f00b090>"
]
},
"execution_count": 34,
@@ -2654,7 +2690,7 @@
"ax.scatter(results3.fittedvalues, results3.resid)\n",
"ax.set_xlabel('Fitted value')\n",
"ax.set_ylabel('Residual')\n",
"ax.axhline(0, c='k', ls='--')"
"ax.axhline(0, c='k', ls='--')\n"
]
},
{
@@ -2665,7 +2701,7 @@
"We see that when the quadratic term is included in the model,\n",
"there is little discernible pattern in the residuals.\n",
"In order to create a cubic or higher-degree polynomial fit, we can simply change the degree argument\n",
"to `poly()`."
"to `poly()`.\n"
]
},
{
@@ -2686,11 +2722,12 @@
"id": "09bbc0c6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:39.029410Z",
"iopub.status.busy": "2023-07-31T02:14:39.029292Z",
"iopub.status.idle": "2023-07-31T02:14:39.034985Z",
"shell.execute_reply": "2023-07-31T02:14:39.034677Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.693776Z",
"iopub.status.busy": "2023-08-06T17:34:47.693649Z",
"iopub.status.idle": "2023-08-06T17:34:47.700319Z",
"shell.execute_reply": "2023-08-06T17:34:47.700055Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -2708,7 +2745,7 @@
],
"source": [
"Carseats = load_data('Carseats')\n",
"Carseats.columns"
"Carseats.columns\n"
]
},
{
@@ -2736,11 +2773,12 @@
"id": "2e1da1fa",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:39.036665Z",
"iopub.status.busy": "2023-07-31T02:14:39.036539Z",
"iopub.status.idle": "2023-07-31T02:14:39.057391Z",
"shell.execute_reply": "2023-07-31T02:14:39.057099Z"
}
"iopub.execute_input": "2023-08-06T17:34:47.701981Z",
"iopub.status.busy": "2023-08-06T17:34:47.701852Z",
"iopub.status.idle": "2023-08-06T17:34:47.722346Z",
"shell.execute_reply": "2023-08-06T17:34:47.722062Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -2903,7 +2941,7 @@
" ('Price', 'Age')]\n",
"X = MS(final).fit_transform(Carseats)\n",
"model = sm.OLS(y, X)\n",
"summarize(model.fit())"
"summarize(model.fit())\n"
]
},
{
@@ -2922,15 +2960,16 @@
"positive indicates that a good shelving location is associated with high sales (relative to a bad location).\n",
"And `ShelveLoc[Medium]` has a smaller positive coefficient,\n",
"indicating that a medium shelving location leads to higher sales than a bad\n",
"shelving location, but lower sales than a good shelving location."
"shelving location, but lower sales than a good shelving location.\n",
"\n"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"formats": "ipynb,md:myst",
"main_language": "python"
"main_language": "python",
"notebook_metadata_filter": "-all"
},
"language_info": {
"codemirror_mode": {

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,9 @@
"id": "3a3f2f85",
"metadata": {},
"source": [
"# Chapter 5"
"\n",
"# Chapter 5\n",
"\n"
]
},
{
@@ -27,11 +29,12 @@
"id": "60fad148",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:45.345335Z",
"iopub.status.busy": "2023-07-31T02:14:45.345019Z",
"iopub.status.idle": "2023-07-31T02:14:46.175442Z",
"shell.execute_reply": "2023-07-31T02:14:46.174945Z"
}
"iopub.execute_input": "2023-08-06T17:34:54.032413Z",
"iopub.status.busy": "2023-08-06T17:34:54.032111Z",
"iopub.status.idle": "2023-08-06T17:34:54.873921Z",
"shell.execute_reply": "2023-08-06T17:34:54.873325Z"
},
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
@@ -41,7 +44,7 @@
"from ISLP.models import (ModelSpec as MS,\n",
" summarize,\n",
" poly)\n",
"from sklearn.model_selection import train_test_split"
"from sklearn.model_selection import train_test_split\n"
]
},
{
@@ -58,11 +61,12 @@
"id": "2478aeb4",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.177622Z",
"iopub.status.busy": "2023-07-31T02:14:46.177374Z",
"iopub.status.idle": "2023-07-31T02:14:46.179577Z",
"shell.execute_reply": "2023-07-31T02:14:46.179312Z"
}
"iopub.execute_input": "2023-08-06T17:34:54.876060Z",
"iopub.status.busy": "2023-08-06T17:34:54.875842Z",
"iopub.status.idle": "2023-08-06T17:34:54.878002Z",
"shell.execute_reply": "2023-08-06T17:34:54.877731Z"
},
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
@@ -72,7 +76,7 @@
" KFold,\n",
" ShuffleSplit)\n",
"from sklearn.base import clone\n",
"from ISLP.models import sklearn_sm"
"from ISLP.models import sklearn_sm\n"
]
},
{
@@ -92,7 +96,7 @@
"when performing operations like this that contain an\n",
"element of randomness, so that the results obtained can be reproduced\n",
"precisely at a later time. We set the random seed of the splitter\n",
"with the argument `random_state=0`."
"with the argument `random_state=0`. "
]
},
{
@@ -101,10 +105,10 @@
"id": "99c95faf",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.181100Z",
"iopub.status.busy": "2023-07-31T02:14:46.180998Z",
"iopub.status.idle": "2023-07-31T02:14:46.187844Z",
"shell.execute_reply": "2023-07-31T02:14:46.187574Z"
"iopub.execute_input": "2023-08-06T17:34:54.879485Z",
"iopub.status.busy": "2023-08-06T17:34:54.879378Z",
"iopub.status.idle": "2023-08-06T17:34:54.886401Z",
"shell.execute_reply": "2023-08-06T17:34:54.886117Z"
}
},
"outputs": [],
@@ -112,7 +116,7 @@
"Auto = load_data('Auto')\n",
"Auto_train, Auto_valid = train_test_split(Auto,\n",
" test_size=196,\n",
" random_state=0)"
" random_state=0)\n"
]
},
{
@@ -129,10 +133,10 @@
"id": "41b0717d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.189426Z",
"iopub.status.busy": "2023-07-31T02:14:46.189344Z",
"iopub.status.idle": "2023-07-31T02:14:46.193204Z",
"shell.execute_reply": "2023-07-31T02:14:46.192935Z"
"iopub.execute_input": "2023-08-06T17:34:54.887942Z",
"iopub.status.busy": "2023-08-06T17:34:54.887854Z",
"iopub.status.idle": "2023-08-06T17:34:54.891560Z",
"shell.execute_reply": "2023-08-06T17:34:54.891297Z"
}
},
"outputs": [],
@@ -141,7 +145,7 @@
"X_train = hp_mm.fit_transform(Auto_train)\n",
"y_train = Auto_train['mpg']\n",
"model = sm.OLS(y_train, X_train)\n",
"results = model.fit()"
"results = model.fit()\n"
]
},
{
@@ -159,10 +163,10 @@
"id": "d7ea3c0d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.194683Z",
"iopub.status.busy": "2023-07-31T02:14:46.194591Z",
"iopub.status.idle": "2023-07-31T02:14:46.198962Z",
"shell.execute_reply": "2023-07-31T02:14:46.198705Z"
"iopub.execute_input": "2023-08-06T17:34:54.893051Z",
"iopub.status.busy": "2023-08-06T17:34:54.892965Z",
"iopub.status.idle": "2023-08-06T17:34:54.897036Z",
"shell.execute_reply": "2023-08-06T17:34:54.896785Z"
}
},
"outputs": [
@@ -181,7 +185,7 @@
"X_valid = hp_mm.transform(Auto_valid)\n",
"y_valid = Auto_valid['mpg']\n",
"valid_pred = results.predict(X_valid)\n",
"np.mean((y_valid - valid_pred)**2)"
"np.mean((y_valid - valid_pred)**2)\n"
]
},
{
@@ -203,10 +207,10 @@
"id": "a02a2d05",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.200468Z",
"iopub.status.busy": "2023-07-31T02:14:46.200388Z",
"iopub.status.idle": "2023-07-31T02:14:46.202553Z",
"shell.execute_reply": "2023-07-31T02:14:46.202309Z"
"iopub.execute_input": "2023-08-06T17:34:54.898500Z",
"iopub.status.busy": "2023-08-06T17:34:54.898412Z",
"iopub.status.idle": "2023-08-06T17:34:54.900767Z",
"shell.execute_reply": "2023-08-06T17:34:54.900515Z"
}
},
"outputs": [],
@@ -226,7 +230,7 @@
" results = sm.OLS(y_train, X_train).fit()\n",
" test_pred = results.predict(X_test)\n",
"\n",
" return np.mean((y_test - test_pred)**2)"
" return np.mean((y_test - test_pred)**2)\n"
]
},
{
@@ -246,10 +250,10 @@
"id": "51d93dea",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.203992Z",
"iopub.status.busy": "2023-07-31T02:14:46.203891Z",
"iopub.status.idle": "2023-07-31T02:14:46.213999Z",
"shell.execute_reply": "2023-07-31T02:14:46.213730Z"
"iopub.execute_input": "2023-08-06T17:34:54.902229Z",
"iopub.status.busy": "2023-08-06T17:34:54.902150Z",
"iopub.status.idle": "2023-08-06T17:34:54.912255Z",
"shell.execute_reply": "2023-08-06T17:34:54.912027Z"
}
},
"outputs": [
@@ -271,7 +275,7 @@
" 'mpg',\n",
" Auto_train,\n",
" Auto_valid)\n",
"MSE"
"MSE\n"
]
},
{
@@ -290,10 +294,10 @@
"id": "83432f06",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.215488Z",
"iopub.status.busy": "2023-07-31T02:14:46.215401Z",
"iopub.status.idle": "2023-07-31T02:14:46.225856Z",
"shell.execute_reply": "2023-07-31T02:14:46.225598Z"
"iopub.execute_input": "2023-08-06T17:34:54.913788Z",
"iopub.status.busy": "2023-08-06T17:34:54.913696Z",
"iopub.status.idle": "2023-08-06T17:34:54.924230Z",
"shell.execute_reply": "2023-08-06T17:34:54.923983Z"
}
},
"outputs": [
@@ -373,11 +377,12 @@
"id": "bcfc433f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.227362Z",
"iopub.status.busy": "2023-07-31T02:14:46.227281Z",
"iopub.status.idle": "2023-07-31T02:14:46.785216Z",
"shell.execute_reply": "2023-07-31T02:14:46.784936Z"
}
"iopub.execute_input": "2023-08-06T17:34:54.925794Z",
"iopub.status.busy": "2023-08-06T17:34:54.925711Z",
"iopub.status.idle": "2023-08-06T17:34:55.485718Z",
"shell.execute_reply": "2023-08-06T17:34:55.485445Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -400,7 +405,7 @@
" Y,\n",
" cv=Auto.shape[0])\n",
"cv_err = np.mean(cv_results['test_score'])\n",
"cv_err"
"cv_err\n"
]
},
{
@@ -440,11 +445,12 @@
"id": "f951ffc8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:46.786874Z",
"iopub.status.busy": "2023-07-31T02:14:46.786773Z",
"iopub.status.idle": "2023-07-31T02:14:47.387263Z",
"shell.execute_reply": "2023-07-31T02:14:47.386958Z"
}
"iopub.execute_input": "2023-08-06T17:34:55.487370Z",
"iopub.status.busy": "2023-08-06T17:34:55.487270Z",
"iopub.status.idle": "2023-08-06T17:34:56.086269Z",
"shell.execute_reply": "2023-08-06T17:34:56.085986Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -469,7 +475,7 @@
" Y,\n",
" cv=Auto.shape[0])\n",
" cv_error[i] = np.mean(M_CV['test_score'])\n",
"cv_error"
"cv_error\n"
]
},
{
@@ -487,7 +493,7 @@
"It has two arrays as\n",
"arguments, and then forms a larger\n",
"array where the operation is applied to each pair of elements of the\n",
"two arrays."
"two arrays. "
]
},
{
@@ -496,10 +502,10 @@
"id": "e3610b5a",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.388905Z",
"iopub.status.busy": "2023-07-31T02:14:47.388796Z",
"iopub.status.idle": "2023-07-31T02:14:47.391290Z",
"shell.execute_reply": "2023-07-31T02:14:47.391034Z"
"iopub.execute_input": "2023-08-06T17:34:56.087846Z",
"iopub.status.busy": "2023-08-06T17:34:56.087739Z",
"iopub.status.idle": "2023-08-06T17:34:56.090120Z",
"shell.execute_reply": "2023-08-06T17:34:56.089850Z"
}
},
"outputs": [
@@ -519,7 +525,7 @@
"source": [
"A = np.array([3, 5, 9])\n",
"B = np.array([2, 4])\n",
"np.add.outer(A, B)"
"np.add.outer(A, B)\n"
]
},
{
@@ -538,11 +544,12 @@
"id": "1627460d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.392816Z",
"iopub.status.busy": "2023-07-31T02:14:47.392715Z",
"iopub.status.idle": "2023-07-31T02:14:47.414278Z",
"shell.execute_reply": "2023-07-31T02:14:47.414015Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.091649Z",
"iopub.status.busy": "2023-08-06T17:34:56.091540Z",
"iopub.status.idle": "2023-08-06T17:34:56.113415Z",
"shell.execute_reply": "2023-08-06T17:34:56.113164Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -568,7 +575,7 @@
" Y,\n",
" cv=cv)\n",
" cv_error[i] = np.mean(M_CV['test_score'])\n",
"cv_error"
"cv_error\n"
]
},
{
@@ -602,11 +609,12 @@
"id": "8a636468",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.415780Z",
"iopub.status.busy": "2023-07-31T02:14:47.415699Z",
"iopub.status.idle": "2023-07-31T02:14:47.421148Z",
"shell.execute_reply": "2023-07-31T02:14:47.420911Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.114990Z",
"iopub.status.busy": "2023-08-06T17:34:56.114909Z",
"iopub.status.idle": "2023-08-06T17:34:56.120375Z",
"shell.execute_reply": "2023-08-06T17:34:56.120121Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -628,7 +636,7 @@
" Auto.drop(['mpg'], axis=1),\n",
" Auto['mpg'],\n",
" cv=validation);\n",
"results['test_score']"
"results['test_score']\n"
]
},
{
@@ -645,10 +653,10 @@
"id": "746aeccd",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.422643Z",
"iopub.status.busy": "2023-07-31T02:14:47.422562Z",
"iopub.status.idle": "2023-07-31T02:14:47.442227Z",
"shell.execute_reply": "2023-07-31T02:14:47.441935Z"
"iopub.execute_input": "2023-08-06T17:34:56.121875Z",
"iopub.status.busy": "2023-08-06T17:34:56.121788Z",
"iopub.status.idle": "2023-08-06T17:34:56.141044Z",
"shell.execute_reply": "2023-08-06T17:34:56.140787Z"
}
},
"outputs": [
@@ -671,7 +679,7 @@
" Auto.drop(['mpg'], axis=1),\n",
" Auto['mpg'],\n",
" cv=validation)\n",
"results['test_score'].mean(), results['test_score'].std()"
"results['test_score'].mean(), results['test_score'].std()\n"
]
},
{
@@ -719,11 +727,12 @@
"id": "daa53d0c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.443783Z",
"iopub.status.busy": "2023-07-31T02:14:47.443671Z",
"iopub.status.idle": "2023-07-31T02:14:47.447006Z",
"shell.execute_reply": "2023-07-31T02:14:47.446693Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.142563Z",
"iopub.status.busy": "2023-08-06T17:34:56.142482Z",
"iopub.status.idle": "2023-08-06T17:34:56.146459Z",
"shell.execute_reply": "2023-08-06T17:34:56.146215Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
@@ -731,7 +740,7 @@
"def alpha_func(D, idx):\n",
" cov_ = np.cov(D[['X','Y']].loc[idx], rowvar=False)\n",
" return ((cov_[1,1] - cov_[0,1]) /\n",
" (cov_[0,0]+cov_[1,1]-2*cov_[0,1]))"
" (cov_[0,0]+cov_[1,1]-2*cov_[0,1]))\n"
]
},
{
@@ -752,10 +761,10 @@
"id": "578c9564",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.448908Z",
"iopub.status.busy": "2023-07-31T02:14:47.448792Z",
"iopub.status.idle": "2023-07-31T02:14:47.451963Z",
"shell.execute_reply": "2023-07-31T02:14:47.451646Z"
"iopub.execute_input": "2023-08-06T17:34:56.147902Z",
"iopub.status.busy": "2023-08-06T17:34:56.147820Z",
"iopub.status.idle": "2023-08-06T17:34:56.150542Z",
"shell.execute_reply": "2023-08-06T17:34:56.150288Z"
}
},
"outputs": [
@@ -791,11 +800,12 @@
"id": "5754d6d5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.453581Z",
"iopub.status.busy": "2023-07-31T02:14:47.453484Z",
"iopub.status.idle": "2023-07-31T02:14:47.457771Z",
"shell.execute_reply": "2023-07-31T02:14:47.457529Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.151951Z",
"iopub.status.busy": "2023-08-06T17:34:56.151874Z",
"iopub.status.idle": "2023-08-06T17:34:56.155780Z",
"shell.execute_reply": "2023-08-06T17:34:56.155537Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -833,11 +843,12 @@
"id": "8320a49c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.459153Z",
"iopub.status.busy": "2023-07-31T02:14:47.459066Z",
"iopub.status.idle": "2023-07-31T02:14:47.461370Z",
"shell.execute_reply": "2023-07-31T02:14:47.461113Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.157150Z",
"iopub.status.busy": "2023-08-06T17:34:56.157060Z",
"iopub.status.idle": "2023-08-06T17:34:56.159342Z",
"shell.execute_reply": "2023-08-06T17:34:56.159133Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
@@ -868,7 +879,7 @@
"unimportant and simply makes sure the loop is executed `B` times.\n",
"\n",
"Let's use our function to evaluate the accuracy of our\n",
"estimate of $\\alpha$ using $B=1{,}000$ bootstrap replications."
"estimate of $\\alpha$ using $B=1{,}000$ bootstrap replications. "
]
},
{
@@ -877,10 +888,10 @@
"id": "e656aa1f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.462792Z",
"iopub.status.busy": "2023-07-31T02:14:47.462698Z",
"iopub.status.idle": "2023-07-31T02:14:47.745378Z",
"shell.execute_reply": "2023-07-31T02:14:47.745111Z"
"iopub.execute_input": "2023-08-06T17:34:56.160707Z",
"iopub.status.busy": "2023-08-06T17:34:56.160617Z",
"iopub.status.idle": "2023-08-06T17:34:56.455515Z",
"shell.execute_reply": "2023-08-06T17:34:56.455259Z"
}
},
"outputs": [
@@ -900,7 +911,7 @@
" Portfolio,\n",
" B=1000,\n",
" seed=0)\n",
"alpha_SE"
"alpha_SE\n"
]
},
{
@@ -943,11 +954,12 @@
"id": "c5d14195",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.746827Z",
"iopub.status.busy": "2023-07-31T02:14:47.746750Z",
"iopub.status.idle": "2023-07-31T02:14:47.748785Z",
"shell.execute_reply": "2023-07-31T02:14:47.748522Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.457024Z",
"iopub.status.busy": "2023-08-06T17:34:56.456940Z",
"iopub.status.idle": "2023-08-06T17:34:56.459011Z",
"shell.execute_reply": "2023-08-06T17:34:56.458766Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
@@ -977,15 +989,16 @@
"id": "7e0523f0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.750186Z",
"iopub.status.busy": "2023-07-31T02:14:47.750111Z",
"iopub.status.idle": "2023-07-31T02:14:47.751794Z",
"shell.execute_reply": "2023-07-31T02:14:47.751573Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.460430Z",
"iopub.status.busy": "2023-08-06T17:34:56.460350Z",
"iopub.status.idle": "2023-08-06T17:34:56.462034Z",
"shell.execute_reply": "2023-08-06T17:34:56.461808Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
"hp_func = partial(boot_OLS, MS(['horsepower']), 'mpg')"
"hp_func = partial(boot_OLS, MS(['horsepower']), 'mpg')\n"
]
},
{
@@ -1009,11 +1022,12 @@
"id": "32836e93",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.753215Z",
"iopub.status.busy": "2023-07-31T02:14:47.753126Z",
"iopub.status.idle": "2023-07-31T02:14:47.768431Z",
"shell.execute_reply": "2023-07-31T02:14:47.768162Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.463386Z",
"iopub.status.busy": "2023-08-06T17:34:56.463311Z",
"iopub.status.idle": "2023-08-06T17:34:56.477900Z",
"shell.execute_reply": "2023-08-06T17:34:56.477641Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1041,7 +1055,7 @@
"np.array([hp_func(Auto,\n",
" rng.choice(392,\n",
" 392,\n",
" replace=True)) for _ in range(10)])"
" replace=True)) for _ in range(10)])\n"
]
},
{
@@ -1059,11 +1073,12 @@
"id": "14ce3afa",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:47.769912Z",
"iopub.status.busy": "2023-07-31T02:14:47.769830Z",
"iopub.status.idle": "2023-07-31T02:14:48.926601Z",
"shell.execute_reply": "2023-07-31T02:14:48.926292Z"
}
"iopub.execute_input": "2023-08-06T17:34:56.479382Z",
"iopub.status.busy": "2023-08-06T17:34:56.479299Z",
"iopub.status.idle": "2023-08-06T17:34:57.627980Z",
"shell.execute_reply": "2023-08-06T17:34:57.627706Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1084,7 +1099,7 @@
" Auto,\n",
" B=1000,\n",
" seed=10)\n",
"hp_se"
"hp_se\n"
]
},
{
@@ -1108,11 +1123,12 @@
"id": "6b1213ac",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:48.928216Z",
"iopub.status.busy": "2023-07-31T02:14:48.928105Z",
"iopub.status.idle": "2023-07-31T02:14:48.985288Z",
"shell.execute_reply": "2023-07-31T02:14:48.985012Z"
}
"iopub.execute_input": "2023-08-06T17:34:57.629628Z",
"iopub.status.busy": "2023-08-06T17:34:57.629520Z",
"iopub.status.idle": "2023-08-06T17:34:57.687018Z",
"shell.execute_reply": "2023-08-06T17:34:57.686748Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -1131,7 +1147,7 @@
"source": [
"hp_model.fit(Auto, Auto['mpg'])\n",
"model_se = summarize(hp_model.results_)['std err']\n",
"model_se"
"model_se\n"
]
},
{
@@ -1179,10 +1195,10 @@
"id": "af99b778",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:48.986895Z",
"iopub.status.busy": "2023-07-31T02:14:48.986777Z",
"iopub.status.idle": "2023-07-31T02:14:50.767999Z",
"shell.execute_reply": "2023-07-31T02:14:50.767687Z"
"iopub.execute_input": "2023-08-06T17:34:57.688662Z",
"iopub.status.busy": "2023-08-06T17:34:57.688521Z",
"iopub.status.idle": "2023-08-06T17:34:59.481117Z",
"shell.execute_reply": "2023-08-06T17:34:59.480813Z"
}
},
"outputs": [
@@ -1205,7 +1221,7 @@
"quad_func = partial(boot_OLS,\n",
" quad_model,\n",
" 'mpg')\n",
"boot_SE(quad_func, Auto, B=1000)"
"boot_SE(quad_func, Auto, B=1000)\n"
]
},
{
@@ -1222,11 +1238,12 @@
"id": "0206281e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:14:50.769670Z",
"iopub.status.busy": "2023-07-31T02:14:50.769556Z",
"iopub.status.idle": "2023-07-31T02:14:50.778195Z",
"shell.execute_reply": "2023-07-31T02:14:50.777948Z"
}
"iopub.execute_input": "2023-08-06T17:34:59.482760Z",
"iopub.status.busy": "2023-08-06T17:34:59.482640Z",
"iopub.status.idle": "2023-08-06T17:34:59.491295Z",
"shell.execute_reply": "2023-08-06T17:34:59.491041Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1246,15 +1263,24 @@
"source": [
"M = sm.OLS(Auto['mpg'],\n",
" quad_model.fit_transform(Auto))\n",
"summarize(M.fit())['std err']"
"summarize(M.fit())['std err']\n"
]
},
{
"cell_type": "markdown",
"id": "0c11a71f",
"metadata": {},
"source": [
"\n",
"\n"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"formats": "ipynb,md:myst",
"main_language": "python"
"main_language": "python",
"notebook_metadata_filter": "-all"
},
"language_info": {
"codemirror_mode": {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -5,7 +5,9 @@
"id": "d45c6d2b",
"metadata": {},
"source": [
"# Chapter 9"
"\n",
"# Chapter 9\n",
"\n"
]
},
{
@@ -26,18 +28,19 @@
"id": "eeaa5be0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:31.933940Z",
"iopub.status.busy": "2023-07-31T02:15:31.933716Z",
"iopub.status.idle": "2023-07-31T02:15:32.774361Z",
"shell.execute_reply": "2023-07-31T02:15:32.773846Z"
}
"iopub.execute_input": "2023-08-06T17:35:41.109844Z",
"iopub.status.busy": "2023-08-06T17:35:41.109434Z",
"iopub.status.idle": "2023-08-06T17:35:41.992454Z",
"shell.execute_reply": "2023-08-06T17:35:41.991881Z"
},
"lines_to_next_cell": 0
},
"outputs": [],
"source": [
"import numpy as np\n",
"from matplotlib.pyplot import subplots, cm\n",
"import sklearn.model_selection as skm\n",
"from ISLP import load_data, confusion_table"
"from ISLP import load_data, confusion_table\n"
]
},
{
@@ -55,17 +58,17 @@
"id": "41a59634",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:32.776623Z",
"iopub.status.busy": "2023-07-31T02:15:32.776343Z",
"iopub.status.idle": "2023-07-31T02:15:32.807761Z",
"shell.execute_reply": "2023-07-31T02:15:32.807471Z"
"iopub.execute_input": "2023-08-06T17:35:41.994815Z",
"iopub.status.busy": "2023-08-06T17:35:41.994482Z",
"iopub.status.idle": "2023-08-06T17:35:42.026716Z",
"shell.execute_reply": "2023-08-06T17:35:42.026409Z"
}
},
"outputs": [],
"source": [
"from sklearn.svm import SVC\n",
"from ISLP.svm import plot as plot_svm\n",
"from sklearn.metrics import RocCurveDisplay"
"from sklearn.metrics import RocCurveDisplay\n"
]
},
{
@@ -83,15 +86,15 @@
"id": "c9a175d7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:32.809461Z",
"iopub.status.busy": "2023-07-31T02:15:32.809366Z",
"iopub.status.idle": "2023-07-31T02:15:32.811185Z",
"shell.execute_reply": "2023-07-31T02:15:32.810941Z"
"iopub.execute_input": "2023-08-06T17:35:42.028394Z",
"iopub.status.busy": "2023-08-06T17:35:42.028303Z",
"iopub.status.idle": "2023-08-06T17:35:42.030112Z",
"shell.execute_reply": "2023-08-06T17:35:42.029879Z"
}
},
"outputs": [],
"source": [
"roc_curve = RocCurveDisplay.from_estimator # shorthand"
"roc_curve = RocCurveDisplay.from_estimator # shorthand\n"
]
},
{
@@ -123,11 +126,12 @@
"id": "a7216b47",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:32.812622Z",
"iopub.status.busy": "2023-07-31T02:15:32.812526Z",
"iopub.status.idle": "2023-07-31T02:15:32.919064Z",
"shell.execute_reply": "2023-07-31T02:15:32.918299Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.031508Z",
"iopub.status.busy": "2023-08-06T17:35:42.031412Z",
"iopub.status.idle": "2023-08-06T17:35:42.144107Z",
"shell.execute_reply": "2023-08-06T17:35:42.141512Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -150,7 +154,7 @@
"ax.scatter(X[:,0],\n",
" X[:,1],\n",
" c=y,\n",
" cmap=cm.coolwarm);"
" cmap=cm.coolwarm);\n"
]
},
{
@@ -167,11 +171,12 @@
"id": "ed329198",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:32.924346Z",
"iopub.status.busy": "2023-07-31T02:15:32.922626Z",
"iopub.status.idle": "2023-07-31T02:15:32.934623Z",
"shell.execute_reply": "2023-07-31T02:15:32.934041Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.148661Z",
"iopub.status.busy": "2023-08-06T17:35:42.148275Z",
"iopub.status.idle": "2023-08-06T17:35:42.164597Z",
"shell.execute_reply": "2023-08-06T17:35:42.162951Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -190,7 +195,7 @@
],
"source": [
"svm_linear = SVC(C=10, kernel='linear')\n",
"svm_linear.fit(X, y)"
"svm_linear.fit(X, y)\n"
]
},
{
@@ -210,10 +215,10 @@
"id": "95494b8b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:32.938730Z",
"iopub.status.busy": "2023-07-31T02:15:32.937883Z",
"iopub.status.idle": "2023-07-31T02:15:33.117216Z",
"shell.execute_reply": "2023-07-31T02:15:33.116876Z"
"iopub.execute_input": "2023-08-06T17:35:42.170134Z",
"iopub.status.busy": "2023-08-06T17:35:42.169857Z",
"iopub.status.idle": "2023-08-06T17:35:42.356574Z",
"shell.execute_reply": "2023-08-06T17:35:42.356275Z"
}
},
"outputs": [
@@ -233,7 +238,7 @@
"plot_svm(X,\n",
" y,\n",
" svm_linear,\n",
" ax=ax)"
" ax=ax)\n"
]
},
{
@@ -255,11 +260,12 @@
"id": "98c2236f",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.119014Z",
"iopub.status.busy": "2023-07-31T02:15:33.118891Z",
"iopub.status.idle": "2023-07-31T02:15:33.258092Z",
"shell.execute_reply": "2023-07-31T02:15:33.257817Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.358304Z",
"iopub.status.busy": "2023-08-06T17:35:42.358185Z",
"iopub.status.idle": "2023-08-06T17:35:42.497338Z",
"shell.execute_reply": "2023-08-06T17:35:42.496986Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -280,7 +286,7 @@
"plot_svm(X,\n",
" y,\n",
" svm_linear_small,\n",
" ax=ax)"
" ax=ax)\n"
]
},
{
@@ -300,11 +306,12 @@
"id": "b498f594",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.259755Z",
"iopub.status.busy": "2023-07-31T02:15:33.259627Z",
"iopub.status.idle": "2023-07-31T02:15:33.262432Z",
"shell.execute_reply": "2023-07-31T02:15:33.262105Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.499133Z",
"iopub.status.busy": "2023-08-06T17:35:42.499013Z",
"iopub.status.idle": "2023-08-06T17:35:42.501656Z",
"shell.execute_reply": "2023-08-06T17:35:42.501370Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -319,7 +326,7 @@
}
],
"source": [
"svm_linear.coef_"
"svm_linear.coef_\n"
]
},
{
@@ -337,11 +344,12 @@
"id": "b65e80d6",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.263978Z",
"iopub.status.busy": "2023-07-31T02:15:33.263877Z",
"iopub.status.idle": "2023-07-31T02:15:33.290665Z",
"shell.execute_reply": "2023-07-31T02:15:33.290398Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.503232Z",
"iopub.status.busy": "2023-08-06T17:35:42.503114Z",
"iopub.status.idle": "2023-08-06T17:35:42.530360Z",
"shell.execute_reply": "2023-08-06T17:35:42.530089Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -365,7 +373,7 @@
" cv=kfold,\n",
" scoring='accuracy')\n",
"grid.fit(X, y)\n",
"grid.best_params_"
"grid.best_params_\n"
]
},
{
@@ -384,11 +392,12 @@
"id": "bba8fad7",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.292219Z",
"iopub.status.busy": "2023-07-31T02:15:33.292135Z",
"iopub.status.idle": "2023-07-31T02:15:33.294550Z",
"shell.execute_reply": "2023-07-31T02:15:33.294318Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.533039Z",
"iopub.status.busy": "2023-08-06T17:35:42.532868Z",
"iopub.status.idle": "2023-08-06T17:35:42.535494Z",
"shell.execute_reply": "2023-08-06T17:35:42.535174Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -403,7 +412,7 @@
}
],
"source": [
"grid.cv_results_[('mean_test_score')]"
"grid.cv_results_[('mean_test_score')]\n"
]
},
{
@@ -424,17 +433,17 @@
"id": "ad64269d",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.296023Z",
"iopub.status.busy": "2023-07-31T02:15:33.295928Z",
"iopub.status.idle": "2023-07-31T02:15:33.297787Z",
"shell.execute_reply": "2023-07-31T02:15:33.297542Z"
"iopub.execute_input": "2023-08-06T17:35:42.537255Z",
"iopub.status.busy": "2023-08-06T17:35:42.537133Z",
"iopub.status.idle": "2023-08-06T17:35:42.539335Z",
"shell.execute_reply": "2023-08-06T17:35:42.538987Z"
}
},
"outputs": [],
"source": [
"X_test = rng.standard_normal((20, 2))\n",
"y_test = np.array([-1]*10+[1]*10)\n",
"X_test[y_test==1] += 1"
"X_test[y_test==1] += 1\n"
]
},
{
@@ -453,10 +462,10 @@
"id": "5107fca1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.299207Z",
"iopub.status.busy": "2023-07-31T02:15:33.299107Z",
"iopub.status.idle": "2023-07-31T02:15:33.304700Z",
"shell.execute_reply": "2023-07-31T02:15:33.304434Z"
"iopub.execute_input": "2023-08-06T17:35:42.540795Z",
"iopub.status.busy": "2023-08-06T17:35:42.540697Z",
"iopub.status.idle": "2023-08-06T17:35:42.546310Z",
"shell.execute_reply": "2023-08-06T17:35:42.546056Z"
}
},
"outputs": [
@@ -520,7 +529,7 @@
"source": [
"best_ = grid.best_estimator_\n",
"y_test_hat = best_.predict(X_test)\n",
"confusion_table(y_test_hat, y_test)"
"confusion_table(y_test_hat, y_test)\n"
]
},
{
@@ -540,10 +549,10 @@
"id": "0320d9e0",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.306170Z",
"iopub.status.busy": "2023-07-31T02:15:33.306087Z",
"iopub.status.idle": "2023-07-31T02:15:33.310352Z",
"shell.execute_reply": "2023-07-31T02:15:33.310107Z"
"iopub.execute_input": "2023-08-06T17:35:42.547949Z",
"iopub.status.busy": "2023-08-06T17:35:42.547859Z",
"iopub.status.idle": "2023-08-06T17:35:42.552481Z",
"shell.execute_reply": "2023-08-06T17:35:42.552209Z"
}
},
"outputs": [
@@ -608,7 +617,7 @@
"svm_ = SVC(C=0.001,\n",
" kernel='linear').fit(X, y)\n",
"y_test_hat = svm_.predict(X_test)\n",
"confusion_table(y_test_hat, y_test)"
"confusion_table(y_test_hat, y_test)\n"
]
},
{
@@ -631,10 +640,10 @@
"id": "84d7e778",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.311857Z",
"iopub.status.busy": "2023-07-31T02:15:33.311774Z",
"iopub.status.idle": "2023-07-31T02:15:33.434376Z",
"shell.execute_reply": "2023-07-31T02:15:33.434084Z"
"iopub.execute_input": "2023-08-06T17:35:42.554007Z",
"iopub.status.busy": "2023-08-06T17:35:42.553921Z",
"iopub.status.idle": "2023-08-06T17:35:42.677356Z",
"shell.execute_reply": "2023-08-06T17:35:42.677072Z"
}
},
"outputs": [
@@ -652,7 +661,7 @@
"source": [
"X[y==1] += 1.9;\n",
"fig, ax = subplots(figsize=(8,8))\n",
"ax.scatter(X[:,0], X[:,1], c=y, cmap=cm.coolwarm);"
"ax.scatter(X[:,0], X[:,1], c=y, cmap=cm.coolwarm);\n"
]
},
{
@@ -669,10 +678,10 @@
"id": "abb1f8be",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.436068Z",
"iopub.status.busy": "2023-07-31T02:15:33.435948Z",
"iopub.status.idle": "2023-07-31T02:15:33.440665Z",
"shell.execute_reply": "2023-07-31T02:15:33.440404Z"
"iopub.execute_input": "2023-08-06T17:35:42.679000Z",
"iopub.status.busy": "2023-08-06T17:35:42.678886Z",
"iopub.status.idle": "2023-08-06T17:35:42.683759Z",
"shell.execute_reply": "2023-08-06T17:35:42.683448Z"
}
},
"outputs": [
@@ -736,7 +745,7 @@
"source": [
"svm_ = SVC(C=1e5, kernel='linear').fit(X, y)\n",
"y_hat = svm_.predict(X)\n",
"confusion_table(y_hat, y)"
"confusion_table(y_hat, y)\n"
]
},
{
@@ -747,7 +756,7 @@
"We fit the\n",
"support vector classifier and plot the resulting hyperplane, using a\n",
"very large value of `C` so that no observations are\n",
"misclassified."
"misclassified. "
]
},
{
@@ -756,11 +765,12 @@
"id": "2e4ed2f5",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.442227Z",
"iopub.status.busy": "2023-07-31T02:15:33.442140Z",
"iopub.status.idle": "2023-07-31T02:15:33.558482Z",
"shell.execute_reply": "2023-07-31T02:15:33.558203Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.685443Z",
"iopub.status.busy": "2023-08-06T17:35:42.685337Z",
"iopub.status.idle": "2023-08-06T17:35:42.800013Z",
"shell.execute_reply": "2023-08-06T17:35:42.799680Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -779,7 +789,7 @@
"plot_svm(X,\n",
" y,\n",
" svm_,\n",
" ax=ax)"
" ax=ax)\n"
]
},
{
@@ -800,10 +810,10 @@
"id": "164a611c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.560147Z",
"iopub.status.busy": "2023-07-31T02:15:33.560028Z",
"iopub.status.idle": "2023-07-31T02:15:33.565003Z",
"shell.execute_reply": "2023-07-31T02:15:33.564720Z"
"iopub.execute_input": "2023-08-06T17:35:42.801762Z",
"iopub.status.busy": "2023-08-06T17:35:42.801639Z",
"iopub.status.idle": "2023-08-06T17:35:42.806674Z",
"shell.execute_reply": "2023-08-06T17:35:42.806389Z"
}
},
"outputs": [
@@ -867,7 +877,7 @@
"source": [
"svm_ = SVC(C=0.1, kernel='linear').fit(X, y)\n",
"y_hat = svm_.predict(X)\n",
"confusion_table(y_hat, y)"
"confusion_table(y_hat, y)\n"
]
},
{
@@ -887,11 +897,12 @@
"id": "c67591a1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.566489Z",
"iopub.status.busy": "2023-07-31T02:15:33.566402Z",
"iopub.status.idle": "2023-07-31T02:15:33.687614Z",
"shell.execute_reply": "2023-07-31T02:15:33.687292Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.808296Z",
"iopub.status.busy": "2023-08-06T17:35:42.808164Z",
"iopub.status.idle": "2023-08-06T17:35:42.929878Z",
"shell.execute_reply": "2023-08-06T17:35:42.929590Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
@@ -910,7 +921,7 @@
"plot_svm(X,\n",
" y,\n",
" svm_,\n",
" ax=ax)"
" ax=ax)\n"
]
},
{
@@ -939,10 +950,10 @@
"id": "322be574",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.689275Z",
"iopub.status.busy": "2023-07-31T02:15:33.689167Z",
"iopub.status.idle": "2023-07-31T02:15:33.691316Z",
"shell.execute_reply": "2023-07-31T02:15:33.690975Z"
"iopub.execute_input": "2023-08-06T17:35:42.931502Z",
"iopub.status.busy": "2023-08-06T17:35:42.931386Z",
"iopub.status.idle": "2023-08-06T17:35:42.933592Z",
"shell.execute_reply": "2023-08-06T17:35:42.933305Z"
}
},
"outputs": [],
@@ -950,7 +961,7 @@
"X = rng.standard_normal((200, 2))\n",
"X[:100] += 2\n",
"X[100:150] -= 2\n",
"y = np.array([1]*150+[2]*50)"
"y = np.array([1]*150+[2]*50)\n"
]
},
{
@@ -967,17 +978,18 @@
"id": "04fda182",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.692938Z",
"iopub.status.busy": "2023-07-31T02:15:33.692827Z",
"iopub.status.idle": "2023-07-31T02:15:33.780286Z",
"shell.execute_reply": "2023-07-31T02:15:33.779983Z"
}
"iopub.execute_input": "2023-08-06T17:35:42.935158Z",
"iopub.status.busy": "2023-08-06T17:35:42.935059Z",
"iopub.status.idle": "2023-08-06T17:35:43.022251Z",
"shell.execute_reply": "2023-08-06T17:35:43.021963Z"
},
"lines_to_next_cell": 2
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x1474d1090>"
"<matplotlib.collections.PathCollection at 0x17fa62650>"
]
},
"execution_count": 20,
@@ -1000,7 +1012,7 @@
"ax.scatter(X[:,0],\n",
" X[:,1],\n",
" c=y,\n",
" cmap=cm.coolwarm)"
" cmap=cm.coolwarm)\n"
]
},
{
@@ -1019,10 +1031,10 @@
"id": "0c2690d1",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.782021Z",
"iopub.status.busy": "2023-07-31T02:15:33.781908Z",
"iopub.status.idle": "2023-07-31T02:15:33.786110Z",
"shell.execute_reply": "2023-07-31T02:15:33.785823Z"
"iopub.execute_input": "2023-08-06T17:35:43.024031Z",
"iopub.status.busy": "2023-08-06T17:35:43.023910Z",
"iopub.status.idle": "2023-08-06T17:35:43.028070Z",
"shell.execute_reply": "2023-08-06T17:35:43.027774Z"
}
},
"outputs": [
@@ -1049,7 +1061,7 @@
" test_size=0.5,\n",
" random_state=0)\n",
"svm_rbf = SVC(kernel=\"rbf\", gamma=1, C=1)\n",
"svm_rbf.fit(X_train, y_train)"
"svm_rbf.fit(X_train, y_train)\n"
]
},
{
@@ -1058,7 +1070,7 @@
"metadata": {},
"source": [
"The plot shows that the resulting SVM has a decidedly non-linear\n",
"boundary."
"boundary. "
]
},
{
@@ -1067,10 +1079,10 @@
"id": "3eb171e8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:33.787740Z",
"iopub.status.busy": "2023-07-31T02:15:33.787646Z",
"iopub.status.idle": "2023-07-31T02:15:34.044862Z",
"shell.execute_reply": "2023-07-31T02:15:34.044553Z"
"iopub.execute_input": "2023-08-06T17:35:43.029639Z",
"iopub.status.busy": "2023-08-06T17:35:43.029527Z",
"iopub.status.idle": "2023-08-06T17:35:43.285558Z",
"shell.execute_reply": "2023-08-06T17:35:43.285220Z"
}
},
"outputs": [
@@ -1090,7 +1102,7 @@
"plot_svm(X_train,\n",
" y_train,\n",
" svm_rbf,\n",
" ax=ax)"
" ax=ax)\n"
]
},
{
@@ -1111,10 +1123,10 @@
"id": "9a6b905b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.046569Z",
"iopub.status.busy": "2023-07-31T02:15:34.046449Z",
"iopub.status.idle": "2023-07-31T02:15:34.197967Z",
"shell.execute_reply": "2023-07-31T02:15:34.197661Z"
"iopub.execute_input": "2023-08-06T17:35:43.287370Z",
"iopub.status.busy": "2023-08-06T17:35:43.287120Z",
"iopub.status.idle": "2023-08-06T17:35:43.439266Z",
"shell.execute_reply": "2023-08-06T17:35:43.438933Z"
}
},
"outputs": [
@@ -1136,7 +1148,7 @@
"plot_svm(X_train,\n",
" y_train,\n",
" svm_rbf,\n",
" ax=ax)"
" ax=ax)\n"
]
},
{
@@ -1155,10 +1167,10 @@
"id": "5ab01d6c",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.199716Z",
"iopub.status.busy": "2023-07-31T02:15:34.199590Z",
"iopub.status.idle": "2023-07-31T02:15:34.297753Z",
"shell.execute_reply": "2023-07-31T02:15:34.297384Z"
"iopub.execute_input": "2023-08-06T17:35:43.440929Z",
"iopub.status.busy": "2023-08-06T17:35:43.440803Z",
"iopub.status.idle": "2023-08-06T17:35:43.533538Z",
"shell.execute_reply": "2023-08-06T17:35:43.533266Z"
}
},
"outputs": [
@@ -1184,7 +1196,7 @@
" cv=kfold,\n",
" scoring='accuracy');\n",
"grid.fit(X_train, y_train)\n",
"grid.best_params_"
"grid.best_params_\n"
]
},
{
@@ -1203,10 +1215,10 @@
"id": "166a6acb",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.299759Z",
"iopub.status.busy": "2023-07-31T02:15:34.299634Z",
"iopub.status.idle": "2023-07-31T02:15:34.531852Z",
"shell.execute_reply": "2023-07-31T02:15:34.531406Z"
"iopub.execute_input": "2023-08-06T17:35:43.535272Z",
"iopub.status.busy": "2023-08-06T17:35:43.535151Z",
"iopub.status.idle": "2023-08-06T17:35:43.767970Z",
"shell.execute_reply": "2023-08-06T17:35:43.767689Z"
}
},
"outputs": [
@@ -1286,7 +1298,7 @@
" ax=ax)\n",
"\n",
"y_hat_test = best_svm.predict(X_test)\n",
"confusion_table(y_hat_test, y_test)"
"confusion_table(y_hat_test, y_test)\n"
]
},
{
@@ -1337,11 +1349,12 @@
"id": "0607fc41",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.533790Z",
"iopub.status.busy": "2023-07-31T02:15:34.533662Z",
"iopub.status.idle": "2023-07-31T02:15:34.622732Z",
"shell.execute_reply": "2023-07-31T02:15:34.622432Z"
}
"iopub.execute_input": "2023-08-06T17:35:43.769863Z",
"iopub.status.busy": "2023-08-06T17:35:43.769754Z",
"iopub.status.idle": "2023-08-06T17:35:43.862697Z",
"shell.execute_reply": "2023-08-06T17:35:43.862378Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1362,7 +1375,7 @@
" y_train,\n",
" name='Training',\n",
" color='r',\n",
" ax=ax);"
" ax=ax);\n"
]
},
{
@@ -1381,10 +1394,10 @@
"id": "5211a882",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.624457Z",
"iopub.status.busy": "2023-07-31T02:15:34.624340Z",
"iopub.status.idle": "2023-07-31T02:15:34.760006Z",
"shell.execute_reply": "2023-07-31T02:15:34.759724Z"
"iopub.execute_input": "2023-08-06T17:35:43.864472Z",
"iopub.status.busy": "2023-08-06T17:35:43.864353Z",
"iopub.status.idle": "2023-08-06T17:35:44.004720Z",
"shell.execute_reply": "2023-08-06T17:35:44.004393Z"
}
},
"outputs": [
@@ -1410,7 +1423,7 @@
" y_train,\n",
" name='Training $\\gamma=50$',\n",
" color='r',\n",
" ax=ax);"
" ax=ax);\n"
]
},
{
@@ -1430,10 +1443,10 @@
"id": "12acc4ff",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.761683Z",
"iopub.status.busy": "2023-07-31T02:15:34.761571Z",
"iopub.status.idle": "2023-07-31T02:15:34.765670Z",
"shell.execute_reply": "2023-07-31T02:15:34.765355Z"
"iopub.execute_input": "2023-08-06T17:35:44.006556Z",
"iopub.status.busy": "2023-08-06T17:35:44.006435Z",
"iopub.status.idle": "2023-08-06T17:35:44.010443Z",
"shell.execute_reply": "2023-08-06T17:35:44.010158Z"
}
},
"outputs": [],
@@ -1444,7 +1457,7 @@
" name='Test $\\gamma=50$',\n",
" color='b',\n",
" ax=ax)\n",
"fig;"
"fig;\n"
]
},
{
@@ -1461,10 +1474,10 @@
"id": "21c81913",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.767320Z",
"iopub.status.busy": "2023-07-31T02:15:34.767196Z",
"iopub.status.idle": "2023-07-31T02:15:34.862379Z",
"shell.execute_reply": "2023-07-31T02:15:34.862109Z"
"iopub.execute_input": "2023-08-06T17:35:44.012048Z",
"iopub.status.busy": "2023-08-06T17:35:44.011950Z",
"iopub.status.idle": "2023-08-06T17:35:44.108784Z",
"shell.execute_reply": "2023-08-06T17:35:44.108364Z"
}
},
"outputs": [
@@ -1492,7 +1505,7 @@
" y_,\n",
" name=name,\n",
" ax=ax,\n",
" color=c)"
" color=c)\n"
]
},
{
@@ -1516,10 +1529,10 @@
"id": "2fff4fa8",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.864078Z",
"iopub.status.busy": "2023-07-31T02:15:34.863962Z",
"iopub.status.idle": "2023-07-31T02:15:34.950893Z",
"shell.execute_reply": "2023-07-31T02:15:34.950574Z"
"iopub.execute_input": "2023-08-06T17:35:44.110511Z",
"iopub.status.busy": "2023-08-06T17:35:44.110386Z",
"iopub.status.idle": "2023-08-06T17:35:44.198408Z",
"shell.execute_reply": "2023-08-06T17:35:44.198028Z"
}
},
"outputs": [
@@ -1540,7 +1553,7 @@
"y = np.hstack([y, [0]*50])\n",
"X[y==0,1] += 2\n",
"fig, ax = subplots(figsize=(8,8))\n",
"ax.scatter(X[:,0], X[:,1], c=y, cmap=cm.coolwarm);"
"ax.scatter(X[:,0], X[:,1], c=y, cmap=cm.coolwarm);\n"
]
},
{
@@ -1557,11 +1570,12 @@
"id": "5396f2df",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:34.952587Z",
"iopub.status.busy": "2023-07-31T02:15:34.952475Z",
"iopub.status.idle": "2023-07-31T02:15:35.530841Z",
"shell.execute_reply": "2023-07-31T02:15:35.530380Z"
}
"iopub.execute_input": "2023-08-06T17:35:44.200116Z",
"iopub.status.busy": "2023-08-06T17:35:44.200009Z",
"iopub.status.idle": "2023-08-06T17:35:44.778024Z",
"shell.execute_reply": "2023-08-06T17:35:44.777739Z"
},
"lines_to_next_cell": 0
},
"outputs": [
{
@@ -1586,7 +1600,7 @@
" y,\n",
" svm_rbf_3,\n",
" scatter_cmap=cm.tab10,\n",
" ax=ax)"
" ax=ax)\n"
]
},
{
@@ -1620,10 +1634,10 @@
"id": "f63c575e",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:35.532698Z",
"iopub.status.busy": "2023-07-31T02:15:35.532574Z",
"iopub.status.idle": "2023-07-31T02:15:35.607009Z",
"shell.execute_reply": "2023-07-31T02:15:35.606701Z"
"iopub.execute_input": "2023-08-06T17:35:44.779762Z",
"iopub.status.busy": "2023-08-06T17:35:44.779640Z",
"iopub.status.idle": "2023-08-06T17:35:44.856770Z",
"shell.execute_reply": "2023-08-06T17:35:44.856276Z"
}
},
"outputs": [
@@ -1640,7 +1654,7 @@
],
"source": [
"Khan = load_data('Khan')\n",
"Khan['xtrain'].shape, Khan['xtest'].shape"
"Khan['xtrain'].shape, Khan['xtest'].shape\n"
]
},
{
@@ -1657,7 +1671,7 @@
"large number of features relative to the number of observations. This\n",
"suggests that we should use a linear kernel, because the additional\n",
"flexibility that will result from using a polynomial or radial kernel \n",
"is unnecessary."
"is unnecessary. "
]
},
{
@@ -1666,10 +1680,10 @@
"id": "32091338",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:35.608711Z",
"iopub.status.busy": "2023-07-31T02:15:35.608596Z",
"iopub.status.idle": "2023-07-31T02:15:35.637522Z",
"shell.execute_reply": "2023-07-31T02:15:35.637236Z"
"iopub.execute_input": "2023-08-06T17:35:44.858879Z",
"iopub.status.busy": "2023-08-06T17:35:44.858701Z",
"iopub.status.idle": "2023-08-06T17:35:44.889028Z",
"shell.execute_reply": "2023-08-06T17:35:44.888737Z"
}
},
"outputs": [
@@ -1758,7 +1772,7 @@
"khan_linear = SVC(kernel='linear', C=10)\n",
"khan_linear.fit(Khan['xtrain'], Khan['ytrain'])\n",
"confusion_table(khan_linear.predict(Khan['xtrain']),\n",
" Khan['ytrain'])"
" Khan['ytrain'])\n"
]
},
{
@@ -1780,10 +1794,10 @@
"id": "d9058023",
"metadata": {
"execution": {
"iopub.execute_input": "2023-07-31T02:15:35.639109Z",
"iopub.status.busy": "2023-07-31T02:15:35.639016Z",
"iopub.status.idle": "2023-07-31T02:15:35.649929Z",
"shell.execute_reply": "2023-07-31T02:15:35.649661Z"
"iopub.execute_input": "2023-08-06T17:35:44.891247Z",
"iopub.status.busy": "2023-08-06T17:35:44.891089Z",
"iopub.status.idle": "2023-08-06T17:35:44.902454Z",
"shell.execute_reply": "2023-08-06T17:35:44.902106Z"
}
},
"outputs": [
@@ -1870,7 +1884,7 @@
],
"source": [
"confusion_table(khan_linear.predict(Khan['xtest']),\n",
" Khan['ytest'])"
" Khan['ytest'])\n"
]
},
{
@@ -1878,15 +1892,16 @@
"id": "d0d5aba4",
"metadata": {},
"source": [
"We see that using `C=10` yields two test set errors on these data."
"We see that using `C=10` yields two test set errors on these data.\n",
"\n"
]
}
],
"metadata": {
"jupytext": {
"cell_metadata_filter": "-all",
"formats": "ipynb,md:myst",
"main_language": "python"
"main_language": "python",
"notebook_metadata_filter": "-all"
},
"language_info": {
"codemirror_mode": {