v2 version of labs
This commit is contained in:
@@ -5,7 +5,9 @@
|
||||
"id": "c7f4eb5a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Chapter 11"
|
||||
"\n",
|
||||
"# Chapter 11\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -32,10 +34,10 @@
|
||||
"id": "91ac40fd",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:36.910686Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:36.910197Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.043363Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.043019Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:46.303006Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:46.302426Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.388674Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.388193Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -44,7 +46,7 @@
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from ISLP.models import ModelSpec as MS\n",
|
||||
"from ISLP import load_data"
|
||||
"from ISLP import load_data\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -62,10 +64,10 @@
|
||||
"id": "99782418",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.045310Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.045152Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.137179Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.136883Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.390741Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.390555Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.484777Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.484422Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -76,7 +78,7 @@
|
||||
"from lifelines.statistics import \\\n",
|
||||
" (logrank_test,\n",
|
||||
" multivariate_logrank_test)\n",
|
||||
"from ISLP.survival import sim_time"
|
||||
"from ISLP.survival import sim_time\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -95,10 +97,10 @@
|
||||
"id": "3137149a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.138950Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.138844Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.145075Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.144817Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.486501Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.486385Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.493362Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.493058Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -115,7 +117,7 @@
|
||||
],
|
||||
"source": [
|
||||
"BrainCancer = load_data('BrainCancer')\n",
|
||||
"BrainCancer.columns"
|
||||
"BrainCancer.columns\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,11 +135,12 @@
|
||||
"id": "45963c92",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.146568Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.146458Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.149482Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.149235Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.494963Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.494863Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.497996Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.497741Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -154,7 +157,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"BrainCancer['sex'].value_counts()"
|
||||
"BrainCancer['sex'].value_counts()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -163,11 +166,12 @@
|
||||
"id": "73be61f6",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.151583Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.151488Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.153999Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.153712Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.499414Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.499312Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.502029Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.501779Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -186,7 +190,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"BrainCancer['diagnosis'].value_counts()"
|
||||
"BrainCancer['diagnosis'].value_counts()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -195,11 +199,12 @@
|
||||
"id": "572f0b9e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.155561Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.155460Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.158411Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.158146Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.503331Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.503251Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.506059Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.505826Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -216,7 +221,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"BrainCancer['status'].value_counts()"
|
||||
"BrainCancer['status'].value_counts()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -250,10 +255,10 @@
|
||||
"id": "92c39707",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.159867Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.159768Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.465840Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.465244Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.507415Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.507329Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.692016Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.690384Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -282,7 +287,7 @@
|
||||
"fig, ax = subplots(figsize=(8,8))\n",
|
||||
"km = KaplanMeierFitter()\n",
|
||||
"km_brain = km.fit(BrainCancer['time'], BrainCancer['status'])\n",
|
||||
"km_brain.plot(label='Kaplan Meier estimate', ax=ax)"
|
||||
"km_brain.plot(label='Kaplan Meier estimate', ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -316,10 +321,10 @@
|
||||
"id": "3fc7848c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.468449Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.468262Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.604720Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.604313Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.696231Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.695950Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.857113Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.856731Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -340,7 +345,7 @@
|
||||
"for sex, df in BrainCancer.groupby('sex'):\n",
|
||||
" by_sex[sex] = df\n",
|
||||
" km_sex = km.fit(df['time'], df['status'])\n",
|
||||
" km_sex.plot(label='Sex=%s' % sex, ax=ax)"
|
||||
" km_sex.plot(label='Sex=%s' % sex, ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -361,11 +366,12 @@
|
||||
"id": "bf30d26f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.606730Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.606598Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.666348Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.665926Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.858891Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.858766Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.913319Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.913028Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -451,7 +457,7 @@
|
||||
"logrank_test(by_sex['Male']['time'],\n",
|
||||
" by_sex['Female']['time'],\n",
|
||||
" by_sex['Male']['status'],\n",
|
||||
" by_sex['Female']['status'])"
|
||||
" by_sex['Female']['status'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -473,10 +479,10 @@
|
||||
"id": "2ab78e07",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.668086Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.667893Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.695653Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.695352Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.914969Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.914826Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.941528Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.941277Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -542,7 +548,7 @@
|
||||
"cox_fit = coxph().fit(model_df,\n",
|
||||
" 'time',\n",
|
||||
" 'status')\n",
|
||||
"cox_fit.summary[['coef', 'se(coef)', 'p']]"
|
||||
"cox_fit.summary[['coef', 'se(coef)', 'p']]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -566,10 +572,10 @@
|
||||
"id": "4716b7b0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.697533Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.697411Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.703099Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.702785Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.943061Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.942963Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.948065Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.947785Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -648,7 +654,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"cox_fit.log_likelihood_ratio_test()"
|
||||
"cox_fit.log_likelihood_ratio_test()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -672,10 +678,10 @@
|
||||
"id": "c2767d88",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.704759Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.704654Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.742754Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.742458Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.949725Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.949641Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.982167Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.981821Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -789,7 +795,7 @@
|
||||
"fit_all = coxph().fit(all_df,\n",
|
||||
" 'time',\n",
|
||||
" 'status')\n",
|
||||
"fit_all.summary[['coef', 'se(coef)', 'p']]"
|
||||
"fit_all.summary[['coef', 'se(coef)', 'p']]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -820,10 +826,10 @@
|
||||
"id": "ede1d219",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.744507Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.744382Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.748245Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.747960Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.983958Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.983832Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.989895Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.989591Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -834,7 +840,7 @@
|
||||
" return pd.Series.mode(series)\n",
|
||||
" else:\n",
|
||||
" return series.mean()\n",
|
||||
"modal_data = cleaned.apply(representative, axis=0)"
|
||||
"modal_data = cleaned.apply(representative, axis=0)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -853,10 +859,10 @@
|
||||
"id": "dc032a71",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.749853Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.749753Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.755760Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.755479Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.991841Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.991705Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:47.997910Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:47.997622Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -963,7 +969,7 @@
|
||||
"modal_df = pd.DataFrame(\n",
|
||||
" [modal_data.iloc[0] for _ in range(len(levels))])\n",
|
||||
"modal_df['diagnosis'] = levels\n",
|
||||
"modal_df"
|
||||
"modal_df\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -981,10 +987,10 @@
|
||||
"id": "e7c1fe43",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.757256Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.757143Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.764838Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.764420Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:47.999542Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:47.999430Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.007263Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.006958Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1106,7 +1112,7 @@
|
||||
"source": [
|
||||
"modal_X = all_MS.transform(modal_df)\n",
|
||||
"modal_X.index = levels\n",
|
||||
"modal_X"
|
||||
"modal_X\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1123,11 +1129,12 @@
|
||||
"id": "f89fbed7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.766834Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.766698Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.774286Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.774004Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.008740Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.008640Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.015006Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.014745Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1264,7 +1271,7 @@
|
||||
],
|
||||
"source": [
|
||||
"predicted_survival = fit_all.predict_survival_function(modal_X)\n",
|
||||
"predicted_survival"
|
||||
"predicted_survival\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1283,11 +1290,12 @@
|
||||
"id": "8f0329b4",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.776165Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.776021Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.882750Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.882417Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.016512Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.016391Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.128436Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.127998Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1303,7 +1311,7 @@
|
||||
],
|
||||
"source": [
|
||||
"fig, ax = subplots(figsize=(8, 8))\n",
|
||||
"predicted_survival.plot(ax=ax);"
|
||||
"predicted_survival.plot(ax=ax);\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1325,10 +1333,10 @@
|
||||
"id": "3045bfc0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:38.884377Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:38.884272Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:38.999104Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:38.998778Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.130335Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.130198Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.248098Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.247765Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1350,7 +1358,7 @@
|
||||
"for result, df in Publication.groupby('posres'):\n",
|
||||
" by_result[result] = df\n",
|
||||
" km_result = km.fit(df['time'], df['status'])\n",
|
||||
" km_result.plot(label='Result=%d' % result, ax=ax)"
|
||||
" km_result.plot(label='Result=%d' % result, ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1370,11 +1378,12 @@
|
||||
"id": "d070f716",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.000963Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.000831Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.031731Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.031439Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.249785Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.249668Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.282954Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.282630Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1439,7 +1448,7 @@
|
||||
"posres_fit = coxph().fit(posres_df,\n",
|
||||
" 'time',\n",
|
||||
" 'status')\n",
|
||||
"posres_fit.summary[['coef', 'se(coef)', 'p']]"
|
||||
"posres_fit.summary[['coef', 'se(coef)', 'p']]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1458,10 +1467,10 @@
|
||||
"id": "2bbcdd0c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.033405Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.033297Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.071871Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.071602Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.284714Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.284593Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.323890Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.323545Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1559,7 +1568,7 @@
|
||||
" intercept=False)\n",
|
||||
"coxph().fit(model.fit_transform(Publication),\n",
|
||||
" 'time',\n",
|
||||
" 'status').summary[['coef', 'se(coef)', 'p']]"
|
||||
" 'status').summary[['coef', 'se(coef)', 'p']]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1593,7 +1602,7 @@
|
||||
"`Time` of day (Morning, Afternoon, or Evening). We generate data\n",
|
||||
"for these covariates so that all possibilities are equally likely: for\n",
|
||||
"instance, morning, afternoon and evening calls are equally likely, and\n",
|
||||
"any number of operators from $5$ to $15$ is equally likely."
|
||||
"any number of operators from $5$ to $15$ is equally likely. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1602,10 +1611,10 @@
|
||||
"id": "b8ece43a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.073478Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.073379Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.076907Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.076640Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.325634Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.325517Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.329272Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.328978Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1640,10 +1649,10 @@
|
||||
"id": "3e4f766f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.078347Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.078266Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.085129Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.084865Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.330871Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.330782Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.337958Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.337672Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1672,10 +1681,10 @@
|
||||
"id": "72f42d14",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.086703Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.086584Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.090844Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.090578Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.339669Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.339578Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.343948Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.343688Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1767,7 +1776,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"X[:5]"
|
||||
"X[:5]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1784,17 +1793,17 @@
|
||||
"id": "8b921536",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.092615Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.092336Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.103571Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.101990Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.345389Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.345291Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.350596Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.349892Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"true_beta = np.array([0.04, -0.3, 0, 0.2, -0.2])\n",
|
||||
"true_linpred = X.dot(true_beta)\n",
|
||||
"hazard = lambda t: 1e-5 * t"
|
||||
"hazard = lambda t: 1e-5 * t\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1831,15 +1840,16 @@
|
||||
"id": "96ce0f99",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.107799Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.107513Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.111944Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.110837Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.356110Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.355787Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.360120Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.358812Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cum_hazard = lambda t: 1e-5 * t**2 / 2"
|
||||
"cum_hazard = lambda t: 1e-5 * t**2 / 2\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1861,17 +1871,17 @@
|
||||
"id": "63d78ff9",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.116637Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.116086Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.297425Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.293868Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.363547Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.363232Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.547724Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.547018Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"W = np.array([sim_time(l, cum_hazard, rng)\n",
|
||||
" for l in true_linpred])\n",
|
||||
"D['Wait time'] = np.clip(W, 0, 1000)"
|
||||
"D['Wait time'] = np.clip(W, 0, 1000)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1890,11 +1900,12 @@
|
||||
"id": "fe008dbf",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.312187Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.311513Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.327004Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.326346Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.552971Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.552635Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.584979Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.563731Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1988,7 +1999,7 @@
|
||||
"D['Failed'] = rng.choice([1, 0],\n",
|
||||
" N,\n",
|
||||
" p=[0.9, 0.1])\n",
|
||||
"D[:5]"
|
||||
"D[:5]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1997,10 +2008,10 @@
|
||||
"id": "c3a2bec7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.330721Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.330474Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.363217Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.362899Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.615161Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.614999Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.618097Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.617615Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -2016,7 +2027,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"D['Failed'].mean()"
|
||||
"D['Failed'].mean()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2033,10 +2044,10 @@
|
||||
"id": "2b27af56",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.364912Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.364808Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.567228Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.566838Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.620507Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.620371Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:48.790525Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:48.790116Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -2068,7 +2079,7 @@
|
||||
" by_center[center] = df\n",
|
||||
" km_center = km.fit(df['Wait time'], df['Failed'])\n",
|
||||
" km_center.plot(label='Center=%s' % center, ax=ax)\n",
|
||||
"ax.set_title(\"Probability of Still Being on Hold\")"
|
||||
"ax.set_title(\"Probability of Still Being on Hold\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2085,10 +2096,10 @@
|
||||
"id": "9625598d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.569040Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.568920Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.823091Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.822627Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:48.792256Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:48.792159Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:49.004599Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:49.004246Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -2120,7 +2131,7 @@
|
||||
" by_time[time] = df\n",
|
||||
" km_time = km.fit(df['Wait time'], df['Failed'])\n",
|
||||
" km_time.plot(label='Time=%s' % time, ax=ax)\n",
|
||||
"ax.set_title(\"Probability of Still Being on Hold\")"
|
||||
"ax.set_title(\"Probability of Still Being on Hold\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2141,11 +2152,12 @@
|
||||
"id": "75a744ef",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.825758Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.825581Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.847974Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.847624Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:49.006368Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:49.006251Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:49.026122Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:49.025786Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2230,7 +2242,7 @@
|
||||
"source": [
|
||||
"multivariate_logrank_test(D['Wait time'],\n",
|
||||
" D['Center'],\n",
|
||||
" D['Failed'])"
|
||||
" D['Failed'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2247,11 +2259,12 @@
|
||||
"id": "9badb3e3",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.850725Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.850538Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:39.873327Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:39.873038Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:49.027909Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:49.027782Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:49.046955Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:49.046606Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2336,7 +2349,7 @@
|
||||
"source": [
|
||||
"multivariate_logrank_test(D['Wait time'],\n",
|
||||
" D['Time'],\n",
|
||||
" D['Failed'])"
|
||||
" D['Failed'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2356,11 +2369,12 @@
|
||||
"id": "026e9ff8",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:39.875394Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:39.875265Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:40.004667Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:40.004293Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:49.048485Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:49.048378Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:49.175538Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:49.175268Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2443,7 +2457,7 @@
|
||||
" 'Center'],\n",
|
||||
" intercept=False).fit_transform(D)\n",
|
||||
"F = coxph().fit(X, 'Wait time', 'Failed')\n",
|
||||
"F.log_likelihood_ratio_test()"
|
||||
"F.log_likelihood_ratio_test()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2460,11 +2474,12 @@
|
||||
"id": "7cab3789",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:40.006886Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:40.006736Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:40.134843Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:40.134522Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:49.177180Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:49.177056Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:49.306158Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:49.305806Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2547,7 +2562,7 @@
|
||||
" 'Time'],\n",
|
||||
" intercept=False).fit_transform(D)\n",
|
||||
"F = coxph().fit(X, 'Wait time', 'Failed')\n",
|
||||
"F.log_likelihood_ratio_test()"
|
||||
"F.log_likelihood_ratio_test()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2567,11 +2582,12 @@
|
||||
"id": "5cc4b898",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:40.136769Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:40.136659Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:40.462485Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:40.457034Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:49.308130Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:49.307985Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:49.596821Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:49.585317Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2663,7 +2679,7 @@
|
||||
" X,\n",
|
||||
" 'Wait time',\n",
|
||||
" 'Failed')\n",
|
||||
"fit_queuing.summary[['coef', 'se(coef)', 'p']]"
|
||||
"fit_queuing.summary[['coef', 'se(coef)', 'p']]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2679,15 +2695,16 @@
|
||||
" `Operators`, `Center = B`, `Center = C`, \n",
|
||||
"`Time = Even.` and `Time = Morn.` are $0.04$, $-0.3$,\n",
|
||||
"$0$, $0.2$, and $-0.2$, respectively. The coefficient estimates\n",
|
||||
"from the fitted Cox model are fairly accurate."
|
||||
"from the fitted Cox model are fairly accurate.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"cell_metadata_filter": "-all",
|
||||
"formats": "ipynb,md:myst",
|
||||
"main_language": "python"
|
||||
"main_language": "python",
|
||||
"notebook_metadata_filter": "-all"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,9 +5,11 @@
|
||||
"id": "75b2d75c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"# Chapter 13\n",
|
||||
"\n",
|
||||
"# Lab: Multiple Testing"
|
||||
"# Lab: Multiple Testing\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,10 +26,10 @@
|
||||
"id": "1f928b2d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:50.715151Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:50.714766Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:51.778071Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:51.777549Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:00.152825Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:00.151975Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.254245Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.253710Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -36,7 +38,7 @@
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import statsmodels.api as sm\n",
|
||||
"from ISLP import load_data"
|
||||
"from ISLP import load_data\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,11 +56,12 @@
|
||||
"id": "eb4b32aa",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:51.780386Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:51.780091Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:51.782579Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:51.782295Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.256495Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.256216Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.258709Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.258467Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -70,7 +73,7 @@
|
||||
"from statsmodels.stats.multicomp import \\\n",
|
||||
" pairwise_tukeyhsd\n",
|
||||
"from statsmodels.stats.multitest import \\\n",
|
||||
" multipletests as mult_test"
|
||||
" multipletests as mult_test\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,10 +95,10 @@
|
||||
"id": "e12ac0cd",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:51.784074Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:51.783969Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:51.786285Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:51.786035Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.260260Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.260153Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.262414Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.262174Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -103,7 +106,7 @@
|
||||
"rng = np.random.default_rng(12)\n",
|
||||
"X = rng.standard_normal((10, 100))\n",
|
||||
"true_mean = np.array([0.5]*50 + [0]*50)\n",
|
||||
"X += true_mean[None,:]"
|
||||
"X += true_mean[None,:]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -122,10 +125,10 @@
|
||||
"id": "04d0f49e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:51.787724Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:51.787627Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:51.791755Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:51.791495Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.263887Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.263792Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.267718Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.267462Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -142,7 +145,7 @@
|
||||
],
|
||||
"source": [
|
||||
"result = ttest_1samp(X[:,0], 0)\n",
|
||||
"result.pvalue"
|
||||
"result.pvalue\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -169,11 +172,12 @@
|
||||
"id": "d1f0c695",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:51.793305Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:51.793226Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:51.815977Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:51.815697Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.269204Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.269116Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.292380Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.292124Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -186,7 +190,7 @@
|
||||
" 'Do not reject H0'])\n",
|
||||
"truth = pd.Categorical(true_mean == 0,\n",
|
||||
" categories=[True, False],\n",
|
||||
" ordered=True)"
|
||||
" ordered=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -204,11 +208,12 @@
|
||||
"id": "7a9594a0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:51.817571Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:51.817485Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:51.826998Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:51.826728Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.293910Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.293823Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.302891Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.302612Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -272,7 +277,7 @@
|
||||
"pd.crosstab(decision,\n",
|
||||
" truth,\n",
|
||||
" rownames=['Decision'],\n",
|
||||
" colnames=['H0'])"
|
||||
" colnames=['H0'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -294,7 +299,7 @@
|
||||
"amounts to quite a weak signal, and it resulted in a high number of\n",
|
||||
"Type II errors. Let’s instead simulate data with a stronger signal,\n",
|
||||
"so that the ratio of the mean to the standard deviation for the false\n",
|
||||
"null hypotheses equals $1$. We make only 10 Type II errors."
|
||||
"null hypotheses equals $1$. We make only 10 Type II errors.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -303,11 +308,12 @@
|
||||
"id": "25f7fc5d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:51.828557Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:51.828471Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:51.857144Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:51.856863Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.304398Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.304317Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.331987Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.331720Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -383,7 +389,15 @@
|
||||
"pd.crosstab(decision,\n",
|
||||
" truth,\n",
|
||||
" rownames=['Decision'],\n",
|
||||
" colnames=['H0'])"
|
||||
" colnames=['H0'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bb70c597",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -406,10 +420,10 @@
|
||||
"id": "369b5bd3",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:51.858641Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:51.858551Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.158944Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.158640Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.333446Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.333362Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.583878Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.583084Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -435,7 +449,7 @@
|
||||
"ax.set_xlabel('Number of Hypotheses')\n",
|
||||
"ax.set_ylabel('Family-Wise Error Rate')\n",
|
||||
"ax.legend()\n",
|
||||
"ax.axhline(0.05, c='k', ls='--');"
|
||||
"ax.axhline(0.05, c='k', ls='--');\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -462,10 +476,10 @@
|
||||
"id": "9ce7a19f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.160608Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.160488Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.198900Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.198590Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.592359Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.591524Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.636450Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.636160Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -486,7 +500,7 @@
|
||||
"fund_mini_pvals = np.empty(5)\n",
|
||||
"for i in range(5):\n",
|
||||
" fund_mini_pvals[i] = ttest_1samp(fund_mini.iloc[:,i], 0).pvalue\n",
|
||||
"fund_mini_pvals"
|
||||
"fund_mini_pvals\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -530,11 +544,12 @@
|
||||
"id": "de6cffed",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.200735Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.200617Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.202992Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.202738Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.638004Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.637923Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.640151Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.639891Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -550,7 +565,7 @@
|
||||
],
|
||||
"source": [
|
||||
"reject, bonf = mult_test(fund_mini_pvals, method = \"bonferroni\")[:2]\n",
|
||||
"reject"
|
||||
"reject\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -568,10 +583,10 @@
|
||||
"id": "0de71500",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.204499Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.204407Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.206753Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.206498Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.641646Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.641554Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.643766Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.643529Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -588,7 +603,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"bonf, np.minimum(fund_mini_pvals * 5, 1)"
|
||||
"bonf, np.minimum(fund_mini_pvals * 5, 1)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -610,11 +625,12 @@
|
||||
"id": "f7e87bdb",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.208134Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.208036Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.255948Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.255602Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.645254Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.645162Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.687110Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.686827Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -630,7 +646,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"mult_test(fund_mini_pvals, method = \"holm\", alpha=0.05)[:2]"
|
||||
"mult_test(fund_mini_pvals, method = \"holm\", alpha=0.05)[:2]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -648,11 +664,12 @@
|
||||
"id": "e88be376",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.257656Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.257540Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.260759Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.260443Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.688627Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.688527Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.691393Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.691161Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -672,7 +689,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"fund_mini.mean()"
|
||||
"fund_mini.mean()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -691,10 +708,10 @@
|
||||
"id": "41149af6",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.262161Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.262063Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.264821Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.264573Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.692844Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.692751Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:01.695119Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:01.694881Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -711,7 +728,7 @@
|
||||
],
|
||||
"source": [
|
||||
"ttest_rel(fund_mini['Manager1'],\n",
|
||||
" fund_mini['Manager2']).pvalue"
|
||||
" fund_mini['Manager2']).pvalue\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -743,11 +760,12 @@
|
||||
"id": "61aabda7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.266290Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.266197Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.746435Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.746118Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:01.696563Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:01.696465Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.177873Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.177587Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -776,7 +794,7 @@
|
||||
"returns = np.hstack([fund_mini.iloc[:,i] for i in range(5)])\n",
|
||||
"managers = np.hstack([[i+1]*50 for i in range(5)])\n",
|
||||
"tukey = pairwise_tukeyhsd(returns, managers)\n",
|
||||
"print(tukey.summary())"
|
||||
"print(tukey.summary())\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -802,10 +820,10 @@
|
||||
"id": "cbcad4de",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.748078Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.747975Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:52.829340Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:52.829068Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.179444Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.179343Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.260168Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.259865Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -822,7 +840,7 @@
|
||||
],
|
||||
"source": [
|
||||
"fig, ax = plt.subplots(figsize=(8,8))\n",
|
||||
"tukey.plot_simultaneous(ax=ax);"
|
||||
"tukey.plot_simultaneous(ax=ax);\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -843,17 +861,17 @@
|
||||
"id": "b5842190",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:52.831064Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:52.830953Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:53.237615Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:53.237218Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.261922Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.261817Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.675908Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.675505Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fund_pvalues = np.empty(2000)\n",
|
||||
"for i, manager in enumerate(Fund.columns):\n",
|
||||
" fund_pvalues[i] = ttest_1samp(Fund[manager], 0).pvalue"
|
||||
" fund_pvalues[i] = ttest_1samp(Fund[manager], 0).pvalue\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -872,10 +890,10 @@
|
||||
"id": "7c9d8bed",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:53.239456Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:53.239345Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:53.242247Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:53.241963Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.677787Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.677666Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.680351Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.680097Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -893,7 +911,7 @@
|
||||
],
|
||||
"source": [
|
||||
"fund_qvalues = mult_test(fund_pvalues, method = \"fdr_bh\")[1]\n",
|
||||
"fund_qvalues[:10]"
|
||||
"fund_qvalues[:10]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -917,11 +935,12 @@
|
||||
"id": "bfa39f7c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:53.243699Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:53.243594Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:53.245817Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:53.245569Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.681878Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.681776Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.684078Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.683782Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -936,7 +955,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"(fund_qvalues <= 0.1).sum()"
|
||||
"(fund_qvalues <= 0.1).sum()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -960,11 +979,12 @@
|
||||
"id": "70b69b47",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:53.247278Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:53.247171Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:53.249292Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:53.249050Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.685580Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.685487Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.687581Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.687313Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -979,7 +999,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"(fund_pvalues <= 0.1 / 2000).sum()"
|
||||
"(fund_pvalues <= 0.1 / 2000).sum()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1009,10 +1029,10 @@
|
||||
"id": "4c0ddea1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:53.250720Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:53.250626Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:53.253039Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:53.252775Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.689041Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.688941Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.691386Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.691129Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1026,7 +1046,7 @@
|
||||
" sorted_set_ = np.arange(sorted_set_.max())\n",
|
||||
"else:\n",
|
||||
" selected_ = []\n",
|
||||
" sorted_set_ = []"
|
||||
" sorted_set_ = []\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1043,11 +1063,12 @@
|
||||
"id": "0314eac9",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:53.254539Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:53.254447Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:53.487410Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:53.487100Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.692825Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.692729Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.922587Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.922278Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1070,7 +1091,7 @@
|
||||
"ax.set_ylabel('P-Value')\n",
|
||||
"ax.set_xlabel('Index')\n",
|
||||
"ax.scatter(sorted_set_+1, sorted_[sorted_set_], c='r', s=20)\n",
|
||||
"ax.axline((0, 0), (1,q/m), c='k', ls='--', linewidth=3);"
|
||||
"ax.axline((0, 0), (1,q/m), c='k', ls='--', linewidth=3);\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1092,11 +1113,12 @@
|
||||
"id": "b59b8137",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:53.489154Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:53.489039Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:53.563916Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:53.563610Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.924316Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.924196Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:02.997644Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:02.997332Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1118,7 +1140,7 @@
|
||||
"Khan = load_data('Khan') \n",
|
||||
"D = pd.concat([Khan['xtrain'], Khan['xtest']])\n",
|
||||
"D['Y'] = pd.concat([Khan['ytrain'], Khan['ytest']])\n",
|
||||
"D['Y'].value_counts()"
|
||||
"D['Y'].value_counts()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1142,11 +1164,12 @@
|
||||
"id": "96fb2f61",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:53.565515Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:53.565402Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:53.569018Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:53.568748Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:02.999309Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:02.999199Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:03.003203Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:03.002963Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1167,7 +1190,7 @@
|
||||
"observedT, pvalue = ttest_ind(D2[gene_11],\n",
|
||||
" D4[gene_11],\n",
|
||||
" equal_var=True)\n",
|
||||
"observedT, pvalue"
|
||||
"observedT, pvalue\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1194,11 +1217,12 @@
|
||||
"id": "fdc229fa",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:53.570437Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:53.570359Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:55.953846Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:55.953572Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:03.004671Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:03.004588Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:05.379699Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:05.379380Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1224,7 +1248,7 @@
|
||||
" D_null[n_:],\n",
|
||||
" equal_var=True)\n",
|
||||
" Tnull[b] = ttest_.statistic\n",
|
||||
"(np.abs(Tnull) > np.abs(observedT)).mean()"
|
||||
"(np.abs(Tnull) > np.abs(observedT)).mean()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1244,11 +1268,12 @@
|
||||
"id": "e3894695",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:55.955682Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:55.955570Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:56.168609Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:56.168280Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:36:05.381564Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:05.381435Z",
|
||||
"iopub.status.idle": "2023-08-06T17:36:05.597223Z",
|
||||
"shell.execute_reply": "2023-08-06T17:36:05.596880Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1277,7 +1302,7 @@
|
||||
" c='b',\n",
|
||||
" label='Observed')\n",
|
||||
"ax.legend()\n",
|
||||
"ax.set_xlabel(\"Null Distribution of Test Statistic\");"
|
||||
"ax.set_xlabel(\"Null Distribution of Test Statistic\");\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1303,10 +1328,10 @@
|
||||
"id": "3b7392cb",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:56.170411Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:56.170304Z",
|
||||
"iopub.status.idle": "2023-07-31T02:19:52.282774Z",
|
||||
"shell.execute_reply": "2023-07-31T02:19:52.282319Z"
|
||||
"iopub.execute_input": "2023-08-06T17:36:05.599334Z",
|
||||
"iopub.status.busy": "2023-08-06T17:36:05.599205Z",
|
||||
"iopub.status.idle": "2023-08-06T17:40:01.929123Z",
|
||||
"shell.execute_reply": "2023-08-06T17:40:01.928341Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1328,7 +1353,7 @@
|
||||
" ttest_ = ttest_ind(D_null[:n_],\n",
|
||||
" D_null[n_:],\n",
|
||||
" equal_var=True)\n",
|
||||
" Tnull_vals[j,b] = ttest_.statistic"
|
||||
" Tnull_vals[j,b] = ttest_.statistic\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1349,10 +1374,10 @@
|
||||
"id": "cac15616",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:19:52.285179Z",
|
||||
"iopub.status.busy": "2023-07-31T02:19:52.285030Z",
|
||||
"iopub.status.idle": "2023-07-31T02:19:52.430820Z",
|
||||
"shell.execute_reply": "2023-07-31T02:19:52.430483Z"
|
||||
"iopub.execute_input": "2023-08-06T17:40:01.931393Z",
|
||||
"iopub.status.busy": "2023-08-06T17:40:01.931250Z",
|
||||
"iopub.status.idle": "2023-08-06T17:40:02.050525Z",
|
||||
"shell.execute_reply": "2023-08-06T17:40:02.050215Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1364,7 +1389,7 @@
|
||||
" V = np.sum(np.abs(Tnull_vals) >= cutoffs[j]) / B\n",
|
||||
" Rs[j] = R\n",
|
||||
" Vs[j] = V\n",
|
||||
" FDRs[j] = V / R"
|
||||
" FDRs[j] = V / R\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1390,10 +1415,10 @@
|
||||
"id": "9661eb10",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:19:52.432747Z",
|
||||
"iopub.status.busy": "2023-07-31T02:19:52.432615Z",
|
||||
"iopub.status.idle": "2023-07-31T02:19:52.435385Z",
|
||||
"shell.execute_reply": "2023-07-31T02:19:52.435090Z"
|
||||
"iopub.execute_input": "2023-08-06T17:40:02.052324Z",
|
||||
"iopub.status.busy": "2023-08-06T17:40:02.052224Z",
|
||||
"iopub.status.idle": "2023-08-06T17:40:02.054968Z",
|
||||
"shell.execute_reply": "2023-08-06T17:40:02.054729Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1427,7 +1452,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sorted(idx[np.abs(T_vals) >= cutoffs[FDRs < 0.1].min()])"
|
||||
"sorted(idx[np.abs(T_vals) >= cutoffs[FDRs < 0.1].min()])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1445,10 +1470,10 @@
|
||||
"id": "18ad4900",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:19:52.436985Z",
|
||||
"iopub.status.busy": "2023-07-31T02:19:52.436868Z",
|
||||
"iopub.status.idle": "2023-07-31T02:19:52.439478Z",
|
||||
"shell.execute_reply": "2023-07-31T02:19:52.439213Z"
|
||||
"iopub.execute_input": "2023-08-06T17:40:02.056480Z",
|
||||
"iopub.status.busy": "2023-08-06T17:40:02.056382Z",
|
||||
"iopub.status.idle": "2023-08-06T17:40:02.058766Z",
|
||||
"shell.execute_reply": "2023-08-06T17:40:02.058503Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1494,7 +1519,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"sorted(idx[np.abs(T_vals) >= cutoffs[FDRs < 0.2].min()])"
|
||||
"sorted(idx[np.abs(T_vals) >= cutoffs[FDRs < 0.2].min()])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1513,11 +1538,12 @@
|
||||
"id": "28c276b6",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:19:52.441008Z",
|
||||
"iopub.status.busy": "2023-07-31T02:19:52.440893Z",
|
||||
"iopub.status.idle": "2023-07-31T02:19:52.515423Z",
|
||||
"shell.execute_reply": "2023-07-31T02:19:52.515126Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:40:02.060234Z",
|
||||
"iopub.status.busy": "2023-08-06T17:40:02.060135Z",
|
||||
"iopub.status.idle": "2023-08-06T17:40:02.135583Z",
|
||||
"shell.execute_reply": "2023-08-06T17:40:02.135228Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1535,15 +1561,23 @@
|
||||
"fig, ax = plt.subplots()\n",
|
||||
"ax.plot(Rs, FDRs, 'b', linewidth=3)\n",
|
||||
"ax.set_xlabel(\"Number of Rejections\")\n",
|
||||
"ax.set_ylabel(\"False Discovery Rate\");"
|
||||
"ax.set_ylabel(\"False Discovery Rate\");\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e4b5d621",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"cell_metadata_filter": "-all",
|
||||
"formats": "ipynb,md:myst",
|
||||
"main_language": "python"
|
||||
"main_language": "python",
|
||||
"notebook_metadata_filter": "-all"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,7 +5,9 @@
|
||||
"id": "82bce88a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Chapter 3"
|
||||
"\n",
|
||||
"# Chapter 3\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -26,17 +28,18 @@
|
||||
"id": "ca5277a6",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:37.098059Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:37.097629Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:37.599285Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:37.598959Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:45.279319Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:45.279082Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:45.953848Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:45.953518Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from matplotlib.pyplot import subplots"
|
||||
"from matplotlib.pyplot import subplots\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -58,15 +61,16 @@
|
||||
"id": "675f24e6",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:37.601365Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:37.601115Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.189274Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.188923Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:45.955884Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:45.955666Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.765820Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.765525Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import statsmodels.api as sm"
|
||||
"import statsmodels.api as sm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -90,17 +94,17 @@
|
||||
"id": "a0ee23c2",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.191201Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.191048Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.194262Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.194012Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.767689Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.767547Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.770326Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.770048Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from statsmodels.stats.outliers_influence \\\n",
|
||||
" import variance_inflation_factor as VIF\n",
|
||||
"from statsmodels.stats.anova import anova_lm"
|
||||
"from statsmodels.stats.anova import anova_lm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -121,10 +125,10 @@
|
||||
"id": "b35eb887",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.195737Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.195636Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.348861Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.348443Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.771834Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.771733Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.958904Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.958467Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -132,7 +136,7 @@
|
||||
"from ISLP import load_data\n",
|
||||
"from ISLP.models import (ModelSpec as MS,\n",
|
||||
" summarize,\n",
|
||||
" poly)"
|
||||
" poly)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -153,11 +157,12 @@
|
||||
"id": "961908f7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.351011Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.350696Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.354455Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.354198Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.960948Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.960687Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.964347Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.964073Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -208,7 +213,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dir()"
|
||||
"dir()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -233,11 +238,12 @@
|
||||
"id": "662caa15",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.355929Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.355825Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.358768Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.358500Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.966063Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.965960Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.968939Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.968662Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -418,7 +424,7 @@
|
||||
],
|
||||
"source": [
|
||||
"A = np.array([3,5,11])\n",
|
||||
"dir(A)"
|
||||
"dir(A)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -436,11 +442,12 @@
|
||||
"id": "ebb7d126",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.360173Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.360095Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.362221Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.361978Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.970359Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.970263Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.972364Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.972124Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -455,7 +462,15 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"A.sum()"
|
||||
"A.sum()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3b9db985",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -485,10 +500,10 @@
|
||||
"id": "1ea46cee",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.363717Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.363629Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.368806Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.368543Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.973818Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.973741Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.982452Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.982201Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -507,7 +522,7 @@
|
||||
],
|
||||
"source": [
|
||||
"Boston = load_data(\"Boston\")\n",
|
||||
"Boston.columns"
|
||||
"Boston.columns\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -520,7 +535,7 @@
|
||||
"We start by using the `sm.OLS()` function to fit a\n",
|
||||
"simple linear regression model. Our response will be\n",
|
||||
" `medv` and `lstat` will be the single predictor.\n",
|
||||
"For this model, we can create the model matrix by hand."
|
||||
"For this model, we can create the model matrix by hand.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -529,10 +544,10 @@
|
||||
"id": "26c0ba88",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.370249Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.370153Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.375404Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.375134Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.983850Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.983745Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.990247Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.990002Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -602,7 +617,7 @@
|
||||
"source": [
|
||||
"X = pd.DataFrame({'intercept': np.ones(Boston.shape[0]),\n",
|
||||
" 'lstat': Boston['lstat']})\n",
|
||||
"X[:4]"
|
||||
"X[:4]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -619,17 +634,18 @@
|
||||
"id": "d4dd511b",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.376828Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.376741Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.379010Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.378764Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.991664Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.991561Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:46.994290Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:46.994049Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y = Boston['medv']\n",
|
||||
"model = sm.OLS(y, X)\n",
|
||||
"results = model.fit()"
|
||||
"results = model.fit()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -653,11 +669,12 @@
|
||||
"id": "eef9f8e3",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.380359Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.380275Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.437623Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.437333Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:46.995620Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:46.995530Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.057569Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.057305Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -718,7 +735,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"summarize(results)"
|
||||
"summarize(results)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -754,7 +771,7 @@
|
||||
"initial computations on it, as specified in the transform object.\n",
|
||||
"For example, it may compute means and standard deviations for centering and scaling.\n",
|
||||
"The `transform()` \n",
|
||||
"method applies the fitted transformation to the array of data, and produces the model matrix."
|
||||
"method applies the fitted transformation to the array of data, and produces the model matrix.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -763,11 +780,12 @@
|
||||
"id": "557170d4",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.439217Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.439073Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.443761Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.443507Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.059196Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.059050Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.063793Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.063548Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -857,11 +875,12 @@
|
||||
"id": "b83ec097",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.445159Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.445084Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.449274Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.449028Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.065239Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.065152Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.069286Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.068998Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -960,10 +979,10 @@
|
||||
"id": "d4dce5f6",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.450699Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.450620Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.459848Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.459594Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.070742Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.070666Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.079919Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.079672Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -982,10 +1001,10 @@
|
||||
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 601.6</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
" <th>Date:</th> <td>Sun, 30 Jul 2023</td> <th> Prob (F-statistic):</th> <td>5.08e-88</td>\n",
|
||||
" <th>Date:</th> <td>Sun, 06 Aug 2023</td> <th> Prob (F-statistic):</th> <td>5.08e-88</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
" <th>Time:</th> <td>22:14:38</td> <th> Log-Likelihood: </th> <td> -1641.5</td>\n",
|
||||
" <th>Time:</th> <td>10:34:47</td> <th> Log-Likelihood: </th> <td> -1641.5</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
" <th>No. Observations:</th> <td> 506</td> <th> AIC: </th> <td> 3287.</td>\n",
|
||||
@@ -1033,8 +1052,8 @@
|
||||
"\\textbf{Dep. Variable:} & medv & \\textbf{ R-squared: } & 0.544 \\\\\n",
|
||||
"\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.543 \\\\\n",
|
||||
"\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 601.6 \\\\\n",
|
||||
"\\textbf{Date:} & Sun, 30 Jul 2023 & \\textbf{ Prob (F-statistic):} & 5.08e-88 \\\\\n",
|
||||
"\\textbf{Time:} & 22:14:38 & \\textbf{ Log-Likelihood: } & -1641.5 \\\\\n",
|
||||
"\\textbf{Date:} & Sun, 06 Aug 2023 & \\textbf{ Prob (F-statistic):} & 5.08e-88 \\\\\n",
|
||||
"\\textbf{Time:} & 10:34:47 & \\textbf{ Log-Likelihood: } & -1641.5 \\\\\n",
|
||||
"\\textbf{No. Observations:} & 506 & \\textbf{ AIC: } & 3287. \\\\\n",
|
||||
"\\textbf{Df Residuals:} & 504 & \\textbf{ BIC: } & 3295. \\\\\n",
|
||||
"\\textbf{Df Model:} & 1 & \\textbf{ } & \\\\\n",
|
||||
@@ -1069,8 +1088,8 @@
|
||||
"Dep. Variable: medv R-squared: 0.544\n",
|
||||
"Model: OLS Adj. R-squared: 0.543\n",
|
||||
"Method: Least Squares F-statistic: 601.6\n",
|
||||
"Date: Sun, 30 Jul 2023 Prob (F-statistic): 5.08e-88\n",
|
||||
"Time: 22:14:38 Log-Likelihood: -1641.5\n",
|
||||
"Date: Sun, 06 Aug 2023 Prob (F-statistic): 5.08e-88\n",
|
||||
"Time: 10:34:47 Log-Likelihood: -1641.5\n",
|
||||
"No. Observations: 506 AIC: 3287.\n",
|
||||
"Df Residuals: 504 BIC: 3295.\n",
|
||||
"Df Model: 1 \n",
|
||||
@@ -1098,7 +1117,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results.summary()"
|
||||
"results.summary()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1116,11 +1135,12 @@
|
||||
"id": "a0edf555",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.461298Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.461215Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.463809Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.463563Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.081336Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.081257Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.083680Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.083425Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1137,7 +1157,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results.params"
|
||||
"results.params\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1158,10 +1178,10 @@
|
||||
"id": "fdc5a3f3",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.465231Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.465150Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.468821Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.468569Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.085093Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.085015Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.088625Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.088392Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1225,7 +1245,7 @@
|
||||
"source": [
|
||||
"new_df = pd.DataFrame({'lstat':[5, 10, 15]})\n",
|
||||
"newX = design.transform(new_df)\n",
|
||||
"newX"
|
||||
"newX\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1242,11 +1262,12 @@
|
||||
"id": "2c6acbf0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.470242Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.470162Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.472450Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.472199Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.090091Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.090008Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.092428Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.092183Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1262,7 +1283,7 @@
|
||||
],
|
||||
"source": [
|
||||
"new_predictions = results.get_prediction(newX);\n",
|
||||
"new_predictions.predicted_mean"
|
||||
"new_predictions.predicted_mean\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1279,11 +1300,12 @@
|
||||
"id": "c472ef33",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.473835Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.473762Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.476010Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.475756Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.093785Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.093688Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.096010Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.095781Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1300,7 +1322,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"new_predictions.conf_int(alpha=0.05)"
|
||||
"new_predictions.conf_int(alpha=0.05)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1317,11 +1339,12 @@
|
||||
"id": "3e2ffc7a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.477420Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.477344Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.479639Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.479371Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.097324Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.097234Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.099513Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.099275Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1338,7 +1361,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"new_predictions.conf_int(obs=True, alpha=0.05)"
|
||||
"new_predictions.conf_int(obs=True, alpha=0.05)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1376,11 +1399,12 @@
|
||||
"id": "4e56a1d3",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.481027Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.480949Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.482803Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.482553Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.100972Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.100892Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.102793Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.102549Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1388,7 +1412,7 @@
|
||||
" \"Add a line with slope m and intercept b to ax\"\n",
|
||||
" xlim = ax.get_xlim()\n",
|
||||
" ylim = [m * xlim[0] + b, m * xlim[1] + b]\n",
|
||||
" ax.plot(xlim, ylim)"
|
||||
" ax.plot(xlim, ylim)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1409,11 +1433,12 @@
|
||||
"id": "7f43ffe7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.484281Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.484202Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.486090Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.485818Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.104148Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.104079Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.106062Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.105836Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -1421,7 +1446,7 @@
|
||||
" \"Add a line with slope m and intercept b to ax\"\n",
|
||||
" xlim = ax.get_xlim()\n",
|
||||
" ylim = [m * xlim[0] + b, m * xlim[1] + b]\n",
|
||||
" ax.plot(xlim, ylim, *args, **kwargs)"
|
||||
" ax.plot(xlim, ylim, *args, **kwargs)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1448,11 +1473,12 @@
|
||||
"id": "3f7b67c9",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.487491Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.487422Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.594015Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.593271Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.107460Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.107386Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.254700Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.253915Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1472,7 +1498,7 @@
|
||||
" results.params[0],\n",
|
||||
" results.params[1],\n",
|
||||
" 'r--',\n",
|
||||
" linewidth=3)"
|
||||
" linewidth=3)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1485,7 +1511,8 @@
|
||||
"an argument to make it of width 3.\n",
|
||||
"There is some evidence for non-linearity in the relationship between `lstat` and `medv`. We will explore this issue later in this lab.\n",
|
||||
"\n",
|
||||
"As mentioned above, there is an existing function to add a line to a plot --- `ax.axline()` --- but knowing how to write such functions empowers us to create more expressive displays."
|
||||
"As mentioned above, there is an existing function to add a line to a plot --- `ax.axline()` --- but knowing how to write such functions empowers us to create more expressive displays.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1510,11 +1537,12 @@
|
||||
"id": "b35a2fd3",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.597846Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.597587Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.716373Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.714325Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.259557Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.259229Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.402403Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.402135Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1533,7 +1561,7 @@
|
||||
"ax.scatter(results.fittedvalues, results.resid)\n",
|
||||
"ax.set_xlabel('Fitted value')\n",
|
||||
"ax.set_ylabel('Residual')\n",
|
||||
"ax.axhline(0, c='k', ls='--');"
|
||||
"ax.axhline(0, c='k', ls='--');\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1557,11 +1585,12 @@
|
||||
"id": "82673b80",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.720789Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.720353Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.829000Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.828696Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.404190Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.404052Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.494230Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.493905Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1591,7 +1620,7 @@
|
||||
"ax.scatter(np.arange(X.shape[0]), infl.hat_matrix_diag)\n",
|
||||
"ax.set_xlabel('Index')\n",
|
||||
"ax.set_ylabel('Leverage')\n",
|
||||
"np.argmax(infl.hat_matrix_diag)"
|
||||
"np.argmax(infl.hat_matrix_diag)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1624,11 +1653,12 @@
|
||||
"id": "54596dc4",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.830833Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.830723Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.841593Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.841314Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.495999Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.495872Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.506251Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.505979Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1721,10 +1751,10 @@
|
||||
"id": "75c78238",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.843301Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.843182Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.845817Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.845544Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.508238Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.508087Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.510826Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.510491Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1743,7 +1773,7 @@
|
||||
],
|
||||
"source": [
|
||||
"terms = Boston.columns.drop('medv')\n",
|
||||
"terms"
|
||||
"terms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1761,10 +1791,10 @@
|
||||
"id": "f14b9e1a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.847324Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.847229Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.861947Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.861682Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.512561Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.512448Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.526980Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.526612Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1917,7 +1947,7 @@
|
||||
"X = MS(terms).fit_transform(Boston)\n",
|
||||
"model = sm.OLS(y, X)\n",
|
||||
"results = model.fit()\n",
|
||||
"summarize(results)"
|
||||
"summarize(results)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1937,10 +1967,10 @@
|
||||
"id": "0a2714b1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.863470Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.863382Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.877283Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.877001Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.528637Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.528526Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.542270Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.541976Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -2085,7 +2115,7 @@
|
||||
"minus_age = Boston.columns.drop(['medv', 'age']) \n",
|
||||
"Xma = MS(minus_age).fit_transform(Boston)\n",
|
||||
"model1 = sm.OLS(y, Xma)\n",
|
||||
"summarize(model1.fit())"
|
||||
"summarize(model1.fit())\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2116,7 +2146,7 @@
|
||||
"lists of `Python` objects. The language also supports\n",
|
||||
"dictionary and *generator* comprehension, though these are\n",
|
||||
"beyond our scope here. Let's look at an example. We compute the VIF for each of the variables\n",
|
||||
"in the model matrix `X`, using the function `variance_inflation_factor()`."
|
||||
"in the model matrix `X`, using the function `variance_inflation_factor()`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2125,11 +2155,12 @@
|
||||
"id": "961c9128",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.878849Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.878736Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.885983Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.885709Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.543839Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.543752Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.551040Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.550770Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2235,7 +2266,7 @@
|
||||
" for i in range(1, X.shape[1])]\n",
|
||||
"vif = pd.DataFrame({'vif':vals},\n",
|
||||
" index=X.columns[1:])\n",
|
||||
"vif"
|
||||
"vif\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2256,17 +2287,18 @@
|
||||
"id": "4886f9e9",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.887620Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.887509Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.892708Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.892460Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.552596Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.552485Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.557775Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.557510Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vals = []\n",
|
||||
"for i in range(1, X.values.shape[1]):\n",
|
||||
" vals.append(VIF(X.values, i))"
|
||||
" vals.append(VIF(X.values, i))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2288,11 +2320,12 @@
|
||||
"id": "b54d2da1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.894293Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.894204Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.904859Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.904591Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.559401Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.559315Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.570264Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.570004Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2373,7 +2406,7 @@
|
||||
" 'age',\n",
|
||||
" ('lstat', 'age')]).fit_transform(Boston)\n",
|
||||
"model2 = sm.OLS(y, X)\n",
|
||||
"summarize(model2.fit())"
|
||||
"summarize(model2.fit())\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2395,11 +2428,12 @@
|
||||
"id": "1b71633a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.906973Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.906845Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.918896Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.918604Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.571718Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.571635Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.583621Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.583342Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2479,7 +2513,7 @@
|
||||
"X = MS([poly('lstat', degree=2), 'age']).fit_transform(Boston)\n",
|
||||
"model3 = sm.OLS(y, X)\n",
|
||||
"results3 = model3.fit()\n",
|
||||
"summarize(results3)"
|
||||
"summarize(results3)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2512,11 +2546,12 @@
|
||||
"id": "6d30a306",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.920531Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.920425Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:38.926296Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:38.926036Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.585282Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.585169Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.591106Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.590768Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2583,7 +2618,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"anova_lm(results1, results3)"
|
||||
"anova_lm(results1, results3)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2612,7 +2647,7 @@
|
||||
"The function `anova_lm()` can take more than two nested models\n",
|
||||
"as input, in which case it compares every successive pair of models.\n",
|
||||
"That also explains why there are `NaN`s in the first row above, since\n",
|
||||
"there is no previous model with which to compare the first."
|
||||
"there is no previous model with which to compare the first.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2621,17 +2656,18 @@
|
||||
"id": "9a5ec13f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:38.927819Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:38.927721Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:39.027635Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:39.027301Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.592639Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.592536Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.692002Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.691684Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<matplotlib.lines.Line2D at 0x154a82f50>"
|
||||
"<matplotlib.lines.Line2D at 0x15f00b090>"
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
@@ -2654,7 +2690,7 @@
|
||||
"ax.scatter(results3.fittedvalues, results3.resid)\n",
|
||||
"ax.set_xlabel('Fitted value')\n",
|
||||
"ax.set_ylabel('Residual')\n",
|
||||
"ax.axhline(0, c='k', ls='--')"
|
||||
"ax.axhline(0, c='k', ls='--')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2665,7 +2701,7 @@
|
||||
"We see that when the quadratic term is included in the model,\n",
|
||||
"there is little discernible pattern in the residuals.\n",
|
||||
"In order to create a cubic or higher-degree polynomial fit, we can simply change the degree argument\n",
|
||||
"to `poly()`."
|
||||
"to `poly()`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2686,11 +2722,12 @@
|
||||
"id": "09bbc0c6",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:39.029410Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:39.029292Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:39.034985Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:39.034677Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.693776Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.693649Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.700319Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.700055Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2708,7 +2745,7 @@
|
||||
],
|
||||
"source": [
|
||||
"Carseats = load_data('Carseats')\n",
|
||||
"Carseats.columns"
|
||||
"Carseats.columns\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2736,11 +2773,12 @@
|
||||
"id": "2e1da1fa",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:39.036665Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:39.036539Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:39.057391Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:39.057099Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:47.701981Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:47.701852Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:47.722346Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:47.722062Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -2903,7 +2941,7 @@
|
||||
" ('Price', 'Age')]\n",
|
||||
"X = MS(final).fit_transform(Carseats)\n",
|
||||
"model = sm.OLS(y, X)\n",
|
||||
"summarize(model.fit())"
|
||||
"summarize(model.fit())\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -2922,15 +2960,16 @@
|
||||
"positive indicates that a good shelving location is associated with high sales (relative to a bad location).\n",
|
||||
"And `ShelveLoc[Medium]` has a smaller positive coefficient,\n",
|
||||
"indicating that a medium shelving location leads to higher sales than a bad\n",
|
||||
"shelving location, but lower sales than a good shelving location."
|
||||
"shelving location, but lower sales than a good shelving location.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"cell_metadata_filter": "-all",
|
||||
"formats": "ipynb,md:myst",
|
||||
"main_language": "python"
|
||||
"main_language": "python",
|
||||
"notebook_metadata_filter": "-all"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -5,7 +5,9 @@
|
||||
"id": "3a3f2f85",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Chapter 5"
|
||||
"\n",
|
||||
"# Chapter 5\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -27,11 +29,12 @@
|
||||
"id": "60fad148",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:45.345335Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:45.345019Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.175442Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.174945Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.032413Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.032111Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:54.873921Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:54.873325Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -41,7 +44,7 @@
|
||||
"from ISLP.models import (ModelSpec as MS,\n",
|
||||
" summarize,\n",
|
||||
" poly)\n",
|
||||
"from sklearn.model_selection import train_test_split"
|
||||
"from sklearn.model_selection import train_test_split\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -58,11 +61,12 @@
|
||||
"id": "2478aeb4",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.177622Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.177374Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.179577Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.179312Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.876060Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.875842Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:54.878002Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:54.877731Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -72,7 +76,7 @@
|
||||
" KFold,\n",
|
||||
" ShuffleSplit)\n",
|
||||
"from sklearn.base import clone\n",
|
||||
"from ISLP.models import sklearn_sm"
|
||||
"from ISLP.models import sklearn_sm\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -92,7 +96,7 @@
|
||||
"when performing operations like this that contain an\n",
|
||||
"element of randomness, so that the results obtained can be reproduced\n",
|
||||
"precisely at a later time. We set the random seed of the splitter\n",
|
||||
"with the argument `random_state=0`."
|
||||
"with the argument `random_state=0`. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -101,10 +105,10 @@
|
||||
"id": "99c95faf",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.181100Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.180998Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.187844Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.187574Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.879485Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.879378Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:54.886401Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:54.886117Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -112,7 +116,7 @@
|
||||
"Auto = load_data('Auto')\n",
|
||||
"Auto_train, Auto_valid = train_test_split(Auto,\n",
|
||||
" test_size=196,\n",
|
||||
" random_state=0)"
|
||||
" random_state=0)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -129,10 +133,10 @@
|
||||
"id": "41b0717d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.189426Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.189344Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.193204Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.192935Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.887942Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.887854Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:54.891560Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:54.891297Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -141,7 +145,7 @@
|
||||
"X_train = hp_mm.fit_transform(Auto_train)\n",
|
||||
"y_train = Auto_train['mpg']\n",
|
||||
"model = sm.OLS(y_train, X_train)\n",
|
||||
"results = model.fit()"
|
||||
"results = model.fit()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -159,10 +163,10 @@
|
||||
"id": "d7ea3c0d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.194683Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.194591Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.198962Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.198705Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.893051Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.892965Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:54.897036Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:54.896785Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -181,7 +185,7 @@
|
||||
"X_valid = hp_mm.transform(Auto_valid)\n",
|
||||
"y_valid = Auto_valid['mpg']\n",
|
||||
"valid_pred = results.predict(X_valid)\n",
|
||||
"np.mean((y_valid - valid_pred)**2)"
|
||||
"np.mean((y_valid - valid_pred)**2)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -203,10 +207,10 @@
|
||||
"id": "a02a2d05",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.200468Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.200388Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.202553Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.202309Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.898500Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.898412Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:54.900767Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:54.900515Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -226,7 +230,7 @@
|
||||
" results = sm.OLS(y_train, X_train).fit()\n",
|
||||
" test_pred = results.predict(X_test)\n",
|
||||
"\n",
|
||||
" return np.mean((y_test - test_pred)**2)"
|
||||
" return np.mean((y_test - test_pred)**2)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -246,10 +250,10 @@
|
||||
"id": "51d93dea",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.203992Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.203891Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.213999Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.213730Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.902229Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.902150Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:54.912255Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:54.912027Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -271,7 +275,7 @@
|
||||
" 'mpg',\n",
|
||||
" Auto_train,\n",
|
||||
" Auto_valid)\n",
|
||||
"MSE"
|
||||
"MSE\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -290,10 +294,10 @@
|
||||
"id": "83432f06",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.215488Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.215401Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.225856Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.225598Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.913788Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.913696Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:54.924230Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:54.923983Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -373,11 +377,12 @@
|
||||
"id": "bcfc433f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.227362Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.227281Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:46.785216Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:46.784936Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:54.925794Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:54.925711Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:55.485718Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:55.485445Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -400,7 +405,7 @@
|
||||
" Y,\n",
|
||||
" cv=Auto.shape[0])\n",
|
||||
"cv_err = np.mean(cv_results['test_score'])\n",
|
||||
"cv_err"
|
||||
"cv_err\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -440,11 +445,12 @@
|
||||
"id": "f951ffc8",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:46.786874Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:46.786773Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.387263Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.386958Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:55.487370Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:55.487270Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.086269Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.085986Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -469,7 +475,7 @@
|
||||
" Y,\n",
|
||||
" cv=Auto.shape[0])\n",
|
||||
" cv_error[i] = np.mean(M_CV['test_score'])\n",
|
||||
"cv_error"
|
||||
"cv_error\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -487,7 +493,7 @@
|
||||
"It has two arrays as\n",
|
||||
"arguments, and then forms a larger\n",
|
||||
"array where the operation is applied to each pair of elements of the\n",
|
||||
"two arrays."
|
||||
"two arrays. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -496,10 +502,10 @@
|
||||
"id": "e3610b5a",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.388905Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.388796Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.391290Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.391034Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.087846Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.087739Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.090120Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.089850Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -519,7 +525,7 @@
|
||||
"source": [
|
||||
"A = np.array([3, 5, 9])\n",
|
||||
"B = np.array([2, 4])\n",
|
||||
"np.add.outer(A, B)"
|
||||
"np.add.outer(A, B)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -538,11 +544,12 @@
|
||||
"id": "1627460d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.392816Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.392715Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.414278Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.414015Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.091649Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.091540Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.113415Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.113164Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -568,7 +575,7 @@
|
||||
" Y,\n",
|
||||
" cv=cv)\n",
|
||||
" cv_error[i] = np.mean(M_CV['test_score'])\n",
|
||||
"cv_error"
|
||||
"cv_error\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -602,11 +609,12 @@
|
||||
"id": "8a636468",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.415780Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.415699Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.421148Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.420911Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.114990Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.114909Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.120375Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.120121Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -628,7 +636,7 @@
|
||||
" Auto.drop(['mpg'], axis=1),\n",
|
||||
" Auto['mpg'],\n",
|
||||
" cv=validation);\n",
|
||||
"results['test_score']"
|
||||
"results['test_score']\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -645,10 +653,10 @@
|
||||
"id": "746aeccd",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.422643Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.422562Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.442227Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.441935Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.121875Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.121788Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.141044Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.140787Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -671,7 +679,7 @@
|
||||
" Auto.drop(['mpg'], axis=1),\n",
|
||||
" Auto['mpg'],\n",
|
||||
" cv=validation)\n",
|
||||
"results['test_score'].mean(), results['test_score'].std()"
|
||||
"results['test_score'].mean(), results['test_score'].std()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -719,11 +727,12 @@
|
||||
"id": "daa53d0c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.443783Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.443671Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.447006Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.446693Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.142563Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.142482Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.146459Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.146215Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -731,7 +740,7 @@
|
||||
"def alpha_func(D, idx):\n",
|
||||
" cov_ = np.cov(D[['X','Y']].loc[idx], rowvar=False)\n",
|
||||
" return ((cov_[1,1] - cov_[0,1]) /\n",
|
||||
" (cov_[0,0]+cov_[1,1]-2*cov_[0,1]))"
|
||||
" (cov_[0,0]+cov_[1,1]-2*cov_[0,1]))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -752,10 +761,10 @@
|
||||
"id": "578c9564",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.448908Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.448792Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.451963Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.451646Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.147902Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.147820Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.150542Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.150288Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -791,11 +800,12 @@
|
||||
"id": "5754d6d5",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.453581Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.453484Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.457771Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.457529Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.151951Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.151874Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.155780Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.155537Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -833,11 +843,12 @@
|
||||
"id": "8320a49c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.459153Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.459066Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.461370Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.461113Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.157150Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.157060Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.159342Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.159133Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -868,7 +879,7 @@
|
||||
"unimportant and simply makes sure the loop is executed `B` times.\n",
|
||||
"\n",
|
||||
"Let’s use our function to evaluate the accuracy of our\n",
|
||||
"estimate of $\\alpha$ using $B=1{,}000$ bootstrap replications."
|
||||
"estimate of $\\alpha$ using $B=1{,}000$ bootstrap replications. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -877,10 +888,10 @@
|
||||
"id": "e656aa1f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.462792Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.462698Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.745378Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.745111Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.160707Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.160617Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.455515Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.455259Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -900,7 +911,7 @@
|
||||
" Portfolio,\n",
|
||||
" B=1000,\n",
|
||||
" seed=0)\n",
|
||||
"alpha_SE"
|
||||
"alpha_SE\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -943,11 +954,12 @@
|
||||
"id": "c5d14195",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.746827Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.746750Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.748785Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.748522Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.457024Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.456940Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.459011Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.458766Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -977,15 +989,16 @@
|
||||
"id": "7e0523f0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.750186Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.750111Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.751794Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.751573Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.460430Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.460350Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.462034Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.461808Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hp_func = partial(boot_OLS, MS(['horsepower']), 'mpg')"
|
||||
"hp_func = partial(boot_OLS, MS(['horsepower']), 'mpg')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1009,11 +1022,12 @@
|
||||
"id": "32836e93",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.753215Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.753126Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:47.768431Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:47.768162Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.463386Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.463311Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:56.477900Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:56.477641Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1041,7 +1055,7 @@
|
||||
"np.array([hp_func(Auto,\n",
|
||||
" rng.choice(392,\n",
|
||||
" 392,\n",
|
||||
" replace=True)) for _ in range(10)])"
|
||||
" replace=True)) for _ in range(10)])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1059,11 +1073,12 @@
|
||||
"id": "14ce3afa",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:47.769912Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:47.769830Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:48.926601Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:48.926292Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:56.479382Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:56.479299Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:57.627980Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:57.627706Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1084,7 +1099,7 @@
|
||||
" Auto,\n",
|
||||
" B=1000,\n",
|
||||
" seed=10)\n",
|
||||
"hp_se"
|
||||
"hp_se\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1108,11 +1123,12 @@
|
||||
"id": "6b1213ac",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:48.928216Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:48.928105Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:48.985288Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:48.985012Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:57.629628Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:57.629520Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:57.687018Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:57.686748Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1131,7 +1147,7 @@
|
||||
"source": [
|
||||
"hp_model.fit(Auto, Auto['mpg'])\n",
|
||||
"model_se = summarize(hp_model.results_)['std err']\n",
|
||||
"model_se"
|
||||
"model_se\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1179,10 +1195,10 @@
|
||||
"id": "af99b778",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:48.986895Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:48.986777Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:50.767999Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:50.767687Z"
|
||||
"iopub.execute_input": "2023-08-06T17:34:57.688662Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:57.688521Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:59.481117Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:59.480813Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1205,7 +1221,7 @@
|
||||
"quad_func = partial(boot_OLS,\n",
|
||||
" quad_model,\n",
|
||||
" 'mpg')\n",
|
||||
"boot_SE(quad_func, Auto, B=1000)"
|
||||
"boot_SE(quad_func, Auto, B=1000)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1222,11 +1238,12 @@
|
||||
"id": "0206281e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:14:50.769670Z",
|
||||
"iopub.status.busy": "2023-07-31T02:14:50.769556Z",
|
||||
"iopub.status.idle": "2023-07-31T02:14:50.778195Z",
|
||||
"shell.execute_reply": "2023-07-31T02:14:50.777948Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:34:59.482760Z",
|
||||
"iopub.status.busy": "2023-08-06T17:34:59.482640Z",
|
||||
"iopub.status.idle": "2023-08-06T17:34:59.491295Z",
|
||||
"shell.execute_reply": "2023-08-06T17:34:59.491041Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1246,15 +1263,24 @@
|
||||
"source": [
|
||||
"M = sm.OLS(Auto['mpg'],\n",
|
||||
" quad_model.fit_transform(Auto))\n",
|
||||
"summarize(M.fit())['std err']"
|
||||
"summarize(M.fit())['std err']\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0c11a71f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"cell_metadata_filter": "-all",
|
||||
"formats": "ipynb,md:myst",
|
||||
"main_language": "python"
|
||||
"main_language": "python",
|
||||
"notebook_metadata_filter": "-all"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -5,7 +5,9 @@
|
||||
"id": "d45c6d2b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Chapter 9"
|
||||
"\n",
|
||||
"# Chapter 9\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -26,18 +28,19 @@
|
||||
"id": "eeaa5be0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:31.933940Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:31.933716Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:32.774361Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:32.773846Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:41.109844Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:41.109434Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:41.992454Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:41.991881Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"from matplotlib.pyplot import subplots, cm\n",
|
||||
"import sklearn.model_selection as skm\n",
|
||||
"from ISLP import load_data, confusion_table"
|
||||
"from ISLP import load_data, confusion_table\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -55,17 +58,17 @@
|
||||
"id": "41a59634",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:32.776623Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:32.776343Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:32.807761Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:32.807471Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:41.994815Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:41.994482Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.026716Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.026409Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.svm import SVC\n",
|
||||
"from ISLP.svm import plot as plot_svm\n",
|
||||
"from sklearn.metrics import RocCurveDisplay"
|
||||
"from sklearn.metrics import RocCurveDisplay\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -83,15 +86,15 @@
|
||||
"id": "c9a175d7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:32.809461Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:32.809366Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:32.811185Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:32.810941Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.028394Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.028303Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.030112Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.029879Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"roc_curve = RocCurveDisplay.from_estimator # shorthand"
|
||||
"roc_curve = RocCurveDisplay.from_estimator # shorthand\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -123,11 +126,12 @@
|
||||
"id": "a7216b47",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:32.812622Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:32.812526Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:32.919064Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:32.918299Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.031508Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.031412Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.144107Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.141512Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -150,7 +154,7 @@
|
||||
"ax.scatter(X[:,0],\n",
|
||||
" X[:,1],\n",
|
||||
" c=y,\n",
|
||||
" cmap=cm.coolwarm);"
|
||||
" cmap=cm.coolwarm);\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -167,11 +171,12 @@
|
||||
"id": "ed329198",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:32.924346Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:32.922626Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:32.934623Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:32.934041Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.148661Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.148275Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.164597Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.162951Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -190,7 +195,7 @@
|
||||
],
|
||||
"source": [
|
||||
"svm_linear = SVC(C=10, kernel='linear')\n",
|
||||
"svm_linear.fit(X, y)"
|
||||
"svm_linear.fit(X, y)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,10 +215,10 @@
|
||||
"id": "95494b8b",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:32.938730Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:32.937883Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.117216Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.116876Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.170134Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.169857Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.356574Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.356275Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -233,7 +238,7 @@
|
||||
"plot_svm(X,\n",
|
||||
" y,\n",
|
||||
" svm_linear,\n",
|
||||
" ax=ax)"
|
||||
" ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -255,11 +260,12 @@
|
||||
"id": "98c2236f",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.119014Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.118891Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.258092Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.257817Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.358304Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.358185Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.497338Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.496986Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -280,7 +286,7 @@
|
||||
"plot_svm(X,\n",
|
||||
" y,\n",
|
||||
" svm_linear_small,\n",
|
||||
" ax=ax)"
|
||||
" ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -300,11 +306,12 @@
|
||||
"id": "b498f594",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.259755Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.259627Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.262432Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.262105Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.499133Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.499013Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.501656Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.501370Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -319,7 +326,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"svm_linear.coef_"
|
||||
"svm_linear.coef_\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -337,11 +344,12 @@
|
||||
"id": "b65e80d6",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.263978Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.263877Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.290665Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.290398Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.503232Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.503114Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.530360Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.530089Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -365,7 +373,7 @@
|
||||
" cv=kfold,\n",
|
||||
" scoring='accuracy')\n",
|
||||
"grid.fit(X, y)\n",
|
||||
"grid.best_params_"
|
||||
"grid.best_params_\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -384,11 +392,12 @@
|
||||
"id": "bba8fad7",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.292219Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.292135Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.294550Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.294318Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.533039Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.532868Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.535494Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.535174Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -403,7 +412,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"grid.cv_results_[('mean_test_score')]"
|
||||
"grid.cv_results_[('mean_test_score')]\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -424,17 +433,17 @@
|
||||
"id": "ad64269d",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.296023Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.295928Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.297787Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.297542Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.537255Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.537133Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.539335Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.538987Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_test = rng.standard_normal((20, 2))\n",
|
||||
"y_test = np.array([-1]*10+[1]*10)\n",
|
||||
"X_test[y_test==1] += 1"
|
||||
"X_test[y_test==1] += 1\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -453,10 +462,10 @@
|
||||
"id": "5107fca1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.299207Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.299107Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.304700Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.304434Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.540795Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.540697Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.546310Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.546056Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -520,7 +529,7 @@
|
||||
"source": [
|
||||
"best_ = grid.best_estimator_\n",
|
||||
"y_test_hat = best_.predict(X_test)\n",
|
||||
"confusion_table(y_test_hat, y_test)"
|
||||
"confusion_table(y_test_hat, y_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -540,10 +549,10 @@
|
||||
"id": "0320d9e0",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.306170Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.306087Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.310352Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.310107Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.547949Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.547859Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.552481Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.552209Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -608,7 +617,7 @@
|
||||
"svm_ = SVC(C=0.001,\n",
|
||||
" kernel='linear').fit(X, y)\n",
|
||||
"y_test_hat = svm_.predict(X_test)\n",
|
||||
"confusion_table(y_test_hat, y_test)"
|
||||
"confusion_table(y_test_hat, y_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -631,10 +640,10 @@
|
||||
"id": "84d7e778",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.311857Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.311774Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.434376Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.434084Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.554007Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.553921Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.677356Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.677072Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -652,7 +661,7 @@
|
||||
"source": [
|
||||
"X[y==1] += 1.9;\n",
|
||||
"fig, ax = subplots(figsize=(8,8))\n",
|
||||
"ax.scatter(X[:,0], X[:,1], c=y, cmap=cm.coolwarm);"
|
||||
"ax.scatter(X[:,0], X[:,1], c=y, cmap=cm.coolwarm);\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -669,10 +678,10 @@
|
||||
"id": "abb1f8be",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.436068Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.435948Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.440665Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.440404Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.679000Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.678886Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.683759Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.683448Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -736,7 +745,7 @@
|
||||
"source": [
|
||||
"svm_ = SVC(C=1e5, kernel='linear').fit(X, y)\n",
|
||||
"y_hat = svm_.predict(X)\n",
|
||||
"confusion_table(y_hat, y)"
|
||||
"confusion_table(y_hat, y)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -747,7 +756,7 @@
|
||||
"We fit the\n",
|
||||
"support vector classifier and plot the resulting hyperplane, using a\n",
|
||||
"very large value of `C` so that no observations are\n",
|
||||
"misclassified."
|
||||
"misclassified. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -756,11 +765,12 @@
|
||||
"id": "2e4ed2f5",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.442227Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.442140Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.558482Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.558203Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.685443Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.685337Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.800013Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.799680Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -779,7 +789,7 @@
|
||||
"plot_svm(X,\n",
|
||||
" y,\n",
|
||||
" svm_,\n",
|
||||
" ax=ax)"
|
||||
" ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -800,10 +810,10 @@
|
||||
"id": "164a611c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.560147Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.560028Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.565003Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.564720Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.801762Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.801639Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.806674Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.806389Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -867,7 +877,7 @@
|
||||
"source": [
|
||||
"svm_ = SVC(C=0.1, kernel='linear').fit(X, y)\n",
|
||||
"y_hat = svm_.predict(X)\n",
|
||||
"confusion_table(y_hat, y)"
|
||||
"confusion_table(y_hat, y)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -887,11 +897,12 @@
|
||||
"id": "c67591a1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.566489Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.566402Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.687614Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.687292Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.808296Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.808164Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.929878Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.929590Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -910,7 +921,7 @@
|
||||
"plot_svm(X,\n",
|
||||
" y,\n",
|
||||
" svm_,\n",
|
||||
" ax=ax)"
|
||||
" ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -939,10 +950,10 @@
|
||||
"id": "322be574",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.689275Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.689167Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.691316Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.690975Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.931502Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.931386Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:42.933592Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:42.933305Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -950,7 +961,7 @@
|
||||
"X = rng.standard_normal((200, 2))\n",
|
||||
"X[:100] += 2\n",
|
||||
"X[100:150] -= 2\n",
|
||||
"y = np.array([1]*150+[2]*50)"
|
||||
"y = np.array([1]*150+[2]*50)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -967,17 +978,18 @@
|
||||
"id": "04fda182",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.692938Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.692827Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.780286Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.779983Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:42.935158Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:42.935059Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:43.022251Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:43.021963Z"
|
||||
},
|
||||
"lines_to_next_cell": 2
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<matplotlib.collections.PathCollection at 0x1474d1090>"
|
||||
"<matplotlib.collections.PathCollection at 0x17fa62650>"
|
||||
]
|
||||
},
|
||||
"execution_count": 20,
|
||||
@@ -1000,7 +1012,7 @@
|
||||
"ax.scatter(X[:,0],\n",
|
||||
" X[:,1],\n",
|
||||
" c=y,\n",
|
||||
" cmap=cm.coolwarm)"
|
||||
" cmap=cm.coolwarm)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1019,10 +1031,10 @@
|
||||
"id": "0c2690d1",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.782021Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.781908Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:33.786110Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:33.785823Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:43.024031Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:43.023910Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:43.028070Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:43.027774Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1049,7 +1061,7 @@
|
||||
" test_size=0.5,\n",
|
||||
" random_state=0)\n",
|
||||
"svm_rbf = SVC(kernel=\"rbf\", gamma=1, C=1)\n",
|
||||
"svm_rbf.fit(X_train, y_train)"
|
||||
"svm_rbf.fit(X_train, y_train)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1058,7 +1070,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The plot shows that the resulting SVM has a decidedly non-linear\n",
|
||||
"boundary."
|
||||
"boundary. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1067,10 +1079,10 @@
|
||||
"id": "3eb171e8",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:33.787740Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:33.787646Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.044862Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.044553Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:43.029639Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:43.029527Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:43.285558Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:43.285220Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1090,7 +1102,7 @@
|
||||
"plot_svm(X_train,\n",
|
||||
" y_train,\n",
|
||||
" svm_rbf,\n",
|
||||
" ax=ax)"
|
||||
" ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1111,10 +1123,10 @@
|
||||
"id": "9a6b905b",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.046569Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.046449Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.197967Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.197661Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:43.287370Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:43.287120Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:43.439266Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:43.438933Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1136,7 +1148,7 @@
|
||||
"plot_svm(X_train,\n",
|
||||
" y_train,\n",
|
||||
" svm_rbf,\n",
|
||||
" ax=ax)"
|
||||
" ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1155,10 +1167,10 @@
|
||||
"id": "5ab01d6c",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.199716Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.199590Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.297753Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.297384Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:43.440929Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:43.440803Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:43.533538Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:43.533266Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1184,7 +1196,7 @@
|
||||
" cv=kfold,\n",
|
||||
" scoring='accuracy');\n",
|
||||
"grid.fit(X_train, y_train)\n",
|
||||
"grid.best_params_"
|
||||
"grid.best_params_\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1203,10 +1215,10 @@
|
||||
"id": "166a6acb",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.299759Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.299634Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.531852Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.531406Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:43.535272Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:43.535151Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:43.767970Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:43.767689Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1286,7 +1298,7 @@
|
||||
" ax=ax)\n",
|
||||
"\n",
|
||||
"y_hat_test = best_svm.predict(X_test)\n",
|
||||
"confusion_table(y_hat_test, y_test)"
|
||||
"confusion_table(y_hat_test, y_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1337,11 +1349,12 @@
|
||||
"id": "0607fc41",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.533790Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.533662Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.622732Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.622432Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:43.769863Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:43.769754Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:43.862697Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:43.862378Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1362,7 +1375,7 @@
|
||||
" y_train,\n",
|
||||
" name='Training',\n",
|
||||
" color='r',\n",
|
||||
" ax=ax);"
|
||||
" ax=ax);\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1381,10 +1394,10 @@
|
||||
"id": "5211a882",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.624457Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.624340Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.760006Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.759724Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:43.864472Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:43.864353Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:44.004720Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:44.004393Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1410,7 +1423,7 @@
|
||||
" y_train,\n",
|
||||
" name='Training $\\gamma=50$',\n",
|
||||
" color='r',\n",
|
||||
" ax=ax);"
|
||||
" ax=ax);\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1430,10 +1443,10 @@
|
||||
"id": "12acc4ff",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.761683Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.761571Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.765670Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.765355Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:44.006556Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:44.006435Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:44.010443Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:44.010158Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
@@ -1444,7 +1457,7 @@
|
||||
" name='Test $\\gamma=50$',\n",
|
||||
" color='b',\n",
|
||||
" ax=ax)\n",
|
||||
"fig;"
|
||||
"fig;\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1461,10 +1474,10 @@
|
||||
"id": "21c81913",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.767320Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.767196Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.862379Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.862109Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:44.012048Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:44.011950Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:44.108784Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:44.108364Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1492,7 +1505,7 @@
|
||||
" y_,\n",
|
||||
" name=name,\n",
|
||||
" ax=ax,\n",
|
||||
" color=c)"
|
||||
" color=c)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1516,10 +1529,10 @@
|
||||
"id": "2fff4fa8",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.864078Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.863962Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:34.950893Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:34.950574Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:44.110511Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:44.110386Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:44.198408Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:44.198028Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1540,7 +1553,7 @@
|
||||
"y = np.hstack([y, [0]*50])\n",
|
||||
"X[y==0,1] += 2\n",
|
||||
"fig, ax = subplots(figsize=(8,8))\n",
|
||||
"ax.scatter(X[:,0], X[:,1], c=y, cmap=cm.coolwarm);"
|
||||
"ax.scatter(X[:,0], X[:,1], c=y, cmap=cm.coolwarm);\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1557,11 +1570,12 @@
|
||||
"id": "5396f2df",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:34.952587Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:34.952475Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:35.530841Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:35.530380Z"
|
||||
}
|
||||
"iopub.execute_input": "2023-08-06T17:35:44.200116Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:44.200009Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:44.778024Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:44.777739Z"
|
||||
},
|
||||
"lines_to_next_cell": 0
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@@ -1586,7 +1600,7 @@
|
||||
" y,\n",
|
||||
" svm_rbf_3,\n",
|
||||
" scatter_cmap=cm.tab10,\n",
|
||||
" ax=ax)"
|
||||
" ax=ax)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1620,10 +1634,10 @@
|
||||
"id": "f63c575e",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:35.532698Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:35.532574Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:35.607009Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:35.606701Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:44.779762Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:44.779640Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:44.856770Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:44.856276Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1640,7 +1654,7 @@
|
||||
],
|
||||
"source": [
|
||||
"Khan = load_data('Khan')\n",
|
||||
"Khan['xtrain'].shape, Khan['xtest'].shape"
|
||||
"Khan['xtrain'].shape, Khan['xtest'].shape\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1657,7 +1671,7 @@
|
||||
"large number of features relative to the number of observations. This\n",
|
||||
"suggests that we should use a linear kernel, because the additional\n",
|
||||
"flexibility that will result from using a polynomial or radial kernel \n",
|
||||
"is unnecessary."
|
||||
"is unnecessary. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1666,10 +1680,10 @@
|
||||
"id": "32091338",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:35.608711Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:35.608596Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:35.637522Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:35.637236Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:44.858879Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:44.858701Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:44.889028Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:44.888737Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1758,7 +1772,7 @@
|
||||
"khan_linear = SVC(kernel='linear', C=10)\n",
|
||||
"khan_linear.fit(Khan['xtrain'], Khan['ytrain'])\n",
|
||||
"confusion_table(khan_linear.predict(Khan['xtrain']),\n",
|
||||
" Khan['ytrain'])"
|
||||
" Khan['ytrain'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1780,10 +1794,10 @@
|
||||
"id": "d9058023",
|
||||
"metadata": {
|
||||
"execution": {
|
||||
"iopub.execute_input": "2023-07-31T02:15:35.639109Z",
|
||||
"iopub.status.busy": "2023-07-31T02:15:35.639016Z",
|
||||
"iopub.status.idle": "2023-07-31T02:15:35.649929Z",
|
||||
"shell.execute_reply": "2023-07-31T02:15:35.649661Z"
|
||||
"iopub.execute_input": "2023-08-06T17:35:44.891247Z",
|
||||
"iopub.status.busy": "2023-08-06T17:35:44.891089Z",
|
||||
"iopub.status.idle": "2023-08-06T17:35:44.902454Z",
|
||||
"shell.execute_reply": "2023-08-06T17:35:44.902106Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
@@ -1870,7 +1884,7 @@
|
||||
],
|
||||
"source": [
|
||||
"confusion_table(khan_linear.predict(Khan['xtest']),\n",
|
||||
" Khan['ytest'])"
|
||||
" Khan['ytest'])\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1878,15 +1892,16 @@
|
||||
"id": "d0d5aba4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We see that using `C=10` yields two test set errors on these data."
|
||||
"We see that using `C=10` yields two test set errors on these data.\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"jupytext": {
|
||||
"cell_metadata_filter": "-all",
|
||||
"formats": "ipynb,md:myst",
|
||||
"main_language": "python"
|
||||
"main_language": "python",
|
||||
"notebook_metadata_filter": "-all"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
Reference in New Issue
Block a user