Treat TK
This commit is contained in:
@@ -6,7 +6,15 @@
|
||||
"metadata": {
|
||||
"hide_input": true
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /home/sgugger/.kaggle/kaggle.json'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#hide\n",
|
||||
"from utils import *\n",
|
||||
@@ -87,7 +95,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Path('/home/jhoward/.fastai/archive/bluebook')"
|
||||
"Path('/home/sgugger/.fastai/archive/bluebook')"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
@@ -118,7 +126,7 @@
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(#7) [Path('TrainAndValid.csv'),Path('Machine_Appendix.csv'),Path('random_forest_benchmark_test.csv'),Path('Test.csv'),Path('median_benchmark.csv'),Path('ValidSolution.csv'),Path('Valid.csv')]"
|
||||
"(#7) [Path('Valid.csv'),Path('Machine_Appendix.csv'),Path('ValidSolution.csv'),Path('TrainAndValid.csv'),Path('random_forest_benchmark_test.csv'),Path('Test.csv'),Path('median_benchmark.csv')]"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
@@ -423,20 +431,8 @@
|
||||
" <th>saleIs_quarter_start</th>\n",
|
||||
" <th>saleIs_year_end</th>\n",
|
||||
" <th>saleIs_year_start</th>\n",
|
||||
" <th>SalesID_na</th>\n",
|
||||
" <th>MachineID_na</th>\n",
|
||||
" <th>ModelID_na</th>\n",
|
||||
" <th>datasource_na</th>\n",
|
||||
" <th>auctioneerID_na</th>\n",
|
||||
" <th>YearMade_na</th>\n",
|
||||
" <th>MachineHoursCurrentMeter_na</th>\n",
|
||||
" <th>saleYear_na</th>\n",
|
||||
" <th>saleMonth_na</th>\n",
|
||||
" <th>saleWeek_na</th>\n",
|
||||
" <th>saleDay_na</th>\n",
|
||||
" <th>saleDayofweek_na</th>\n",
|
||||
" <th>saleDayofyear_na</th>\n",
|
||||
" <th>saleElapsed_na</th>\n",
|
||||
" <th>SalesID</th>\n",
|
||||
" <th>MachineID</th>\n",
|
||||
" <th>ModelID</th>\n",
|
||||
@@ -509,32 +505,20 @@
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>1139246</td>\n",
|
||||
" <td>999089</td>\n",
|
||||
" <td>3157</td>\n",
|
||||
" <td>121</td>\n",
|
||||
" <td>1139246.0</td>\n",
|
||||
" <td>999089.0</td>\n",
|
||||
" <td>3157.0</td>\n",
|
||||
" <td>121.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>2004</td>\n",
|
||||
" <td>2004.0</td>\n",
|
||||
" <td>68.0</td>\n",
|
||||
" <td>2006</td>\n",
|
||||
" <td>11</td>\n",
|
||||
" <td>46</td>\n",
|
||||
" <td>16</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>320</td>\n",
|
||||
" <td>1163635200</td>\n",
|
||||
" <td>2006.0</td>\n",
|
||||
" <td>11.0</td>\n",
|
||||
" <td>46.0</td>\n",
|
||||
" <td>16.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>320.0</td>\n",
|
||||
" <td>1.163635e+09</td>\n",
|
||||
" <td>11.097410</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@@ -591,32 +575,20 @@
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>1139248</td>\n",
|
||||
" <td>117657</td>\n",
|
||||
" <td>77</td>\n",
|
||||
" <td>121</td>\n",
|
||||
" <td>1139248.0</td>\n",
|
||||
" <td>117657.0</td>\n",
|
||||
" <td>77.0</td>\n",
|
||||
" <td>121.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>1996</td>\n",
|
||||
" <td>1996.0</td>\n",
|
||||
" <td>4640.0</td>\n",
|
||||
" <td>2004</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>13</td>\n",
|
||||
" <td>26</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>86</td>\n",
|
||||
" <td>1080259200</td>\n",
|
||||
" <td>2004.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>13.0</td>\n",
|
||||
" <td>26.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>86.0</td>\n",
|
||||
" <td>1.080259e+09</td>\n",
|
||||
" <td>10.950807</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
@@ -673,32 +645,20 @@
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>False</td>\n",
|
||||
" <td>1139249</td>\n",
|
||||
" <td>434808</td>\n",
|
||||
" <td>7009</td>\n",
|
||||
" <td>121</td>\n",
|
||||
" <td>1139249.0</td>\n",
|
||||
" <td>434808.0</td>\n",
|
||||
" <td>7009.0</td>\n",
|
||||
" <td>121.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>2001</td>\n",
|
||||
" <td>2001.0</td>\n",
|
||||
" <td>2838.0</td>\n",
|
||||
" <td>2004</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>26</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>57</td>\n",
|
||||
" <td>1077753600</td>\n",
|
||||
" <td>2004.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>26.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>57.0</td>\n",
|
||||
" <td>1.077754e+09</td>\n",
|
||||
" <td>9.210340</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
@@ -716,6 +676,66 @@
|
||||
"to.show(3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>state</th>\n",
|
||||
" <th>ProductGroup</th>\n",
|
||||
" <th>Drive_System</th>\n",
|
||||
" <th>Enclosure</th>\n",
|
||||
" <th>SalePrice</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Alabama</td>\n",
|
||||
" <td>WL</td>\n",
|
||||
" <td>#na#</td>\n",
|
||||
" <td>EROPS w AC</td>\n",
|
||||
" <td>11.097410</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>North Carolina</td>\n",
|
||||
" <td>WL</td>\n",
|
||||
" <td>#na#</td>\n",
|
||||
" <td>EROPS w AC</td>\n",
|
||||
" <td>10.950807</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>New York</td>\n",
|
||||
" <td>SSL</td>\n",
|
||||
" <td>#na#</td>\n",
|
||||
" <td>OROPS</td>\n",
|
||||
" <td>9.210340</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>"
|
||||
],
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"to1 = TabularPandas(df, procs, ['state', 'ProductGroup', 'Drive_System', 'Enclosure'], [], y_names=dep_var, splits=splits)\n",
|
||||
"to1.show(3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -818,6 +838,80 @@
|
||||
"to.items.head(3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>state</th>\n",
|
||||
" <th>ProductGroup</th>\n",
|
||||
" <th>Drive_System</th>\n",
|
||||
" <th>Enclosure</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>33</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>32</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" state ProductGroup Drive_System Enclosure\n",
|
||||
"0 1 6 0 3\n",
|
||||
"1 33 6 0 3\n",
|
||||
"2 32 3 0 6"
|
||||
]
|
||||
},
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"to1.items[['state', 'ProductGroup', 'Drive_System', 'Enclosure']].head(3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -8203,7 +8297,14 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### fastai's Tabular classes"
|
||||
"### Sidebar: fastai's Tabular classes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### End sidebar"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -8254,7 +8355,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Combining embeddings with other methods"
|
||||
"### Combining embeddings with other methods"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user