diff --git a/11_midlevel_data.ipynb b/11_midlevel_data.ipynb index 6520e93..af8367c 100644 --- a/11_midlevel_data.ipynb +++ b/11_midlevel_data.ipynb @@ -132,7 +132,7 @@ { "data": { "text/plain": [ - "(#228) ['xxbos','xxmaj','this','movie',',','which','i','just','discovered','at'...]" + "(#374) ['xxbos','xxmaj','well',',','\"','cube','\"','(','1997',')'...]" ] }, "execution_count": null, @@ -162,7 +162,7 @@ { "data": { "text/plain": [ - "tensor([ 2, 8, 20, 27, 11, 88, 18, 53, 3286, 45])" + "tensor([ 2, 8, 76, 10, 23, 3112, 23, 34, 3113, 33])" ] }, "execution_count": null, @@ -192,7 +192,7 @@ { "data": { "text/plain": [ - "(#10) ['xxbos','xxmaj','this','movie',',','which','i','just','discovered','at']" + "(#10) ['xxbos','xxmaj','well',',','\"','cube','\"','(','1997',')']" ] }, "execution_count": null, @@ -219,7 +219,7 @@ { "data": { "text/plain": [ - "'xxbos xxmaj this movie , which i just discovered at'" + "'xxbos xxmaj well , \" cube \" ( 1997 )'" ] }, "execution_count": null, @@ -256,8 +256,8 @@ { "data": { "text/plain": [ - "((#228) ['xxbos','xxmaj','this','movie',',','which','i','just','discovered','at'...],\n", - " (#238) ['xxbos','i','stopped','watching','this','film','half','way','through','.'...])" + "((#374) ['xxbos','xxmaj','well',',','\"','cube','\"','(','1997',')'...],\n", + " (#207) ['xxbos','xxmaj','conrad','xxmaj','hall','went','out','with','a','bang'...])" ] }, "execution_count": null, diff --git a/clean/11_midlevel_data.ipynb b/clean/11_midlevel_data.ipynb index d78cb56..25bd7a0 100644 --- a/clean/11_midlevel_data.ipynb +++ b/clean/11_midlevel_data.ipynb @@ -76,7 +76,7 @@ { "data": { "text/plain": [ - "(#228) ['xxbos','xxmaj','this','movie',',','which','i','just','discovered','at'...]" + "(#374) ['xxbos','xxmaj','well',',','\"','cube','\"','(','1997',')'...]" ] }, "execution_count": null, @@ -99,7 +99,7 @@ { "data": { "text/plain": [ - "tensor([ 2, 8, 20, 27, 11, 88, 18, 53, 3286, 45])" + "tensor([ 2, 8, 76, 10, 23, 3112, 23, 34, 3113, 33])" ] }, "execution_count": null, @@ -122,30 +122,7 @@ { "data": { "text/plain": [ - "tensor([ 2, 8, 20, 27, 11, 88, 18, 53, 3286, 45])" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "num = Numericalize()\n", - "num.setup(toks)\n", - "nums = toks.map(num)\n", - "nums[0][:10]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(#10) ['xxbos','xxmaj','this','movie',',','which','i','just','discovered','at']" + "(#10) ['xxbos','xxmaj','well',',','\"','cube','\"','(','1997',')']" ] }, "execution_count": null, @@ -165,7 +142,7 @@ { "data": { "text/plain": [ - "'xxbos xxmaj this movie , which i just discovered at'" + "'xxbos xxmaj well , \" cube \" ( 1997 )'" ] }, "execution_count": null, @@ -185,8 +162,8 @@ { "data": { "text/plain": [ - "((#228) ['xxbos','xxmaj','this','movie',',','which','i','just','discovered','at'...],\n", - " (#238) ['xxbos','i','stopped','watching','this','film','half','way','through','.'...])" + "((#374) ['xxbos','xxmaj','well',',','\"','cube','\"','(','1997',')'...],\n", + " (#207) ['xxbos','xxmaj','conrad','xxmaj','hall','went','out','with','a','bang'...])" ] }, "execution_count": null, @@ -269,7 +246,7 @@ { "data": { "text/plain": [ - "(3.0, 5.0, 2.0)" + "(3.0, -1.0, 2.0)" ] }, "execution_count": null,