Add files via upload

Peter Norvig 2024-02-20 23:01:37 -08:00 committed by GitHub
parent 1ae17ba5b6
commit d5d31f932d
5 changed files with 7754 additions and 5900 deletions


@@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -31,24 +31,24 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Reading Data: `rides` and `yearly`\n",
"# Reading Data: `rides`, `yearly`, and `daily`\n",
"\n",
"I saved a bunch of my recorded [Strava](https://www.strava.com/athletes/575579) rides, most of them longer than 25 miles, as [`bikerides.tsv`](bikerides.tsv). The columns are: the date; the year; a title; the elapsed time of the ride; the length of the ride in miles; and the total climbing in feet, e.g.: \n",
"I saved a bunch of my recorded [Strava](https://www.strava.com/athletes/575579) rides, most of them longer than 25 miles, as [`bikerides.tsv`](bikerides.tsv). The tab-separated columns are: the date; the year; a title; the elapsed time of the ride; the length of the ride in miles; and the total climbing in feet, e.g.: \n",
"\n",
" Mon, 10/5\t2020\tHalf way around the bay on bay trail\t6:26:35\t80.05\t541\n",
" Mon, 10/5/2020\tHalf way around the bay on bay trail\t6:26:35\t80.05\t541\n",
" \n",
"I parse the file into the pandas dataframe `rides`, adding derived columns for miles per hour, vertical meters climbed per hour (VAM), grade in feet per mile, grade in percent, and kilometers ridden:"
]
},
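To make the derived columns concrete, here is a standalone sketch (not a cell from the notebook) applying the arithmetic of `add_ride_columns` below to the sample ride above:

    # Sample ride: 6:26:35 elapsed, 80.05 miles, 541 feet of climbing
    hours, miles, feet = 6.44, 80.05, 541       # 6:26:35 parsed to 6.44 decimal hours
    print(round(miles / hours, 2))              # mph    12.43
    print(round(feet / hours / 3.28084))        # vam    26    (vertical meters climbed per hour)
    print(round(feet / miles))                  # fpmi   7     (feet of climb per mile)
    print(round(feet / miles * 100 / 5280, 2))  # pct    0.13  (average grade in percent)
    print(round(miles * 1.609, 2))              # kms    128.8
    print(round(feet * 0.3048))                 # meters 165   (total meters climbed)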
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
"def parse_rides(lines):\n",
" \"\"\"Parse a bikerides.tsv file.\"\"\"\n",
" return drop_index(add_columns(pd.read_table(lines, comment='#',\n",
" return drop_index(add_ride_columns(pd.read_table(lines, comment='#',\n",
" converters=dict(hours=parse_hours, feet=parse_int))))\n",
"\n",
"def parse_hours(time: str) -> float: \n",
@@ -57,18 +57,20 @@
" for i, x in enumerate(reversed(time.split(':'))))\n",
" return round(hrs, 2)\n",
"\n",
"def parse_int(field: str) -> int: return int(field.replace(',', ''))\n",
"def parse_int(field: str) -> int: return int(field.replace(',', '').replace('ft', '').replace('mi', ''))\n",
"\n",
"def add_columns(rides) -> pd.DataFrame:\n",
"def add_ride_columns(rides) -> pd.DataFrame:\n",
" \"\"\"Compute new columns from existing ones.\"\"\"\n",
" mi, hr, ft = rides['miles'], rides['hours'], rides['feet']\n",
" if 'date' in rides and 'year' not in rides:\n",
" rides.insert(1, \"year\", [int(str(d).split('/')[-1]) for d in rides['date'].tolist()])\n",
" return rides.assign(\n",
" mph=round(mi / hr, 2),\n",
" vam=round(ft / hr / 3.28084),\n",
" fpm=round(ft / mi),\n",
" fpmi=round(ft / mi),\n",
" pct=round(ft / mi * 100 / 5280, 2),\n",
" kms=round(mi * 1.609, 2),\n",
" km_up=round(ft * 0.0003048, 1))\n",
" meters=round(ft * 0.3048))\n",
"\n",
"def drop_index(frame) -> pd.DataFrame:\n",
" \"\"\"Drop the index column.\"\"\"\n",
@@ -78,15 +80,17 @@
},
{
"cell_type": "code",
"execution_count": 65,
"execution_count": 125,
"metadata": {},
"outputs": [],
"source": [
"rides = parse_rides(open('bikerides.tsv'))\n",
"yearly = parse_rides(open('bikeyears.tsv')).drop(columns=['date', 'title'])\n",
"\n",
"yearly = parse_rides(open('bikeyears.tsv')).drop(columns='date')\n",
"\n",
"daily = yearly.copy()\n",
"for name in 'hours miles feet kms km_up'.split():\n",
" daily[name] = round(daily[name].map(lambda x: x / 350), 3 if name == 'km_up' else 1)"
"for name in 'hours miles feet kms meters'.split():\n",
" daily[name] = round(daily[name].map(lambda x: x / (6 * 52)), 1)"
]
},
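The divisor in the `daily` loop above is 6 * 52 = 312, presumably about six riding days a week over a year. A made-up yearly total shows the scale of the resulting averages:

    days = 6 * 52                  # 312 riding days per year
    print(round(5000 / days, 1))   # a hypothetical 5,000-mile year averages 16.0 miles per riding day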
{
@@ -106,7 +110,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
@@ -118,30 +122,12 @@
" title, mi, ft, *times = segment.split(',')[:5]\n",
" for time in times:\n",
" records.append((title, parse_hours(time), float(mi), parse_int(ft)))\n",
" return add_columns(pd.DataFrame(records, columns=('title', 'hours', 'miles', 'feet')))"
" return add_ride_columns(pd.DataFrame(records, columns=('title', 'hours', 'miles', 'feet')))"
]
},
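Each line of `bikesegments.csv` is split into a title, distance, climb, and up to two recorded times, and each time becomes its own record. A hypothetical line (the real file is not shown in this diff) would be unpacked like this:

    segment = 'Old La Honda,3.35,1255,21:10,23:35'
    title, mi, ft, *times = segment.split(',')[:5]
    print(title, mi, ft, times)    # Old La Honda 3.35 1255 ['21:10', '23:35']  (two records)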
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"segments = parse_segments(open('bikesegments.csv'))"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"places = pd.read_table(open('bikeplaceshort.csv'), sep=',', comment='#')"
]
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
@@ -160,18 +146,25 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"segments = parse_segments(open('bikesegments.csv'))\n",
"\n",
"places = drop_index(pd.read_table(open('bikeplaceshort.csv'), sep=',', comment='#'))\n",
"\n",
"tiles = drop_index(pd.DataFrame(columns='date square cluster total comment'.split(), data=[\n",
" ('06/30/2023', 13, 689, 2640, 'Rides in east Bay!9298603815'),\n",
" ('04/14/2023', 13, 630, 2595, 'Black Sands Beach connects Marin to max cluster!8891171008'),\n",
" ('03/04/2023', 13, 583, 2574, 'Almaden rides connects Gilroy to max cluster!8654437264'),\n",
" ('10/22/2022', 13, 396, 2495, 'Alviso levees to get to 13x13 max square!8003921626'),\n",
" ('10/16/2022', 12, 393, 2492, 'Milpitas ride connects East Bay to max cluster!7974994605'),\n",
" ('09/08/2022', 11, 300, 2487, 'First started tracking tiles')])\n",
" ).style.format({'comment': make_clickable, 'date': link_date})"
" ('01/01/2024', 14, 1056, 3105, 'Start of this year'),\n",
" ('12/08/2023', 14, 1042, 3084, 'Benicia ride connects East Bay and Napa clusters!10350071201'),\n",
" ('11/05/2023', 14, 932, 2914, 'Alum Rock ride gets 14x14 max square!8850905872'),\n",
" ('06/30/2023', 13, 689, 2640, 'Rides in east Bay fill in holes!9298603815'),\n",
" ('04/14/2023', 13, 630, 2595, 'Black Sands Beach low-tide hike connects Marin to max cluster!8891171008'),\n",
" ('03/04/2023', 13, 583, 2574, 'Almaden rides connects Gilroy to max cluster!8654437264'),\n",
" ('10/22/2022', 13, 396, 2495, 'Alviso levees to get to 13x13 max square!8003921626'),\n",
" ('10/16/2022', 12, 393, 2492, 'Milpitas ride connects East Bay to max cluster!7974994605'),\n",
" ('09/08/2022', 11, 300, 2487, 'First started tracking tiles')])\n",
" ).style.format({'comment': make_clickable, 'date': link_date})"
]
},
{
@@ -183,7 +176,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
@@ -216,7 +209,7 @@
" \"\"\"Given a ride distance in miles and total climb in feet, estimate time in minutes.\"\"\"\n",
" return round(60 * miles / estimator(feet / miles))\n",
"\n",
"def top(frame, field, n=20): return frame.sort_values(field, ascending=False).head(n)"
"def top(frame, field, n=20): return drop_index(frame.sort_values(field, ascending=False).head(n))"
]
},
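The `estimator` that `estimate` calls is defined outside this hunk; it evidently maps a climbing rate in feet per mile to a predicted speed in mph. A hypothetical stand-in (not the notebook's fitted curve) shows the shape of the calculation:

    def estimator(fpmi: float) -> float:
        """Made-up linear fit, for illustration only: flat rides at about 15 mph."""
        return max(6, 15 - 0.06 * fpmi)

    def estimate(miles, feet) -> int:
        """Given a ride distance in miles and total climb in feet, estimate time in minutes."""
        return round(60 * miles / estimator(feet / miles))

    print(estimate(30, 2000))   # 2000/30 is about 67 ft/mi, so about 11 mph, so about 164 minutes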
{
@@ -228,23 +221,33 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"def mapl(f, *values): return list(map(f, *values))\n",
"\n",
"def wandering(places=places, by=['pct']):\n",
" \"All those who wander are not lost.\" # Also try by=['cat', 'pct']\n",
" frame = places.sort_values(by=by, ascending=('pct' not in by))\n",
" M = 1_000_000\n",
" for i, (name, miles, county, pct) in frame.iterrows():\n",
" # Some fiddling to get the format right\n",
" p = f'{pct:.1f}' if (pct > 0.1) else f'{pct:.3f}'\n",
" mymiles = pct / 100 * miles\n",
" done = f'{rounded(mymiles)}/{rounded(miles)} mi'\n",
" togo = next((f'{rounded(target / 100 * miles - mymiles):>5} mi for {target}%' \n",
" for target in (0.02, 0.1, 0.2, 1, 2, 25, 50, 90, 99)\n",
" if mymiles < target / 100 * miles), '')\n",
" print(f'{county} {p:>5}% {name:25} {done:>15} {togo}') \n",
" F = drop_index(places.sort_values(by=by, ascending=('pct' not in by)))\n",
" pd.set_option('display.max_rows', None)\n",
" return pd.DataFrame(\n",
" {'pct': [f'{p:.1f}%' if (p > 1) else f'{p:.3f}%' for p in F['pct']],\n",
" 'county': F['county'],\n",
" 'name': F['name'],\n",
" 'total': F['miles'],\n",
" 'done': mapl(rounded, F['miles'] * F['pct'] / 100),\n",
" 'to next badge': mapl(to_go, F['pct'], F['miles'])})\n",
"\n",
"\n",
"def to_go(pct, miles, targets=(0.02, 0.1, 0.2, 1, 2, 25, 50, 90, 99)):\n",
" \"\"\"Describe next target to hit to get a badge.\"\"\"\n",
" done = pct * miles / 100\n",
" return next((f'{rounded(target / 100 * miles - done):>5} mi to {target}%' \n",
" for target in targets\n",
" if done < target / 100 * miles), \n",
" '')\n",
" \n",
"def rounded(x: float) -> str: \n",
" \"\"\"Round x to 3 spaces wide (if possible).\"\"\"\n",
@@ -263,32 +266,36 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"def make_leaders(data):\n",
" \"\"\"Make a dataframe of leaders in two counties.\"\"\"\n",
" leaders = pd.DataFrame(data, columns=['Name', 'Initials', 'SMC %', 'SCC %', 'Front?'])\n",
" leaders = pd.DataFrame(data, columns=['Name', 'Initials', 'SMC %', 'SCC %'])\n",
" leaders['SMC miles'] = [round(2814 * d[2] / 100) for d in data]\n",
" leaders['SCC miles'] = [round(7569 * d[3] / 100) for d in data]\n",
" leaders['Total miles'] = leaders['SMC miles'] + leaders['SCC miles']\n",
" leaders['Avg %'] = (leaders['SMC %'] + leaders['SCC %']) / 2\n",
" return drop_index(leaders.sort_values('Avg %', ascending=False))\n",
"\n",
"leaders = make_leaders([ # Data as of Sept 20, 2023 (Name, Initials, SMC, SCC, Frontier?)\n",
" ('Barry Mann', 'BM', 76.97, 30.21, 1), ('Jason Molenda', 'JM', 7.13, 55.39, 1), \n",
" ('Peter Norvig', 'PN', 61.56, 32.8, 1), ('Brian Feinberg', 'BF', 32.5, 43.68, 1),\n",
" ('Jim Brooks', 'JB', 4.23, 44.36, 0), ('Megan Gardner', 'MG', 97.62, 8.69, 1),\n",
" ('Matthew Ring', 'MR', 78.85, 1.48, 0), ('Elliot Hoff', 'EF', 52.88, 8.14, 0)])\n",
"leaders = make_leaders([ # Data as of Jan 3, 2024 (Name, Initials, SMC, SCC)\n",
" ('Megan Gardner', 'MG', 99.01, 13.6),\n",
" ('Barry Mann', 'BM', 77.41, 30.38), \n",
" ('Peter Norvig', 'PN', 63.5, 33.0),\n",
" ('Brian Feinberg', 'BF', 32.5, 43.9),\n",
" ('Jason Molenda', 'JM', 7.56, 56.25) \n",
" ])\n",
" \n",
"def pareto_front(leaders):\n",
" ax = leaders.plot('SMC %', 'SCC %', grid=True, kind='scatter')\n",
" front = sorted((x, y) for i, (_, _, x, y, f, *_) in leaders.iterrows() if f)\n",
" ax = leaders.plot('SMC %', 'SCC %', kind='scatter')\n",
" front = sorted((x, y) for i, (_, _, x, y, *_) in leaders.iterrows())\n",
" ax.plot(*zip(*front), ':'); ax.axis('square'); grid()\n",
" ax.set_xlabel('San Mateo County %')\n",
" ax.set_ylabel('Santa Clara County %')\n",
" for i, (name, initials, x, y, *_) in leaders.iterrows():\n",
" ax.text(x - 2, y + 2, initials)\n",
" return leaders.drop(columns=['Front?'])"
" return leaders"
]
},
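The hard-coded constants in `make_leaders` above are evidently the total road miles credited for each county: 2814 for San Mateo County and 7569 for Santa Clara County. A quick check of the derived columns for one row of the data:

    smc_pct, scc_pct = 63.5, 33.0           # the 'Peter Norvig' row
    print(round(2814 * smc_pct / 100))      # SMC miles: 1787
    print(round(7569 * scc_pct / 100))      # SCC miles: 2498
    print(1787 + 2498)                      # Total miles: 4285
    print((smc_pct + scc_pct) / 2)          # Avg %: 48.25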
{
@@ -300,7 +307,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
@@ -314,16 +321,14 @@
" \"\"\"The number of rides needed to reach an Eddington number target.\"\"\"\n",
" return target - sum(distances >= target)\n",
"\n",
"def Ed_gaps(rides, E_km=100, E_mi=67, N=11) -> dict:\n",
"def Ed_gaps(rides, E_km=103, E_mi=69, N=9) -> dict:\n",
" \"\"\"A table of gaps to Eddington numbers by year.\"\"\"\n",
" data = [(E_km + d, sum(rides.kms >= E_km + d), Ed_gap(rides.kms, E_km + d), \n",
" E_mi + d, sum(rides.miles >= E_mi + d), Ed_gap(rides.miles, E_mi + d))\n",
" data = [(E_km + d, Ed_gap(rides.kms, E_km + d), E_mi + d, Ed_gap(rides.miles, E_mi + d))\n",
" for d in range(N)]\n",
" df = pd.DataFrame(data, columns=['kms', 'km rides', 'kms gap', \n",
" 'miles', 'miles rides', 'miles gap'])\n",
" df = pd.DataFrame(data, columns=['kms', 'kms gap', 'miles', 'miles gap'])\n",
" return drop_index(df)\n",
"\n",
"def Ed_progress(rides, years=range(2023, 2013, -1)) -> pd.DataFrame:\n",
"def Ed_progress(rides, years=range(2024, 2013, -1)) -> pd.DataFrame:\n",
" \"\"\"A table of Eddington numbers by year, and a plot.\"\"\"\n",
" def Ed(year, unit): return Ed_number(rides[rides['year'] <= year], unit)\n",
" data = [(y, Ed(y, 'kms'), Ed(y, 'miles')) for y in years]\n",
