Initial commit
This commit is contained in:
55
Solutions/2_5/cta.py
Normal file
55
Solutions/2_5/cta.py
Normal file
@@ -0,0 +1,55 @@
|
||||
# cta.py
|
||||
|
||||
from collections import defaultdict, Counter
|
||||
import tracemalloc
|
||||
import readrides
|
||||
|
||||
tracemalloc.start()
|
||||
|
||||
rows = readrides.read_rides_as_dicts('../../Data/ctabus.csv')
|
||||
|
||||
# --------------------------------------------------
|
||||
# Question 1: How many bus routes are in Chicago?
|
||||
# Solution: Use a set to get unique values.
|
||||
|
||||
routes = set()
|
||||
for row in rows:
|
||||
routes.add(row['route'])
|
||||
print(len(routes), 'routes')
|
||||
|
||||
# --------------------------------------------------
|
||||
# Question 2: How many people rode route 22 on February 2, 2011?
|
||||
# Solution: Make dictionary with composite keys
|
||||
|
||||
by_route_date = { }
|
||||
for row in rows:
|
||||
by_route_date[row['route'], row['date']] = row['rides']
|
||||
|
||||
print('Rides on Route 22, February 2, 2011:', by_route_date['22','02/02/2011'])
|
||||
|
||||
# --------------------------------------------------
|
||||
# Question 3: Total number of rides per route
|
||||
# Solution: Use a counter to tabulate things
|
||||
rides_per_route = Counter()
|
||||
for row in rows:
|
||||
rides_per_route[row['route']] += row['rides']
|
||||
|
||||
# Make a table showing the routes and a count ranked by popularity
|
||||
for route, count in rides_per_route.most_common():
|
||||
print('%5s %10d' % (route, count))
|
||||
|
||||
# --------------------------------------------------
|
||||
# Question 4: Routes with greatest increase in ridership 2001 - 2011
|
||||
# Solution: Counters embedded inside a defaultdict
|
||||
|
||||
rides_by_year = defaultdict(Counter)
|
||||
for row in rows:
|
||||
year = row['date'].split('/')[2]
|
||||
rides_by_year[year][row['route']] += row['rides']
|
||||
|
||||
diffs = rides_by_year['2011'] - rides_by_year['2001']
|
||||
for route, diff in diffs.most_common(5):
|
||||
print(route, diff)
|
||||
|
||||
# ---- Memory use
|
||||
print('Memory Use: Current %d, Peak %d' % tracemalloc.get_traced_memory())
|
||||
144
Solutions/2_5/readrides.py
Normal file
144
Solutions/2_5/readrides.py
Normal file
@@ -0,0 +1,144 @@
|
||||
# readrides.py
|
||||
|
||||
import csv
|
||||
|
||||
def read_rides_as_tuples(filename):
|
||||
'''
|
||||
Read the bus ride data as a list of tuples
|
||||
'''
|
||||
records = []
|
||||
with open(filename) as f:
|
||||
rows = csv.reader(f)
|
||||
headings = next(rows) # Skip headers
|
||||
for row in rows:
|
||||
route = row[0]
|
||||
date = row[1]
|
||||
daytype = row[2]
|
||||
rides = int(row[3])
|
||||
record = (route, date, daytype, rides)
|
||||
records.append(record)
|
||||
return records
|
||||
|
||||
def read_rides_as_dicts(filename):
|
||||
'''
|
||||
Read the bus ride data as a list of dicts
|
||||
'''
|
||||
records = []
|
||||
with open(filename) as f:
|
||||
rows = csv.reader(f)
|
||||
headings = next(rows) # Skip headers
|
||||
for row in rows:
|
||||
route = row[0]
|
||||
date = row[1]
|
||||
daytype = row[2]
|
||||
rides = int(row[3])
|
||||
record = {
|
||||
'route': route,
|
||||
'date': date,
|
||||
'daytype': daytype,
|
||||
'rides' : rides
|
||||
}
|
||||
records.append(record)
|
||||
return records
|
||||
|
||||
class Row:
|
||||
__slots__ = ('route', 'date', 'daytype', 'rides')
|
||||
def __init__(self, route, date, daytype, rides):
|
||||
self.route = route
|
||||
self.date = date
|
||||
self.daytype = daytype
|
||||
self.rides = rides
|
||||
|
||||
def read_rides_as_instances(filename):
|
||||
'''
|
||||
Read the bus ride data as a list of instances
|
||||
'''
|
||||
records = []
|
||||
with open(filename) as f:
|
||||
rows = csv.reader(f)
|
||||
headings = next(rows) # Skip headers
|
||||
for row in rows:
|
||||
route = row[0]
|
||||
date = row[1]
|
||||
daytype = row[2]
|
||||
rides = int(row[3])
|
||||
record = Row(route, date, daytype, rides)
|
||||
records.append(record)
|
||||
return records
|
||||
|
||||
# Read as columns
|
||||
def read_rides_as_columns(filename):
|
||||
'''
|
||||
Read the bus ride data into 4 lists, representing columns
|
||||
'''
|
||||
routes = []
|
||||
dates = []
|
||||
daytypes = []
|
||||
numrides = []
|
||||
with open(filename) as f:
|
||||
rows = csv.reader(f)
|
||||
headings = next(rows) # Skip headers
|
||||
for row in rows:
|
||||
routes.append(row[0])
|
||||
dates.append(row[1])
|
||||
daytypes.append(row[2])
|
||||
numrides.append(int(row[3]))
|
||||
return dict(routes=routes, dates=dates, daytypes=daytypes, numrides=numrides)
|
||||
|
||||
# The great "fake"
|
||||
|
||||
import collections
|
||||
class RideData(collections.Sequence):
|
||||
def __init__(self):
|
||||
# Each value is a list with all of the values (a column)
|
||||
self.routes = []
|
||||
self.dates = []
|
||||
self.daytypes = []
|
||||
self.numrides = []
|
||||
|
||||
def __len__(self):
|
||||
# All lists assumed to have the same length
|
||||
return len(self.routes)
|
||||
|
||||
def append(self, d):
|
||||
self.routes.append(d['route'])
|
||||
self.dates.append(d['date'])
|
||||
self.daytypes.append(d['daytype'])
|
||||
self.numrides.append(d['rides'])
|
||||
|
||||
def __getitem__(self, index):
|
||||
return { 'route': self.routes[index],
|
||||
'date': self.dates[index],
|
||||
'daytype': self.daytypes[index],
|
||||
'rides': self.numrides[index] }
|
||||
|
||||
# Modified version using RideData
|
||||
def read_rides_as_dicts(filename):
|
||||
'''
|
||||
Read the bus ride data as a list of dicts
|
||||
'''
|
||||
records = RideData()
|
||||
with open(filename) as f:
|
||||
rows = csv.reader(f)
|
||||
headings = next(rows) # Skip headers
|
||||
for row in rows:
|
||||
route = row[0]
|
||||
date = row[1]
|
||||
daytype = row[2]
|
||||
rides = int(row[3])
|
||||
record = {
|
||||
'route': route,
|
||||
'date': date,
|
||||
'daytype': daytype,
|
||||
'rides' : rides
|
||||
}
|
||||
records.append(record)
|
||||
return records
|
||||
|
||||
if __name__ == '__main__':
|
||||
import tracemalloc
|
||||
tracemalloc.start()
|
||||
read_rides = read_rides_as_dicts # Change to as_dicts, as_instances, etc.
|
||||
rides = read_rides("../../Data/ctabus.csv")
|
||||
|
||||
print('Memory Use: Current %d, Peak %d' % tracemalloc.get_traced_memory())
|
||||
Reference in New Issue
Block a user