add chapter 11
This commit is contained in:
parent
3d33b999c9
commit
a7f0c90b46
6
appB.jl
6
appB.jl
@ -165,6 +165,12 @@ proptable([walk_unique_2ahead() for _ in 1:10^5])
|
||||
@time wide = DataFrame(ones(1, 10_000), :auto);
|
||||
@time Tables.columntable(wide);
|
||||
|
||||
# Code for exercise 11.2
|
||||
|
||||
using Statistics
|
||||
Dict(key.city => mean(df.rainfall) for (key, df) in pairs(gdf_city))
|
||||
combine(gdf_city, :rainfall => mean)
|
||||
|
||||
# Code for exercise 12.1
|
||||
|
||||
cg = complete_graph(37700)
|
||||
|
75
ch11.jl
75
ch11.jl
@ -4,6 +4,12 @@
|
||||
|
||||
# Code from section 11.1
|
||||
|
||||
# Deserialize the source data frame
|
||||
|
||||
using DataFrames
|
||||
using Serialization
|
||||
walk = deserialize("walk.bin")
|
||||
|
||||
# Code for a note on conversion
|
||||
|
||||
x = [1.5]
|
||||
@ -12,9 +18,6 @@ x
|
||||
|
||||
# Code from section 11.1.1
|
||||
|
||||
using Serialization
|
||||
walk = deserialize("walk.bin")
|
||||
|
||||
Matrix(walk)
|
||||
Matrix{Any}(walk)
|
||||
Matrix{String}(walk)
|
||||
@ -83,3 +86,69 @@ identity.(eachcol(walk))
|
||||
|
||||
df = DataFrame(x=1:2, b=["a", "b"])
|
||||
identity.(eachcol(df))
|
||||
|
||||
# Code from section 11.2
|
||||
|
||||
using CSV
|
||||
raw_data = """
|
||||
city,date,rainfall
|
||||
Olecko,2020-11-16,2.9
|
||||
Olecko,2020-11-17,4.1
|
||||
Olecko,2020-11-19,4.3
|
||||
Olecko,2020-11-20,2.0
|
||||
Olecko,2020-11-21,0.6
|
||||
Olecko,2020-11-22,1.0
|
||||
Ełk,2020-11-16,3.9
|
||||
Ełk,2020-11-19,1.2
|
||||
Ełk,2020-11-20,2.0
|
||||
Ełk,2020-11-22,2.0
|
||||
""";
|
||||
rainfall_df = CSV.read(IOBuffer(raw_data), DataFrame)
|
||||
|
||||
gdf_city = groupby(rainfall_df, "city")
|
||||
|
||||
gdf_city_date = groupby(rainfall_df, Not("rainfall"))
|
||||
|
||||
keys(gdf_city_date)
|
||||
|
||||
gk1 = keys(gdf_city_date)[1]
|
||||
g1_t = Tuple(gk1)
|
||||
g1_nt = NamedTuple(gk1)
|
||||
g1_dict = Dict(gk1)
|
||||
|
||||
gdf_city_date[1]
|
||||
gdf_city_date[gk1]
|
||||
gdf_city_date[g1_t]
|
||||
gdf_city_date[g1_nt]
|
||||
gdf_city_date[g1_dict]
|
||||
|
||||
gdf_city[("Olecko",)]
|
||||
gdf_city[(city="Olecko",)]
|
||||
|
||||
using BenchmarkTools
|
||||
bench_df = DataFrame(id=1:10^8);
|
||||
bench_gdf = groupby(bench_df, :id);
|
||||
@btime groupby($bench_df, :id);
|
||||
bench_i = 1_000_000;
|
||||
bench_gk = keys(bench_gdf)[bench_i];
|
||||
bench_t = Tuple(bench_gk);
|
||||
bench_nt = NamedTuple(bench_gk);
|
||||
bench_dict = Dict(bench_gk);
|
||||
@btime $bench_gdf[$bench_i];
|
||||
@btime $bench_gdf[$bench_gk];
|
||||
@btime $bench_gdf[$bench_t];
|
||||
@btime $bench_gdf[$bench_nt];
|
||||
@btime $bench_gdf[$bench_dict];
|
||||
|
||||
gdf_city[[2, 1]]
|
||||
gdf_city[[1]]
|
||||
|
||||
[nrow(df) for df in gdf_city]
|
||||
|
||||
for p in pairs(gdf_city)
|
||||
println(p)
|
||||
end
|
||||
|
||||
Dict(key.city => nrow(df) for (key, df) in pairs(gdf_city))
|
||||
|
||||
combine(gdf_city, nrow)
|
||||
|
Loading…
Reference in New Issue
Block a user