update codes
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# This file is machine-generated - editing it directly is not advised
|
# This file is machine-generated - editing it directly is not advised
|
||||||
|
|
||||||
julia_version = "1.7.1"
|
julia_version = "1.7.2"
|
||||||
manifest_format = "2.0"
|
manifest_format = "2.0"
|
||||||
|
|
||||||
[[deps.AbstractFFTs]]
|
[[deps.AbstractFFTs]]
|
||||||
@@ -133,6 +133,12 @@ version = "3.41.0"
|
|||||||
deps = ["Artifacts", "Libdl"]
|
deps = ["Artifacts", "Libdl"]
|
||||||
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
|
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
|
||||||
|
|
||||||
|
[[deps.Conda]]
|
||||||
|
deps = ["Downloads", "JSON", "VersionParsing"]
|
||||||
|
git-tree-sha1 = "6cdc8832ba11c7695f494c9d9a1c31e90959ce0f"
|
||||||
|
uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
|
||||||
|
version = "1.6.0"
|
||||||
|
|
||||||
[[deps.Contour]]
|
[[deps.Contour]]
|
||||||
deps = ["StaticArrays"]
|
deps = ["StaticArrays"]
|
||||||
git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7"
|
git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7"
|
||||||
@@ -741,6 +747,12 @@ git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39"
|
|||||||
uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
|
uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
|
||||||
version = "2.4.2"
|
version = "2.4.2"
|
||||||
|
|
||||||
|
[[deps.RCall]]
|
||||||
|
deps = ["CategoricalArrays", "Conda", "DataFrames", "DataStructures", "Dates", "Libdl", "Missings", "REPL", "Random", "Requires", "StatsModels", "WinReg"]
|
||||||
|
git-tree-sha1 = "72fddd643785ec1f36581cbc3d288529b96e99a7"
|
||||||
|
uuid = "6f49c342-dc21-5d91-9882-a32aef131414"
|
||||||
|
version = "0.13.13"
|
||||||
|
|
||||||
[[deps.REPL]]
|
[[deps.REPL]]
|
||||||
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
|
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
|
||||||
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
|
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
|
||||||
@@ -955,6 +967,11 @@ git-tree-sha1 = "34db80951901073501137bdbc3d5a8e7bbd06670"
|
|||||||
uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d"
|
uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d"
|
||||||
version = "0.1.2"
|
version = "0.1.2"
|
||||||
|
|
||||||
|
[[deps.VersionParsing]]
|
||||||
|
git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868"
|
||||||
|
uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
|
||||||
|
version = "1.3.0"
|
||||||
|
|
||||||
[[deps.Wayland_jll]]
|
[[deps.Wayland_jll]]
|
||||||
deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"]
|
deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"]
|
||||||
git-tree-sha1 = "3e61f0b86f90dacb0bc0e73a0c5a83f6a8636e23"
|
git-tree-sha1 = "3e61f0b86f90dacb0bc0e73a0c5a83f6a8636e23"
|
||||||
@@ -973,6 +990,12 @@ git-tree-sha1 = "c69f9da3ff2f4f02e811c3323c22e5dfcb584cfa"
|
|||||||
uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
|
uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
|
||||||
version = "1.4.1"
|
version = "1.4.1"
|
||||||
|
|
||||||
|
[[deps.WinReg]]
|
||||||
|
deps = ["Test"]
|
||||||
|
git-tree-sha1 = "808380e0a0483e134081cc54150be4177959b5f4"
|
||||||
|
uuid = "1b915085-20d7-51cf-bf83-8f477d6f5128"
|
||||||
|
version = "0.3.1"
|
||||||
|
|
||||||
[[deps.XML2_jll]]
|
[[deps.XML2_jll]]
|
||||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
|
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
|
||||||
git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"
|
git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"
|
||||||
|
|||||||
@@ -16,5 +16,6 @@ Loess = "4345ca2d-374a-55d4-8d30-97f9976e7612"
|
|||||||
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
|
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
|
||||||
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
|
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
|
||||||
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
|
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
|
||||||
|
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
|
||||||
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
|
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
|
||||||
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
|
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
|
||||||
|
|||||||
32
appB.jl
32
appB.jl
@@ -82,16 +82,14 @@ plot(scatter(data.set1.x, data.set1.y; legend=false),
|
|||||||
|
|
||||||
parse.(Int, ["1", "2", "3"])
|
parse.(Int, ["1", "2", "3"])
|
||||||
|
|
||||||
# CODES BELOW REQUIRE RE-NUMBERING
|
# Code for exercise 6.1
|
||||||
|
|
||||||
# Code for exercise 4.1
|
|
||||||
|
|
||||||
years_table = freqtable(years)
|
years_table = freqtable(years)
|
||||||
plot(names(years_table, 1), years_table; legend=false,
|
plot(names(years_table, 1), years_table; legend=false,
|
||||||
xlabel="year", ylabel="# of movies")
|
xlabel="year", ylabel="# of movies")
|
||||||
|
|
||||||
|
|
||||||
# Code for exercise 4.2
|
# Code for exercise 6.2
|
||||||
|
|
||||||
s3 = Symbol.(s1)
|
s3 = Symbol.(s1)
|
||||||
@benchmark sort($s3)
|
@benchmark sort($s3)
|
||||||
@@ -99,7 +97,7 @@ s3 = Symbol.(s1)
|
|||||||
@benchmark unique($s2)
|
@benchmark unique($s2)
|
||||||
@benchmark unique($s3)
|
@benchmark unique($s3)
|
||||||
|
|
||||||
# Code for exercise 5.1
|
# Code for exercise 7.1
|
||||||
|
|
||||||
v = ["1", "2", missing, "4"]
|
v = ["1", "2", missing, "4"]
|
||||||
[ismissing(x) ? missing : parse(Int, x) for x in v]
|
[ismissing(x) ? missing : parse(Int, x) for x in v]
|
||||||
@@ -113,43 +111,43 @@ end
|
|||||||
using Missings
|
using Missings
|
||||||
passmissing(parse).(Int, v)
|
passmissing(parse).(Int, v)
|
||||||
|
|
||||||
# Code for exercise 5.2
|
# Code for exercise 7.2
|
||||||
|
|
||||||
using Dates
|
using Dates
|
||||||
Date(2021, 1, 1):Month(1):Date(2021, 12, 1)
|
Date(2021, 1, 1):Month(1):Date(2021, 12, 1)
|
||||||
collect(Date(2021, 1, 1):Month(1):Date(2021, 12, 1))
|
collect(Date(2021, 1, 1):Month(1):Date(2021, 12, 1))
|
||||||
|
|
||||||
# Code for exercise 6.1
|
# Code for exercise 8.1
|
||||||
|
|
||||||
using BenchmarkTools
|
using BenchmarkTools
|
||||||
@benchmark $puzzles."Rating"
|
@benchmark $puzzles."Rating"
|
||||||
|
|
||||||
# Code for exercise 6.2
|
# Code for exercise 9.1
|
||||||
|
|
||||||
using StatsBase
|
using StatsBase
|
||||||
summarystats(puzzles[puzzles.Popularity .== 100, "NbPlays"])
|
summarystats(puzzles[puzzles.Popularity .== 100, "NbPlays"])
|
||||||
summarystats(puzzles[puzzles.Popularity .== -100, "NbPlays"])
|
summarystats(puzzles[puzzles.Popularity .== -100, "NbPlays"])
|
||||||
|
|
||||||
# Code for exercise 6.3
|
# Code for exercise 9.2
|
||||||
|
|
||||||
sum(length, values(rating_mapping))
|
sum(length, values(rating_mapping))
|
||||||
nrow(good)
|
nrow(good)
|
||||||
|
|
||||||
# Code for exercise 7.1
|
# Code for exercise 10.1
|
||||||
|
|
||||||
using BenchmarkTools
|
using BenchmarkTools
|
||||||
x = rand(10^6);
|
x = rand(10^6);
|
||||||
@btime DataFrame(x=$x);
|
@btime DataFrame(x=$x);
|
||||||
@btime DataFrame(x=$x; copycols=false);
|
@btime DataFrame(x=$x; copycols=false);
|
||||||
|
|
||||||
# Code for exercise 7.2
|
# Code for exercise 10.2
|
||||||
|
|
||||||
df1 = DataFrame(a=1,b=2)
|
df1 = DataFrame(a=1,b=2)
|
||||||
df2 = DataFrame(b=3, a=4)
|
df2 = DataFrame(b=3, a=4)
|
||||||
vcat(df1, df2)
|
vcat(df1, df2)
|
||||||
vcat(df1, df2, cols=:orderequal)
|
vcat(df1, df2, cols=:orderequal)
|
||||||
|
|
||||||
# Code for exercise 7.3
|
# Code for exercise 10.3
|
||||||
|
|
||||||
function walk_unique_2ahead()
|
function walk_unique_2ahead()
|
||||||
walk = DataFrame(x=0, y=0)
|
walk = DataFrame(x=0, y=0)
|
||||||
@@ -162,18 +160,18 @@ end
|
|||||||
Random.seed!(2);
|
Random.seed!(2);
|
||||||
proptable([walk_unique_2ahead() for _ in 1:10^5])
|
proptable([walk_unique_2ahead() for _ in 1:10^5])
|
||||||
|
|
||||||
# Code for exercise 7.4
|
# Code for exercise 11.1
|
||||||
|
|
||||||
@time wide = DataFrame(ones(1, 10_000), :auto);
|
@time wide = DataFrame(ones(1, 10_000), :auto);
|
||||||
@time Tables.columntable(wide);
|
@time Tables.columntable(wide);
|
||||||
|
|
||||||
# Code for exercise 8.1
|
# Code for exercise 12.1
|
||||||
|
|
||||||
cg = complete_graph(37700)
|
cg = complete_graph(37700)
|
||||||
Base.summarysize(cg)
|
Base.summarysize(cg)
|
||||||
@time deg_class(cg, classes_df.ml_target);
|
@time deg_class(cg, classes_df.ml_target);
|
||||||
|
|
||||||
# Code for exercise 8.2
|
# Code for exercise 12.2
|
||||||
|
|
||||||
scatter(log1p.(agg_df.deg_ml),
|
scatter(log1p.(agg_df.deg_ml),
|
||||||
log1p.(agg_df.deg_web);
|
log1p.(agg_df.deg_web);
|
||||||
@@ -184,12 +182,12 @@ scatter(log1p.(agg_df.deg_ml),
|
|||||||
xticks=gen_ticks(maximum(classes_df.deg_ml)),
|
xticks=gen_ticks(maximum(classes_df.deg_ml)),
|
||||||
yticks=gen_ticks(maximum(classes_df.deg_web)))
|
yticks=gen_ticks(maximum(classes_df.deg_web)))
|
||||||
|
|
||||||
# Code for exercise 8.3
|
# Code for exercise 12.3
|
||||||
|
|
||||||
glm(@formula(ml_target~log1p(deg_ml)+log1p(deg_web)),
|
glm(@formula(ml_target~log1p(deg_ml)+log1p(deg_web)),
|
||||||
classes_df, Binomial(), ProbitLink())
|
classes_df, Binomial(), ProbitLink())
|
||||||
|
|
||||||
# Code for exercise 8.4
|
# Code for exercise 12.4
|
||||||
|
|
||||||
df = DataFrame()
|
df = DataFrame()
|
||||||
df.a = [1, 2, 3]
|
df.a = [1, 2, 3]
|
||||||
|
|||||||
117
ch10.jl
117
ch10.jl
@@ -1,8 +1,8 @@
|
|||||||
# Bogumił Kamiński, 2022
|
# Bogumił Kamiński, 2022
|
||||||
|
|
||||||
# Codes for chapter 7
|
# Codes for chapter 10
|
||||||
|
|
||||||
# Code for section 7.1
|
# Code for section 10.1
|
||||||
|
|
||||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||||
@@ -16,16 +16,11 @@ aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
|||||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89];
|
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89];
|
||||||
|
|
||||||
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
|
||||||
set2=(x=aq[:, 3], y=aq[:, 4]),
|
|
||||||
set3=(x=aq[:, 5], y=aq[:, 6]),
|
|
||||||
set4=(x=aq[:, 7], y=aq[:, 8]));
|
|
||||||
|
|
||||||
using DataFrames
|
using DataFrames
|
||||||
|
|
||||||
# Code for listing 7.1
|
# Code for listing 10.1
|
||||||
|
|
||||||
aq1 = ataFrame(aq, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
aq1 = DataFrame(aq, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
||||||
DataFrame(aq, [:x1, :y1, :x2, :y2, :x3, :y3, :x4, :y4])
|
DataFrame(aq, [:x1, :y1, :x2, :y2, :x3, :y3, :x4, :y4])
|
||||||
|
|
||||||
# Code for creating DataFrame with automatic column names
|
# Code for creating DataFrame with automatic column names
|
||||||
@@ -38,7 +33,12 @@ aq_vec = collect(eachcol(aq))
|
|||||||
DataFrame(aq_vec, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
DataFrame(aq_vec, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
||||||
DataFrame(aq_vec, :auto)
|
DataFrame(aq_vec, :auto)
|
||||||
|
|
||||||
# Codes for section 7.1.2
|
# Codes for section 10.1.2
|
||||||
|
|
||||||
|
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
||||||
|
set2=(x=aq[:, 3], y=aq[:, 4]),
|
||||||
|
set3=(x=aq[:, 5], y=aq[:, 6]),
|
||||||
|
set4=(x=aq[:, 7], y=aq[:, 8]));
|
||||||
|
|
||||||
data.set1.x
|
data.set1.x
|
||||||
|
|
||||||
@@ -84,7 +84,11 @@ df.x
|
|||||||
|
|
||||||
DataFrame(x=[1], y=[1, 2, 3])
|
DataFrame(x=[1], y=[1, 2, 3])
|
||||||
|
|
||||||
# Codes for section 7.1.3
|
using RCall
|
||||||
|
r_df = R"data.frame(a=1:6, b=1:2, c=1:3)"
|
||||||
|
julia_df = rcopy(r_df)
|
||||||
|
|
||||||
|
# Codes for section 10.1.3
|
||||||
|
|
||||||
data.set1
|
data.set1
|
||||||
DataFrame(data.set1)
|
DataFrame(data.set1)
|
||||||
@@ -93,11 +97,11 @@ DataFrame([(a=1, b=2), (a=3, b=4), (a=5, b=6)])
|
|||||||
|
|
||||||
data
|
data
|
||||||
|
|
||||||
# Code for listing 7.2
|
# Code for listing 10.2
|
||||||
|
|
||||||
aq2 = DataFrame(data)
|
aq2 = DataFrame(data)
|
||||||
|
|
||||||
# Codes for listing 7.3
|
# Codes for listing 10.3
|
||||||
|
|
||||||
data_dfs = map(DataFrame, data)
|
data_dfs = map(DataFrame, data)
|
||||||
|
|
||||||
@@ -114,14 +118,14 @@ vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4;
|
|||||||
reduce(vcat, collect(data_dfs);
|
reduce(vcat, collect(data_dfs);
|
||||||
source="source_id"=>string.("set", 1:4))
|
source="source_id"=>string.("set", 1:4))
|
||||||
|
|
||||||
# Code for listing 7.4
|
# Code for listing 10.4
|
||||||
|
|
||||||
df1 = DataFrame(a=1:3, b=11:13)
|
df1 = DataFrame(a=1:3, b=11:13)
|
||||||
df2 = DataFrame(a=4:6, c=24:26)
|
df2 = DataFrame(a=4:6, c=24:26)
|
||||||
vcat(df1, df2)
|
vcat(df1, df2)
|
||||||
vcat(df1, df2; cols=:union)
|
vcat(df1, df2; cols=:union)
|
||||||
|
|
||||||
# Code for listing 7.5
|
# Code for listing 10.5
|
||||||
|
|
||||||
df_agg = DataFrame()
|
df_agg = DataFrame()
|
||||||
append!(df_agg, data_dfs.set1)
|
append!(df_agg, data_dfs.set1)
|
||||||
@@ -140,7 +144,7 @@ df2 = DataFrame(a=4:6, b=[14, missing, 16])
|
|||||||
append!(df1, df2)
|
append!(df1, df2)
|
||||||
append!(df1, df2; promote=true)
|
append!(df1, df2; promote=true)
|
||||||
|
|
||||||
# Code for section 7.2.3
|
# Code for section 10.2.3
|
||||||
|
|
||||||
df = DataFrame()
|
df = DataFrame()
|
||||||
push!(df, (a=1, b=2))
|
push!(df, (a=1, b=2))
|
||||||
@@ -188,7 +192,7 @@ range(1, 5)
|
|||||||
|
|
||||||
(3/4)^9
|
(3/4)^9
|
||||||
|
|
||||||
# Code for listing 7.6
|
# Code for listing 10.6
|
||||||
|
|
||||||
function walk_unique() #A
|
function walk_unique() #A
|
||||||
walk = DataFrame(x=0, y=0)
|
walk = DataFrame(x=0, y=0)
|
||||||
@@ -201,79 +205,8 @@ end
|
|||||||
Random.seed!(2);
|
Random.seed!(2);
|
||||||
proptable([walk_unique() for _ in 1:10^5])
|
proptable([walk_unique() for _ in 1:10^5])
|
||||||
|
|
||||||
# Code for a note on conversion
|
# Code for serialization
|
||||||
|
|
||||||
x = [1.5]
|
using Serialization
|
||||||
x[1] = 1
|
serialize("walk.bin", walk)
|
||||||
x
|
deserialize("walk.bin") == walk
|
||||||
|
|
||||||
# Code from section 7.3.1
|
|
||||||
|
|
||||||
Matrix(walk)
|
|
||||||
Matrix{Any}(walk)
|
|
||||||
Matrix{String}(walk)
|
|
||||||
|
|
||||||
plot(walk)
|
|
||||||
|
|
||||||
plot(Matrix(walk); labels=["x" "y"] , legend=:topleft)
|
|
||||||
|
|
||||||
# Code from section 7.3.2
|
|
||||||
|
|
||||||
Tables.columntable(walk)
|
|
||||||
|
|
||||||
using BenchmarkTools
|
|
||||||
function mysum(table)
|
|
||||||
s = 0 #A
|
|
||||||
for v in table.x #B
|
|
||||||
s += v
|
|
||||||
end
|
|
||||||
return s
|
|
||||||
end
|
|
||||||
df = DataFrame(x=1:1_000_000);
|
|
||||||
@btime mysum($df)
|
|
||||||
|
|
||||||
tab = Tables.columntable(df);
|
|
||||||
@btime mysum($tab)
|
|
||||||
|
|
||||||
@code_warntype mysum(df)
|
|
||||||
|
|
||||||
@code_warntype mysum(tab)
|
|
||||||
|
|
||||||
typeof(tab)
|
|
||||||
|
|
||||||
function barrier_mysum2(x)
|
|
||||||
s = 0
|
|
||||||
for v in x
|
|
||||||
s += v
|
|
||||||
end
|
|
||||||
return s
|
|
||||||
end
|
|
||||||
mysum2(table) = barrier_mysum2(table.x)
|
|
||||||
@btime mysum2($df)
|
|
||||||
|
|
||||||
df = DataFrame(a=[1, 1, 2], b=[1, 1, 2])
|
|
||||||
unique(df)
|
|
||||||
|
|
||||||
tab = Tables.columntable(df)
|
|
||||||
unique(tab)
|
|
||||||
|
|
||||||
# Code from section 7.3.3
|
|
||||||
|
|
||||||
Tables.rowtable(walk)
|
|
||||||
|
|
||||||
nti = Tables.namedtupleiterator(walk)
|
|
||||||
for v in nti
|
|
||||||
println(v)
|
|
||||||
end
|
|
||||||
|
|
||||||
er = eachrow(walk)
|
|
||||||
er[1]
|
|
||||||
er[end]
|
|
||||||
ec = eachcol(walk)
|
|
||||||
ec[1]
|
|
||||||
ec[end]
|
|
||||||
|
|
||||||
identity.(eachcol(walk))
|
|
||||||
|
|
||||||
df = DataFrame(x=1:2, b=["a", "b"])
|
|
||||||
identity.(eachcol(df))
|
|
||||||
|
|||||||
85
ch11.jl
Normal file
85
ch11.jl
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
# Bogumił Kamiński, 2022
|
||||||
|
|
||||||
|
# Codes for chapter 11
|
||||||
|
|
||||||
|
# Code for section 11.1
|
||||||
|
|
||||||
|
# Code for a note on conversion
|
||||||
|
|
||||||
|
x = [1.5]
|
||||||
|
x[1] = 1
|
||||||
|
x
|
||||||
|
|
||||||
|
# Code from section 11.1.1
|
||||||
|
|
||||||
|
using Serialization
|
||||||
|
walk = deserialize("walk.bin")
|
||||||
|
|
||||||
|
Matrix(walk)
|
||||||
|
Matrix{Any}(walk)
|
||||||
|
Matrix{String}(walk)
|
||||||
|
|
||||||
|
plot(walk)
|
||||||
|
|
||||||
|
plot(Matrix(walk); labels=["x" "y"] , legend=:topleft)
|
||||||
|
|
||||||
|
# Code from section 11.1.2
|
||||||
|
|
||||||
|
Tables.columntable(walk)
|
||||||
|
|
||||||
|
using BenchmarkTools
|
||||||
|
function mysum(table)
|
||||||
|
s = 0 #A
|
||||||
|
for v in table.x #B
|
||||||
|
s += v
|
||||||
|
end
|
||||||
|
return s
|
||||||
|
end
|
||||||
|
df = DataFrame(x=1:1_000_000);
|
||||||
|
@btime mysum($df)
|
||||||
|
|
||||||
|
tab = Tables.columntable(df);
|
||||||
|
@btime mysum($tab)
|
||||||
|
|
||||||
|
@code_warntype mysum(df)
|
||||||
|
|
||||||
|
@code_warntype mysum(tab)
|
||||||
|
|
||||||
|
typeof(tab)
|
||||||
|
|
||||||
|
function barrier_mysum2(x)
|
||||||
|
s = 0
|
||||||
|
for v in x
|
||||||
|
s += v
|
||||||
|
end
|
||||||
|
return s
|
||||||
|
end
|
||||||
|
mysum2(table) = barrier_mysum2(table.x)
|
||||||
|
@btime mysum2($df)
|
||||||
|
|
||||||
|
df = DataFrame(a=[1, 1, 2], b=[1, 1, 2])
|
||||||
|
unique(df)
|
||||||
|
|
||||||
|
tab = Tables.columntable(df)
|
||||||
|
unique(tab)
|
||||||
|
|
||||||
|
# Code from section 11.1.3
|
||||||
|
|
||||||
|
Tables.rowtable(walk)
|
||||||
|
|
||||||
|
nti = Tables.namedtupleiterator(walk)
|
||||||
|
for v in nti
|
||||||
|
println(v)
|
||||||
|
end
|
||||||
|
|
||||||
|
er = eachrow(walk)
|
||||||
|
er[1]
|
||||||
|
er[end]
|
||||||
|
ec = eachcol(walk)
|
||||||
|
ec[1]
|
||||||
|
ec[end]
|
||||||
|
|
||||||
|
identity.(eachcol(walk))
|
||||||
|
|
||||||
|
df = DataFrame(x=1:2, b=["a", "b"])
|
||||||
|
identity.(eachcol(df))
|
||||||
20
ch12.jl
20
ch12.jl
@@ -1,10 +1,10 @@
|
|||||||
# Bogumił Kamiński, 2022
|
# Bogumił Kamiński, 2022
|
||||||
|
|
||||||
# Codes for chapter 8
|
# Codes for chapter 12
|
||||||
|
|
||||||
# Codes for section 8.1
|
# Codes for section 12.1
|
||||||
|
|
||||||
# Code for listing 8.1
|
# Code for listing 12.1
|
||||||
|
|
||||||
import Downloads
|
import Downloads
|
||||||
using SHA
|
using SHA
|
||||||
@@ -29,7 +29,7 @@ open(sha256, git_zip) == [0x56, 0xc0, 0xc1, 0xc2,
|
|||||||
import ZipFile
|
import ZipFile
|
||||||
git_archive = ZipFile.Reader(git_zip)
|
git_archive = ZipFile.Reader(git_zip)
|
||||||
|
|
||||||
# Code for listing 8.2
|
# Code for listing 12.2
|
||||||
|
|
||||||
function ingest_to_df(archive::ZipFile.Reader, filename::AbstractString)
|
function ingest_to_df(archive::ZipFile.Reader, filename::AbstractString)
|
||||||
idx = only(findall(x -> x.name == filename, archive.files))
|
idx = only(findall(x -> x.name == filename, archive.files))
|
||||||
@@ -48,7 +48,7 @@ findall(x -> x.name == "", git_archive.files)
|
|||||||
only(findall(x -> x.name == "git_web_ml/musae_git_edges.csv", git_archive.files))
|
only(findall(x -> x.name == "git_web_ml/musae_git_edges.csv", git_archive.files))
|
||||||
only(findall(x -> x.name == "", git_archive.files))
|
only(findall(x -> x.name == "", git_archive.files))
|
||||||
|
|
||||||
# Code for listing 8.3
|
# Code for listing 12.3
|
||||||
|
|
||||||
using CSV
|
using CSV
|
||||||
using DataFrames
|
using DataFrames
|
||||||
@@ -91,9 +91,9 @@ df[:, :b] = ["x", "y", "z"]
|
|||||||
df[:, :c] = [11, 12, 13]
|
df[:, :c] = [11, 12, 13]
|
||||||
df
|
df
|
||||||
|
|
||||||
# Codes for section 8.2
|
# Codes for section 12.2
|
||||||
|
|
||||||
# Code from listing 8.4
|
# Code from listing 12.4
|
||||||
|
|
||||||
using Graphs
|
using Graphs
|
||||||
gh = SimpleGraph(nrow(classes_df))
|
gh = SimpleGraph(nrow(classes_df))
|
||||||
@@ -144,7 +144,7 @@ dump(e1)
|
|||||||
e1.src
|
e1.src
|
||||||
e1.dst
|
e1.dst
|
||||||
|
|
||||||
# Code for listing 8.5
|
# Code for listing 12.5
|
||||||
|
|
||||||
function deg_class(gh, class)
|
function deg_class(gh, class)
|
||||||
deg_ml = zeros(Int, length(class))
|
deg_ml = zeros(Int, length(class))
|
||||||
@@ -193,7 +193,7 @@ df = DataFrame(a=1, b=11)
|
|||||||
push!(df.a, 2)
|
push!(df.a, 2)
|
||||||
df
|
df
|
||||||
|
|
||||||
# Codes for section 8.3
|
# Codes for section 12.3
|
||||||
|
|
||||||
# Code for computing groupwise means of columns
|
# Code for computing groupwise means of columns
|
||||||
|
|
||||||
@@ -242,7 +242,7 @@ describe(agg_df)
|
|||||||
|
|
||||||
log1p(0)
|
log1p(0)
|
||||||
|
|
||||||
# Code for listing 8.6
|
# Code for listing 12.6
|
||||||
|
|
||||||
function gen_ticks(maxv)
|
function gen_ticks(maxv)
|
||||||
max2 = round(Int, log2(maxv))
|
max2 = round(Int, log2(maxv))
|
||||||
|
|||||||
Reference in New Issue
Block a user