update codes

This commit is contained in:
Bogumił Kamiński 2022-02-13 22:44:40 +01:00
parent ab6b8f18f3
commit 3d33b999c9
6 changed files with 160 additions and 120 deletions

View File

@ -1,6 +1,6 @@
# This file is machine-generated - editing it directly is not advised
julia_version = "1.7.1"
julia_version = "1.7.2"
manifest_format = "2.0"
[[deps.AbstractFFTs]]
@ -133,6 +133,12 @@ version = "3.41.0"
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[deps.Conda]]
deps = ["Downloads", "JSON", "VersionParsing"]
git-tree-sha1 = "6cdc8832ba11c7695f494c9d9a1c31e90959ce0f"
uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
version = "1.6.0"
[[deps.Contour]]
deps = ["StaticArrays"]
git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7"
@ -741,6 +747,12 @@ git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39"
uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
version = "2.4.2"
[[deps.RCall]]
deps = ["CategoricalArrays", "Conda", "DataFrames", "DataStructures", "Dates", "Libdl", "Missings", "REPL", "Random", "Requires", "StatsModels", "WinReg"]
git-tree-sha1 = "72fddd643785ec1f36581cbc3d288529b96e99a7"
uuid = "6f49c342-dc21-5d91-9882-a32aef131414"
version = "0.13.13"
[[deps.REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
@ -955,6 +967,11 @@ git-tree-sha1 = "34db80951901073501137bdbc3d5a8e7bbd06670"
uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d"
version = "0.1.2"
[[deps.VersionParsing]]
git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868"
uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
version = "1.3.0"
[[deps.Wayland_jll]]
deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"]
git-tree-sha1 = "3e61f0b86f90dacb0bc0e73a0c5a83f6a8636e23"
@ -973,6 +990,12 @@ git-tree-sha1 = "c69f9da3ff2f4f02e811c3323c22e5dfcb584cfa"
uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
version = "1.4.1"
[[deps.WinReg]]
deps = ["Test"]
git-tree-sha1 = "808380e0a0483e134081cc54150be4177959b5f4"
uuid = "1b915085-20d7-51cf-bf83-8f477d6f5128"
version = "0.3.1"
[[deps.XML2_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"

View File

@ -16,5 +16,6 @@ Loess = "4345ca2d-374a-55d4-8d30-97f9976e7612"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"

32
appB.jl
View File

@ -82,16 +82,14 @@ plot(scatter(data.set1.x, data.set1.y; legend=false),
parse.(Int, ["1", "2", "3"])
# CODES BELOW REQUIRE RE-NUMBERING
# Code for exercise 4.1
# Code for exercise 6.1
years_table = freqtable(years)
plot(names(years_table, 1), years_table; legend=false,
xlabel="year", ylabel="# of movies")
# Code for exercise 4.2
# Code for exercise 6.2
s3 = Symbol.(s1)
@benchmark sort($s3)
@ -99,7 +97,7 @@ s3 = Symbol.(s1)
@benchmark unique($s2)
@benchmark unique($s3)
# Code for exercise 5.1
# Code for exercise 7.1
v = ["1", "2", missing, "4"]
[ismissing(x) ? missing : parse(Int, x) for x in v]
@ -113,43 +111,43 @@ end
using Missings
passmissing(parse).(Int, v)
# Code for exercise 5.2
# Code for exercise 7.2
using Dates
Date(2021, 1, 1):Month(1):Date(2021, 12, 1)
collect(Date(2021, 1, 1):Month(1):Date(2021, 12, 1))
# Code for exercise 6.1
# Code for exercise 8.1
using BenchmarkTools
@benchmark $puzzles."Rating"
# Code for exercise 6.2
# Code for exercise 9.1
using StatsBase
summarystats(puzzles[puzzles.Popularity .== 100, "NbPlays"])
summarystats(puzzles[puzzles.Popularity .== -100, "NbPlays"])
# Code for exercise 6.3
# Code for exercise 9.2
sum(length, values(rating_mapping))
nrow(good)
# Code for exercise 7.1
# Code for exercise 10.1
using BenchmarkTools
x = rand(10^6);
@btime DataFrame(x=$x);
@btime DataFrame(x=$x; copycols=false);
# Code for exercise 7.2
# Code for exercise 10.2
df1 = DataFrame(a=1,b=2)
df2 = DataFrame(b=3, a=4)
vcat(df1, df2)
vcat(df1, df2, cols=:orderequal)
# Code for exercise 7.3
# Code for exercise 10.3
function walk_unique_2ahead()
walk = DataFrame(x=0, y=0)
@ -162,18 +160,18 @@ end
Random.seed!(2);
proptable([walk_unique_2ahead() for _ in 1:10^5])
# Code for exercise 7.4
# Code for exercise 11.1
@time wide = DataFrame(ones(1, 10_000), :auto);
@time Tables.columntable(wide);
# Code for exercise 8.1
# Code for exercise 12.1
cg = complete_graph(37700)
Base.summarysize(cg)
@time deg_class(cg, classes_df.ml_target);
# Code for exercise 8.2
# Code for exercise 12.2
scatter(log1p.(agg_df.deg_ml),
log1p.(agg_df.deg_web);
@ -184,12 +182,12 @@ scatter(log1p.(agg_df.deg_ml),
xticks=gen_ticks(maximum(classes_df.deg_ml)),
yticks=gen_ticks(maximum(classes_df.deg_web)))
# Code for exercise 8.3
# Code for exercise 12.3
glm(@formula(ml_target~log1p(deg_ml)+log1p(deg_web)),
classes_df, Binomial(), ProbitLink())
# Code for exercise 8.4
# Code for exercise 12.4
df = DataFrame()
df.a = [1, 2, 3]

117
ch10.jl
View File

@ -1,8 +1,8 @@
# Bogumił Kamiński, 2022
# Codes for chapter 7
# Codes for chapter 10
# Code for section 7.1
# Code for section 10.1
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
@ -16,16 +16,11 @@ aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89];
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
set2=(x=aq[:, 3], y=aq[:, 4]),
set3=(x=aq[:, 5], y=aq[:, 6]),
set4=(x=aq[:, 7], y=aq[:, 8]));
using DataFrames
# Code for listing 7.1
# Code for listing 10.1
aq1 = ataFrame(aq, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
aq1 = DataFrame(aq, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
DataFrame(aq, [:x1, :y1, :x2, :y2, :x3, :y3, :x4, :y4])
# Code for creating DataFrame with automatic column names
@ -38,7 +33,12 @@ aq_vec = collect(eachcol(aq))
DataFrame(aq_vec, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
DataFrame(aq_vec, :auto)
# Codes for section 7.1.2
# Codes for section 10.1.2
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
set2=(x=aq[:, 3], y=aq[:, 4]),
set3=(x=aq[:, 5], y=aq[:, 6]),
set4=(x=aq[:, 7], y=aq[:, 8]));
data.set1.x
@ -84,7 +84,11 @@ df.x
DataFrame(x=[1], y=[1, 2, 3])
# Codes for section 7.1.3
using RCall
r_df = R"data.frame(a=1:6, b=1:2, c=1:3)"
julia_df = rcopy(r_df)
# Codes for section 10.1.3
data.set1
DataFrame(data.set1)
@ -93,11 +97,11 @@ DataFrame([(a=1, b=2), (a=3, b=4), (a=5, b=6)])
data
# Code for listing 7.2
# Code for listing 10.2
aq2 = DataFrame(data)
# Codes for listing 7.3
# Codes for listing 10.3
data_dfs = map(DataFrame, data)
@ -114,14 +118,14 @@ vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4;
reduce(vcat, collect(data_dfs);
source="source_id"=>string.("set", 1:4))
# Code for listing 7.4
# Code for listing 10.4
df1 = DataFrame(a=1:3, b=11:13)
df2 = DataFrame(a=4:6, c=24:26)
vcat(df1, df2)
vcat(df1, df2; cols=:union)
# Code for listing 7.5
# Code for listing 10.5
df_agg = DataFrame()
append!(df_agg, data_dfs.set1)
@ -140,7 +144,7 @@ df2 = DataFrame(a=4:6, b=[14, missing, 16])
append!(df1, df2)
append!(df1, df2; promote=true)
# Code for section 7.2.3
# Code for section 10.2.3
df = DataFrame()
push!(df, (a=1, b=2))
@ -188,7 +192,7 @@ range(1, 5)
(3/4)^9
# Code for listing 7.6
# Code for listing 10.6
function walk_unique() #A
walk = DataFrame(x=0, y=0)
@ -201,79 +205,8 @@ end
Random.seed!(2);
proptable([walk_unique() for _ in 1:10^5])
# Code for a note on conversion
# Code for serialization
x = [1.5]
x[1] = 1
x
# Code from section 7.3.1
Matrix(walk)
Matrix{Any}(walk)
Matrix{String}(walk)
plot(walk)
plot(Matrix(walk); labels=["x" "y"] , legend=:topleft)
# Code from section 7.3.2
Tables.columntable(walk)
using BenchmarkTools
# Sum the elements of `table.x` with an explicit loop, starting from the integer 0.
# Called below with both a DataFrame and the result of `Tables.columntable`, so the
# two inputs can be compared with `@btime` and `@code_warntype`.
# NOTE(review): the `#A`/`#B` markers look like listing callouts referenced
# elsewhere; keep them in place.
function mysum(table)
s = 0 #A
for v in table.x #B
s += v
end
return s
end
df = DataFrame(x=1:1_000_000);
@btime mysum($df)
tab = Tables.columntable(df);
@btime mysum($tab)
@code_warntype mysum(df)
@code_warntype mysum(tab)
typeof(tab)
# Sum the elements of the iterable `x` with an explicit loop, starting from the
# integer 0, and return the total.
# NOTE(review): presumably acts as a function barrier, so that the loop is compiled
# for the concrete type of the column it receives; confirm against the text.
function barrier_mysum2(x)
s = 0
for v in x
s += v
end
return s
end
# Variant of `mysum` that looks up `table.x` first and delegates the loop to
# `barrier_mysum2`.
mysum2(table) = barrier_mysum2(table.x)
@btime mysum2($df)
df = DataFrame(a=[1, 1, 2], b=[1, 1, 2])
unique(df)
tab = Tables.columntable(df)
unique(tab)
# Code from section 7.3.3
Tables.rowtable(walk)
nti = Tables.namedtupleiterator(walk)
for v in nti
println(v)
end
er = eachrow(walk)
er[1]
er[end]
ec = eachcol(walk)
ec[1]
ec[end]
identity.(eachcol(walk))
df = DataFrame(x=1:2, b=["a", "b"])
identity.(eachcol(df))
using Serialization
serialize("walk.bin", walk)
deserialize("walk.bin") == walk

85
ch11.jl Normal file
View File

@ -0,0 +1,85 @@
# Bogumił Kamiński, 2022
# Codes for chapter 11
# Code for section 11.1
# Code for a note on conversion
x = [1.5]
x[1] = 1
x
# Code from section 11.1.1
using Serialization
walk = deserialize("walk.bin")
Matrix(walk)
Matrix{Any}(walk)
Matrix{String}(walk)
plot(walk)
plot(Matrix(walk); labels=["x" "y"] , legend=:topleft)
# Code from section 11.1.2
Tables.columntable(walk)
using BenchmarkTools
# Sum the elements of `table.x` with an explicit loop, starting from the integer 0.
# Called below with both a DataFrame and the result of `Tables.columntable`, so the
# two inputs can be compared with `@btime` and `@code_warntype`.
# NOTE(review): the `#A`/`#B` markers look like listing callouts referenced
# elsewhere; keep them in place.
function mysum(table)
s = 0 #A
for v in table.x #B
s += v
end
return s
end
df = DataFrame(x=1:1_000_000);
@btime mysum($df)
tab = Tables.columntable(df);
@btime mysum($tab)
@code_warntype mysum(df)
@code_warntype mysum(tab)
typeof(tab)
# Sum the elements of the iterable `x` with an explicit loop, starting from the
# integer 0, and return the total.
# NOTE(review): presumably acts as a function barrier, so that the loop is compiled
# for the concrete type of the column it receives; confirm against the text.
function barrier_mysum2(x)
s = 0
for v in x
s += v
end
return s
end
# Variant of `mysum` that looks up `table.x` first and delegates the loop to
# `barrier_mysum2`.
mysum2(table) = barrier_mysum2(table.x)
@btime mysum2($df)
df = DataFrame(a=[1, 1, 2], b=[1, 1, 2])
unique(df)
tab = Tables.columntable(df)
unique(tab)
# Code from section 11.1.3
Tables.rowtable(walk)
nti = Tables.namedtupleiterator(walk)
for v in nti
println(v)
end
er = eachrow(walk)
er[1]
er[end]
ec = eachcol(walk)
ec[1]
ec[end]
identity.(eachcol(walk))
df = DataFrame(x=1:2, b=["a", "b"])
identity.(eachcol(df))

20
ch12.jl
View File

@ -1,10 +1,10 @@
# Bogumił Kamiński, 2022
# Codes for chapter 8
# Codes for chapter 12
# Codes for section 8.1
# Codes for section 12.1
# Code for listing 8.1
# Code for listing 12.1
import Downloads
using SHA
@ -29,7 +29,7 @@ open(sha256, git_zip) == [0x56, 0xc0, 0xc1, 0xc2,
import ZipFile
git_archive = ZipFile.Reader(git_zip)
# Code for listing 8.2
# Code for listing 12.2
function ingest_to_df(archive::ZipFile.Reader, filename::AbstractString)
idx = only(findall(x -> x.name == filename, archive.files))
@ -48,7 +48,7 @@ findall(x -> x.name == "", git_archive.files)
only(findall(x -> x.name == "git_web_ml/musae_git_edges.csv", git_archive.files))
only(findall(x -> x.name == "", git_archive.files))
# Code for listing 8.3
# Code for listing 12.3
using CSV
using DataFrames
@ -91,9 +91,9 @@ df[:, :b] = ["x", "y", "z"]
df[:, :c] = [11, 12, 13]
df
# Codes for section 8.2
# Codes for section 12.2
# Code from listing 8.4
# Code from listing 12.4
using Graphs
gh = SimpleGraph(nrow(classes_df))
@ -144,7 +144,7 @@ dump(e1)
e1.src
e1.dst
# Code for listing 8.5
# Code for listing 12.5
function deg_class(gh, class)
deg_ml = zeros(Int, length(class))
@ -193,7 +193,7 @@ df = DataFrame(a=1, b=11)
push!(df.a, 2)
df
# Codes for section 8.3
# Codes for section 12.3
# Code for computing groupwise means of columns
@ -242,7 +242,7 @@ describe(agg_df)
log1p(0)
# Code for listing 8.6
# Code for listing 12.6
function gen_ticks(maxv)
max2 = round(Int, log2(maxv))