update codes
This commit is contained in:
parent
ab6b8f18f3
commit
3d33b999c9
@ -1,6 +1,6 @@
|
||||
# This file is machine-generated - editing it directly is not advised
|
||||
|
||||
julia_version = "1.7.1"
|
||||
julia_version = "1.7.2"
|
||||
manifest_format = "2.0"
|
||||
|
||||
[[deps.AbstractFFTs]]
|
||||
@ -133,6 +133,12 @@ version = "3.41.0"
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
|
||||
|
||||
[[deps.Conda]]
|
||||
deps = ["Downloads", "JSON", "VersionParsing"]
|
||||
git-tree-sha1 = "6cdc8832ba11c7695f494c9d9a1c31e90959ce0f"
|
||||
uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d"
|
||||
version = "1.6.0"
|
||||
|
||||
[[deps.Contour]]
|
||||
deps = ["StaticArrays"]
|
||||
git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7"
|
||||
@ -741,6 +747,12 @@ git-tree-sha1 = "78aadffb3efd2155af139781b8a8df1ef279ea39"
|
||||
uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
|
||||
version = "2.4.2"
|
||||
|
||||
[[deps.RCall]]
|
||||
deps = ["CategoricalArrays", "Conda", "DataFrames", "DataStructures", "Dates", "Libdl", "Missings", "REPL", "Random", "Requires", "StatsModels", "WinReg"]
|
||||
git-tree-sha1 = "72fddd643785ec1f36581cbc3d288529b96e99a7"
|
||||
uuid = "6f49c342-dc21-5d91-9882-a32aef131414"
|
||||
version = "0.13.13"
|
||||
|
||||
[[deps.REPL]]
|
||||
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
|
||||
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
|
||||
@ -955,6 +967,11 @@ git-tree-sha1 = "34db80951901073501137bdbc3d5a8e7bbd06670"
|
||||
uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d"
|
||||
version = "0.1.2"
|
||||
|
||||
[[deps.VersionParsing]]
|
||||
git-tree-sha1 = "58d6e80b4ee071f5efd07fda82cb9fbe17200868"
|
||||
uuid = "81def892-9a0e-5fdd-b105-ffc91e053289"
|
||||
version = "1.3.0"
|
||||
|
||||
[[deps.Wayland_jll]]
|
||||
deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"]
|
||||
git-tree-sha1 = "3e61f0b86f90dacb0bc0e73a0c5a83f6a8636e23"
|
||||
@ -973,6 +990,12 @@ git-tree-sha1 = "c69f9da3ff2f4f02e811c3323c22e5dfcb584cfa"
|
||||
uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
|
||||
version = "1.4.1"
|
||||
|
||||
[[deps.WinReg]]
|
||||
deps = ["Test"]
|
||||
git-tree-sha1 = "808380e0a0483e134081cc54150be4177959b5f4"
|
||||
uuid = "1b915085-20d7-51cf-bf83-8f477d6f5128"
|
||||
version = "0.3.1"
|
||||
|
||||
[[deps.XML2_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"]
|
||||
git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a"
|
||||
|
@ -16,5 +16,6 @@ Loess = "4345ca2d-374a-55d4-8d30-97f9976e7612"
|
||||
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
|
||||
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
|
||||
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
|
||||
RCall = "6f49c342-dc21-5d91-9882-a32aef131414"
|
||||
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
|
||||
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
|
||||
|
32
appB.jl
32
appB.jl
@ -82,16 +82,14 @@ plot(scatter(data.set1.x, data.set1.y; legend=false),
|
||||
|
||||
parse.(Int, ["1", "2", "3"])
|
||||
|
||||
# CODES BELOW REQUIRE RE-NUMBERING
|
||||
|
||||
# Code for exercise 4.1
|
||||
# Code for exercise 6.1
|
||||
|
||||
years_table = freqtable(years)
|
||||
plot(names(years_table, 1), years_table; legend=false,
|
||||
xlabel="year", ylabel="# of movies")
|
||||
|
||||
|
||||
# Code for exercise 4.2
|
||||
# Code for exercise 6.2
|
||||
|
||||
s3 = Symbol.(s1)
|
||||
@benchmark sort($s3)
|
||||
@ -99,7 +97,7 @@ s3 = Symbol.(s1)
|
||||
@benchmark unique($s2)
|
||||
@benchmark unique($s3)
|
||||
|
||||
# Code for exercise 5.1
|
||||
# Code for exercise 7.1
|
||||
|
||||
v = ["1", "2", missing, "4"]
|
||||
[ismissing(x) ? missing : parse(Int, x) for x in v]
|
||||
@ -113,43 +111,43 @@ end
|
||||
using Missings
|
||||
passmissing(parse).(Int, v)
|
||||
|
||||
# Code for exercise 5.2
|
||||
# Code for exercise 7.2
|
||||
|
||||
using Dates
|
||||
Date(2021, 1, 1):Month(1):Date(2021, 12, 1)
|
||||
collect(Date(2021, 1, 1):Month(1):Date(2021, 12, 1))
|
||||
|
||||
# Code for exercise 6.1
|
||||
# Code for exercise 8.1
|
||||
|
||||
using BenchmarkTools
|
||||
@benchmark $puzzles."Rating"
|
||||
|
||||
# Code for exercise 6.2
|
||||
# Code for exercise 9.1
|
||||
|
||||
using StatsBase
|
||||
summarystats(puzzles[puzzles.Popularity .== 100, "NbPlays"])
|
||||
summarystats(puzzles[puzzles.Popularity .== -100, "NbPlays"])
|
||||
|
||||
# Code for exercise 6.3
|
||||
# Code for exercise 9.2
|
||||
|
||||
sum(length, values(rating_mapping))
|
||||
nrow(good)
|
||||
|
||||
# Code for exercise 7.1
|
||||
# Code for exercise 10.1
|
||||
|
||||
using BenchmarkTools
|
||||
x = rand(10^6);
|
||||
@btime DataFrame(x=$x);
|
||||
@btime DataFrame(x=$x; copycols=false);
|
||||
|
||||
# Code for exercise 7.2
|
||||
# Code for exercise 10.2
|
||||
|
||||
df1 = DataFrame(a=1,b=2)
|
||||
df2 = DataFrame(b=3, a=4)
|
||||
vcat(df1, df2)
|
||||
vcat(df1, df2, cols=:orderequal)
|
||||
|
||||
# Code for exercise 7.3
|
||||
# Code for exercise 10.3
|
||||
|
||||
function walk_unique_2ahead()
|
||||
walk = DataFrame(x=0, y=0)
|
||||
@ -162,18 +160,18 @@ end
|
||||
Random.seed!(2);
|
||||
proptable([walk_unique_2ahead() for _ in 1:10^5])
|
||||
|
||||
# Code for exercise 7.4
|
||||
# Code for exercise 11.1
|
||||
|
||||
@time wide = DataFrame(ones(1, 10_000), :auto);
|
||||
@time Tables.columntable(wide);
|
||||
|
||||
# Code for exercise 8.1
|
||||
# Code for exercise 12.1
|
||||
|
||||
cg = complete_graph(37700)
|
||||
Base.summarysize(cg)
|
||||
@time deg_class(cg, classes_df.ml_target);
|
||||
|
||||
# Code for exercise 8.2
|
||||
# Code for exercise 12.2
|
||||
|
||||
scatter(log1p.(agg_df.deg_ml),
|
||||
log1p.(agg_df.deg_web);
|
||||
@ -184,12 +182,12 @@ scatter(log1p.(agg_df.deg_ml),
|
||||
xticks=gen_ticks(maximum(classes_df.deg_ml)),
|
||||
yticks=gen_ticks(maximum(classes_df.deg_web)))
|
||||
|
||||
# Code for exercise 8.3
|
||||
# Code for exercise 12.3
|
||||
|
||||
glm(@formula(ml_target~log1p(deg_ml)+log1p(deg_web)),
|
||||
classes_df, Binomial(), ProbitLink())
|
||||
|
||||
# Code for exercise 8.4
|
||||
# Code for exercise 12.4
|
||||
|
||||
df = DataFrame()
|
||||
df.a = [1, 2, 3]
|
||||
|
117
ch10.jl
117
ch10.jl
@ -1,8 +1,8 @@
|
||||
# Bogumił Kamiński, 2022
|
||||
|
||||
# Codes for chapter 7
|
||||
# Codes for chapter 10
|
||||
|
||||
# Code for section 7.1
|
||||
# Code for section 10.1
|
||||
|
||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||
@ -16,16 +16,11 @@ aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89];
|
||||
|
||||
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
||||
set2=(x=aq[:, 3], y=aq[:, 4]),
|
||||
set3=(x=aq[:, 5], y=aq[:, 6]),
|
||||
set4=(x=aq[:, 7], y=aq[:, 8]));
|
||||
|
||||
using DataFrames
|
||||
|
||||
# Code for listing 7.1
|
||||
# Code for listing 10.1
|
||||
|
||||
aq1 = ataFrame(aq, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
||||
aq1 = DataFrame(aq, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
||||
DataFrame(aq, [:x1, :y1, :x2, :y2, :x3, :y3, :x4, :y4])
|
||||
|
||||
# Code for creating DataFrame with automatic column names
|
||||
@ -38,7 +33,12 @@ aq_vec = collect(eachcol(aq))
|
||||
DataFrame(aq_vec, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
||||
DataFrame(aq_vec, :auto)
|
||||
|
||||
# Codes for section 7.1.2
|
||||
# Codes for section 10.1.2
|
||||
|
||||
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
||||
set2=(x=aq[:, 3], y=aq[:, 4]),
|
||||
set3=(x=aq[:, 5], y=aq[:, 6]),
|
||||
set4=(x=aq[:, 7], y=aq[:, 8]));
|
||||
|
||||
data.set1.x
|
||||
|
||||
@ -84,7 +84,11 @@ df.x
|
||||
|
||||
DataFrame(x=[1], y=[1, 2, 3])
|
||||
|
||||
# Codes for section 7.1.3
|
||||
using RCall
|
||||
r_df = R"data.frame(a=1:6, b=1:2, c=1:3)"
|
||||
julia_df = rcopy(r_df)
|
||||
|
||||
# Codes for section 10.1.3
|
||||
|
||||
data.set1
|
||||
DataFrame(data.set1)
|
||||
@ -93,11 +97,11 @@ DataFrame([(a=1, b=2), (a=3, b=4), (a=5, b=6)])
|
||||
|
||||
data
|
||||
|
||||
# Code for listing 7.2
|
||||
# Code for listing 10.2
|
||||
|
||||
aq2 = DataFrame(data)
|
||||
|
||||
# Codes for listing 7.3
|
||||
# Codes for listing 10.3
|
||||
|
||||
data_dfs = map(DataFrame, data)
|
||||
|
||||
@ -114,14 +118,14 @@ vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4;
|
||||
reduce(vcat, collect(data_dfs);
|
||||
source="source_id"=>string.("set", 1:4))
|
||||
|
||||
# Code for listing 7.4
|
||||
# Code for listing 10.4
|
||||
|
||||
df1 = DataFrame(a=1:3, b=11:13)
|
||||
df2 = DataFrame(a=4:6, c=24:26)
|
||||
vcat(df1, df2)
|
||||
vcat(df1, df2; cols=:union)
|
||||
|
||||
# Code for listing 7.5
|
||||
# Code for listing 10.5
|
||||
|
||||
df_agg = DataFrame()
|
||||
append!(df_agg, data_dfs.set1)
|
||||
@ -140,7 +144,7 @@ df2 = DataFrame(a=4:6, b=[14, missing, 16])
|
||||
append!(df1, df2)
|
||||
append!(df1, df2; promote=true)
|
||||
|
||||
# Code for section 7.2.3
|
||||
# Code for section 10.2.3
|
||||
|
||||
df = DataFrame()
|
||||
push!(df, (a=1, b=2))
|
||||
@ -188,7 +192,7 @@ range(1, 5)
|
||||
|
||||
(3/4)^9
|
||||
|
||||
# Code for listing 7.6
|
||||
# Code for listing 10.6
|
||||
|
||||
function walk_unique() #A
|
||||
walk = DataFrame(x=0, y=0)
|
||||
@ -201,79 +205,8 @@ end
|
||||
Random.seed!(2);
|
||||
proptable([walk_unique() for _ in 1:10^5])
|
||||
|
||||
# Code for a note on conversion
|
||||
# code for serialization
|
||||
|
||||
x = [1.5]
|
||||
x[1] = 1
|
||||
x
|
||||
|
||||
# Code from section 7.3.1
|
||||
|
||||
Matrix(walk)
|
||||
Matrix{Any}(walk)
|
||||
Matrix{String}(walk)
|
||||
|
||||
plot(walk)
|
||||
|
||||
plot(Matrix(walk); labels=["x" "y"] , legend=:topleft)
|
||||
|
||||
# Code from section 7.3.2
|
||||
|
||||
Tables.columntable(walk)
|
||||
|
||||
using BenchmarkTools
|
||||
function mysum(table)
|
||||
s = 0 #A
|
||||
for v in table.x #B
|
||||
s += v
|
||||
end
|
||||
return s
|
||||
end
|
||||
df = DataFrame(x=1:1_000_000);
|
||||
@btime mysum($df)
|
||||
|
||||
tab = Tables.columntable(df);
|
||||
@btime mysum($tab)
|
||||
|
||||
@code_warntype mysum(df)
|
||||
|
||||
@code_warntype mysum(tab)
|
||||
|
||||
typeof(tab)
|
||||
|
||||
function barrier_mysum2(x)
|
||||
s = 0
|
||||
for v in x
|
||||
s += v
|
||||
end
|
||||
return s
|
||||
end
|
||||
mysum2(table) = barrier_mysum2(table.x)
|
||||
@btime mysum2($df)
|
||||
|
||||
df = DataFrame(a=[1, 1, 2], b=[1, 1, 2])
|
||||
unique(df)
|
||||
|
||||
tab = Tables.columntable(df)
|
||||
unique(tab)
|
||||
|
||||
# Code from section 7.3.3
|
||||
|
||||
Tables.rowtable(walk)
|
||||
|
||||
nti = Tables.namedtupleiterator(walk)
|
||||
for v in nti
|
||||
println(v)
|
||||
end
|
||||
|
||||
er = eachrow(walk)
|
||||
er[1]
|
||||
er[end]
|
||||
ec = eachcol(walk)
|
||||
ec[1]
|
||||
ec[end]
|
||||
|
||||
identity.(eachcol(walk))
|
||||
|
||||
df = DataFrame(x=1:2, b=["a", "b"])
|
||||
identity.(eachcol(df))
|
||||
using Serialization
|
||||
serialize("walk.bin", walk)
|
||||
deserialize("walk.bin") == walk
|
||||
|
85
ch11.jl
Normal file
85
ch11.jl
Normal file
@ -0,0 +1,85 @@
|
||||
# Bogumił Kamiński, 2022
|
||||
|
||||
# Codes for chapter 11
|
||||
|
||||
# Code for section 11.1
|
||||
|
||||
# Code for a note on conversion
|
||||
|
||||
x = [1.5]
|
||||
x[1] = 1
|
||||
x
|
||||
|
||||
# Code from section 11.1.1
|
||||
|
||||
using Serialization
|
||||
walk = deserialize("walk.bin")
|
||||
|
||||
Matrix(walk)
|
||||
Matrix{Any}(walk)
|
||||
Matrix{String}(walk)
|
||||
|
||||
plot(walk)
|
||||
|
||||
plot(Matrix(walk); labels=["x" "y"] , legend=:topleft)
|
||||
|
||||
# Code from section 11.1.2
|
||||
|
||||
Tables.columntable(walk)
|
||||
|
||||
using BenchmarkTools
|
||||
function mysum(table)
|
||||
s = 0 #A
|
||||
for v in table.x #B
|
||||
s += v
|
||||
end
|
||||
return s
|
||||
end
|
||||
df = DataFrame(x=1:1_000_000);
|
||||
@btime mysum($df)
|
||||
|
||||
tab = Tables.columntable(df);
|
||||
@btime mysum($tab)
|
||||
|
||||
@code_warntype mysum(df)
|
||||
|
||||
@code_warntype mysum(tab)
|
||||
|
||||
typeof(tab)
|
||||
|
||||
function barrier_mysum2(x)
|
||||
s = 0
|
||||
for v in x
|
||||
s += v
|
||||
end
|
||||
return s
|
||||
end
|
||||
mysum2(table) = barrier_mysum2(table.x)
|
||||
@btime mysum2($df)
|
||||
|
||||
df = DataFrame(a=[1, 1, 2], b=[1, 1, 2])
|
||||
unique(df)
|
||||
|
||||
tab = Tables.columntable(df)
|
||||
unique(tab)
|
||||
|
||||
# Code from section 11.1.3
|
||||
|
||||
Tables.rowtable(walk)
|
||||
|
||||
nti = Tables.namedtupleiterator(walk)
|
||||
for v in nti
|
||||
println(v)
|
||||
end
|
||||
|
||||
er = eachrow(walk)
|
||||
er[1]
|
||||
er[end]
|
||||
ec = eachcol(walk)
|
||||
ec[1]
|
||||
ec[end]
|
||||
|
||||
identity.(eachcol(walk))
|
||||
|
||||
df = DataFrame(x=1:2, b=["a", "b"])
|
||||
identity.(eachcol(df))
|
20
ch12.jl
20
ch12.jl
@ -1,10 +1,10 @@
|
||||
# Bogumił Kamiński, 2022
|
||||
|
||||
# Codes for chapter 8
|
||||
# Codes for chapter 12
|
||||
|
||||
# Codes for section 8.1
|
||||
# Codes for section 12.1
|
||||
|
||||
# Code for listing 8.1
|
||||
# Code for listing 12.1
|
||||
|
||||
import Downloads
|
||||
using SHA
|
||||
@ -29,7 +29,7 @@ open(sha256, git_zip) == [0x56, 0xc0, 0xc1, 0xc2,
|
||||
import ZipFile
|
||||
git_archive = ZipFile.Reader(git_zip)
|
||||
|
||||
# Code for listing 8.2
|
||||
# Code for listing 12.2
|
||||
|
||||
function ingest_to_df(archive::ZipFile.Reader, filename::AbstractString)
|
||||
idx = only(findall(x -> x.name == filename, archive.files))
|
||||
@ -48,7 +48,7 @@ findall(x -> x.name == "", git_archive.files)
|
||||
only(findall(x -> x.name == "git_web_ml/musae_git_edges.csv", git_archive.files))
|
||||
only(findall(x -> x.name == "", git_archive.files))
|
||||
|
||||
# Code for listing 8.3
|
||||
# Code for listing 12.3
|
||||
|
||||
using CSV
|
||||
using DataFrames
|
||||
@ -91,9 +91,9 @@ df[:, :b] = ["x", "y", "z"]
|
||||
df[:, :c] = [11, 12, 13]
|
||||
df
|
||||
|
||||
# Codes for section 8.2
|
||||
# Codes for section 12.2
|
||||
|
||||
# Code from listing 8.4
|
||||
# Code from listing 12.4
|
||||
|
||||
using Graphs
|
||||
gh = SimpleGraph(nrow(classes_df))
|
||||
@ -144,7 +144,7 @@ dump(e1)
|
||||
e1.src
|
||||
e1.dst
|
||||
|
||||
# Code for listing 8.5
|
||||
# Code for listing 12.5
|
||||
|
||||
function deg_class(gh, class)
|
||||
deg_ml = zeros(Int, length(class))
|
||||
@ -193,7 +193,7 @@ df = DataFrame(a=1, b=11)
|
||||
push!(df.a, 2)
|
||||
df
|
||||
|
||||
# Codes for section 8.3
|
||||
# Codes for section 12.3
|
||||
|
||||
# Code for computing groupwise means of columns
|
||||
|
||||
@ -242,7 +242,7 @@ describe(agg_df)
|
||||
|
||||
log1p(0)
|
||||
|
||||
# Code for listing 8.6
|
||||
# Code for listing 12.6
|
||||
|
||||
function gen_ticks(maxv)
|
||||
max2 = round(Int, log2(maxv))
|
||||
|
Loading…
Reference in New Issue
Block a user