add chapter 6
This commit is contained in:
parent
2ca7eb6737
commit
8b39fdb427
@ -77,6 +77,12 @@ git-tree-sha1 = "bf98fa45a0a4cee295de98d4c1462be26345b9a1"
|
||||
uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
|
||||
version = "0.1.2"
|
||||
|
||||
[[deps.CodecBzip2]]
|
||||
deps = ["Bzip2_jll", "Libdl", "TranscodingStreams"]
|
||||
git-tree-sha1 = "2e62a725210ce3c3c2e1a3080190e7ca491f18d7"
|
||||
uuid = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd"
|
||||
version = "0.7.2"
|
||||
|
||||
[[deps.CodecZlib]]
|
||||
deps = ["TranscodingStreams", "Zlib_jll"]
|
||||
git-tree-sha1 = "ded953804d019afa9a3f98981d99b33e3db7b6da"
|
||||
@ -508,6 +514,12 @@ version = "2.36.0+0"
|
||||
deps = ["Libdl", "libblastrampoline_jll"]
|
||||
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
|
||||
|
||||
[[deps.Loess]]
|
||||
deps = ["Distances", "LinearAlgebra", "Statistics"]
|
||||
git-tree-sha1 = "46efcea75c890e5d820e670516dc156689851722"
|
||||
uuid = "4345ca2d-374a-55d4-8d30-97f9976e7612"
|
||||
version = "0.5.4"
|
||||
|
||||
[[deps.LogExpFunctions]]
|
||||
deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"]
|
||||
git-tree-sha1 = "e5718a00af0ab9756305a0392832c8952c7426c1"
|
||||
|
@ -1,6 +1,7 @@
|
||||
[deps]
|
||||
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
|
||||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||
CodecBzip2 = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd"
|
||||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||
FreqTables = "da1fdf0e-e0ff-5433-a45f-9bb5ff651cb1"
|
||||
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
|
||||
@ -8,6 +9,7 @@ HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
|
||||
Impute = "f7bf1975-0170-51b9-8c5f-a992d46b9575"
|
||||
InlineStrings = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48"
|
||||
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
|
||||
Loess = "4345ca2d-374a-55d4-8d30-97f9976e7612"
|
||||
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
|
||||
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
|
||||
PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
|
||||
|
@ -38,4 +38,8 @@ To work with codes from some given chapter:
|
||||
* execute the commands sequentially as they appear in the file;
|
||||
the codes were prepared in a way that you do not need to restart Julia
|
||||
when working with material from a single chapter, unless it is explicitly
|
||||
written in the instructions to restart Julia (some of the codes require this).
|
||||
written in the instructions to restart Julia (some of the codes require this);
|
||||
* before each code there is a comment allowing you to locate the relevant part
|
||||
of the book where it is used; if in the code there is a blank line between
|
||||
consecutive code sections this means that in the book these codes are
|
||||
separated by the text of the book explaining what the code does.
|
248
ch06.jl
Normal file
248
ch06.jl
Normal file
@ -0,0 +1,248 @@
|
||||
# Bogumił Kamiński, 2022
|
||||
|
||||
# Codes for chapter 6
|
||||
|
||||
# Code for section 6.1
|
||||
|
||||
# Fetch the compressed puzzle database unless a local copy already exists.
# `Downloads.download` (standard library) replaces `Base.download`, which is
# deprecated since Julia 1.6.
import Downloads

if isfile("puzzles.csv.bz2")
    @info "file already present"
else
    @info "fetching file"
    Downloads.download("https://database.lichess.org/" *
                       "lichess_db_puzzle.csv.bz2",
                       "puzzles.csv.bz2")
end
|
||||
|
||||
using CodecBzip2
|
||||
compressed = read("puzzles.csv.bz2")
|
||||
plain = transcode(Bzip2Decompressor, compressed)
|
||||
|
||||
open("puzzles.csv", "w") do io
|
||||
println(io, "PuzzleId,FEN,Moves,Rating,RatingDeviation," *
|
||||
"Popularity,NbPlays,Themes,GameUrl")
|
||||
write(io, plain)
|
||||
end
|
||||
|
||||
readlines("puzzles.csv")
|
||||
|
||||
# Code for section 6.2
|
||||
|
||||
using CSV
|
||||
using DataFrames
|
||||
# Read the CSV file from disk (it starts with the header line written earlier
# in this script) into a data frame.
puzzles = CSV.read("puzzles.csv", DataFrame);

# The same data can be parsed directly from the in-memory byte vector `plain`.
# The header line was only added to the file on disk, so `plain` is assumed to
# have no header row; the column names are therefore passed explicitly,
# otherwise the first data row would be consumed as the header.
CSV.read(plain, DataFrame; header=names(puzzles));
|
||||
|
||||
compressed = nothing
|
||||
plain = nothing
|
||||
|
||||
# Code for listing 6.1
|
||||
|
||||
puzzles
|
||||
|
||||
# Code for listing 6.2
|
||||
|
||||
describe(puzzles)
|
||||
|
||||
# Code for getting basic information about a data frame
|
||||
|
||||
ncol(puzzles)
|
||||
|
||||
nrow(puzzles)
|
||||
|
||||
names(puzzles)
|
||||
|
||||
# Code for section 6.3
|
||||
|
||||
puzzles.Rating
|
||||
|
||||
using BenchmarkTools
|
||||
@benchmark $puzzles.Rating
|
||||
|
||||
puzzles.Rating == copy(puzzles.Rating)
|
||||
|
||||
puzzles.Rating === copy(puzzles.Rating)
|
||||
|
||||
puzzles.Rating === puzzles.Rating
|
||||
|
||||
copy(puzzles.Rating) === copy(puzzles.Rating)
|
||||
|
||||
puzzles."Rating"
|
||||
|
||||
col = "Rating"
|
||||
|
||||
# General data frame indexing syntax (a template, not runnable code):
#     data_frame_name[selected_rows, selected_columns]
|
||||
|
||||
puzzles[:, "Rating"]
|
||||
puzzles[:, :Rating]
|
||||
puzzles[:, 4]
|
||||
puzzles[:, col]
|
||||
|
||||
columnindex(puzzles, "Rating")
|
||||
|
||||
columnindex(puzzles, "Some fancy column name")
|
||||
|
||||
hasproperty(puzzles, "Rating")
|
||||
hasproperty(puzzles, "Some fancy column name")
|
||||
|
||||
@benchmark $puzzles[:, :Rating]
|
||||
|
||||
puzzles[!, "Rating"]
|
||||
puzzles[!, :Rating]
|
||||
puzzles[!, 4]
|
||||
puzzles[!, col]
|
||||
|
||||
# Plots has to be loaded before the first call to `plot`/`histogram`.
using Plots

# Draw the histograms of four columns as subplots of a single figure.
plot(histogram(puzzles.Rating, label="Rating"),
     histogram(puzzles.RatingDeviation, label="RatingDeviation"),
     histogram(puzzles.Popularity, label="Popularity"),
     histogram(puzzles.NbPlays, label="NbPlays"))
|
||||
|
||||
# Same figure as above, but the histograms are produced by mapping over the
# column names and the resulting vector is splatted into `plot`.
plot(map(["Rating", "RatingDeviation",
          "Popularity", "NbPlays"]) do name
    histogram(puzzles[!, name], label=name)
end...)
|
||||
|
||||
# Code for section 6.4
|
||||
|
||||
using Statistics
|
||||
plays_lo = median(puzzles.NbPlays)
|
||||
puzzles.NbPlays .> plays_lo
|
||||
|
||||
# Without the broadcasting dot this compares the whole column with a scalar,
# which is not defined and throws an error; presumably kept to contrast with
# the broadcasted `.>` form used just before (confirm against the book text).
puzzles.NbPlays > plays_lo
|
||||
|
||||
rating_lo = 1500
|
||||
rating_hi = quantile(puzzles.Rating, 0.99)
|
||||
rating_lo .< puzzles.Rating .< rating_hi
|
||||
|
||||
row_selector = (puzzles.NbPlays .> plays_lo) .&&
|
||||
(rating_lo .< puzzles.Rating .< rating_hi)
|
||||
|
||||
sum(row_selector)
|
||||
count(row_selector)
|
||||
|
||||
# Code for listing 6.3
|
||||
|
||||
good = puzzles[row_selector, ["Rating", "Popularity"]]
|
||||
|
||||
# Code for plotting histograms
|
||||
|
||||
plot(histogram(good.Rating, label="Rating"),
|
||||
histogram(good.Popularity, label="Popularity"))
|
||||
|
||||
# Code for column selectors
|
||||
|
||||
puzzles[1, "Rating"]
|
||||
|
||||
puzzles[:, "Rating"]
|
||||
|
||||
row1 = puzzles[1, ["Rating", "Popularity"]]
|
||||
|
||||
row1["Rating"]
|
||||
row1[:Rating]
|
||||
row1[1]
|
||||
row1.Rating
|
||||
row1."Rating"
|
||||
|
||||
good = puzzles[row_selector, ["Rating", "Popularity"]]
|
||||
|
||||
good[1, "Rating"]
|
||||
good[1, :]
|
||||
good[:, "Rating"]
|
||||
good[:, :]
|
||||
|
||||
names(puzzles, ["Rating", "Popularity"])
|
||||
names(puzzles, [:Rating, :Popularity])
|
||||
names(puzzles, [4, 6])
|
||||
names(puzzles, [false, false, false, true, false, true, false, false, false])
|
||||
names(puzzles, r"Rating")
|
||||
names(puzzles, Not([4, 6]))
|
||||
names(puzzles, Not(r"Rating"))
|
||||
names(puzzles, Between("Rating", "Popularity"))
|
||||
names(puzzles, :)
|
||||
names(puzzles, All())
|
||||
names(puzzles, Cols(r"Rating", "NbPlays"))
|
||||
names(puzzles, Cols(startswith("P")))
|
||||
|
||||
names(puzzles, startswith("P"))
|
||||
|
||||
names(puzzles, Real)
|
||||
|
||||
names(puzzles, AbstractString)
|
||||
|
||||
puzzles[:, names(puzzles, Real)]
|
||||
|
||||
# Code for row subsetting
|
||||
|
||||
df1 = puzzles[:, ["Rating", "Popularity"]];
|
||||
df2 = puzzles[!, ["Rating", "Popularity"]];
|
||||
|
||||
df1 == df2
|
||||
df1 == puzzles
|
||||
df2 == puzzles
|
||||
|
||||
df1.Rating === puzzles.Rating
|
||||
df1.Popularity === puzzles.Popularity
|
||||
df2.Rating === puzzles.Rating
|
||||
df2.Popularity === puzzles.Popularity
|
||||
|
||||
@benchmark $puzzles[:, ["Rating", "Popularity"]]
|
||||
@benchmark $puzzles[!, ["Rating", "Popularity"]]
|
||||
|
||||
puzzles[1, 1]
|
||||
puzzles[[1], 1]
|
||||
puzzles[1, [1]]
|
||||
puzzles[[1], [1]]
|
||||
|
||||
# Code for making views
|
||||
|
||||
@view puzzles[1, 1]
|
||||
|
||||
@view puzzles[[1], 1]
|
||||
|
||||
@view puzzles[1, [1]]
|
||||
|
||||
@view puzzles[[1], [1]]
|
||||
|
||||
@btime $puzzles[$row_selector, ["Rating", "Popularity"]];
|
||||
@btime @view $puzzles[$row_selector, ["Rating", "Popularity"]];
|
||||
|
||||
parentindices(@view puzzles[row_selector, ["Rating", "Popularity"]])
|
||||
|
||||
# Code for section 6.5
|
||||
|
||||
describe(good)
|
||||
|
||||
# Map every distinct rating to the row numbers of `good` in which it occurs.
rating_mapping = Dict{Int, Vector{Int}}()
for (i, rating) in enumerate(good.Rating)
    # `get!` returns the vector stored under `rating`, first inserting an empty
    # one if the key is absent, so a single lookup replaces the haskey/else pair.
    push!(get!(() -> Int[], rating_mapping, rating), i)
end
rating_mapping
|
||||
|
||||
good[rating_mapping[2108], :]
|
||||
|
||||
unique(good[rating_mapping[2108], :].Rating)
|
||||
|
||||
using Statistics
|
||||
mean(good[rating_mapping[2108], "Popularity"])
|
||||
|
||||
ratings = unique(good.Rating)
|
||||
|
||||
# Mean popularity of the puzzles sharing each rating, in the order of `ratings`.
mean_popularities = [mean(good[rating_mapping[rating], "Popularity"])
                     for rating in ratings]
|
||||
|
||||
using Plots
|
||||
scatter(ratings, mean_popularities;
|
||||
xlabel="rating", ylabel="mean popularity", legend=false)
|
||||
|
||||
import Loess
|
||||
model = Loess.loess(ratings, mean_popularities);
|
||||
ratings_predict = float.(sort(ratings))
|
||||
popularity_predict = Loess.predict(model, ratings_predict)
|
||||
|
||||
plot!(ratings_predict, popularity_predict, width=5, color="black")
|
Loading…
Reference in New Issue
Block a user