reorganize chapters
This commit is contained in:
139
ch02.jl
139
ch02.jl
@@ -329,142 +329,3 @@ function fun6()
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
fun6()
|
fun6()
|
||||||
|
|
||||||
# Code from section 2.6
|
|
||||||
|
|
||||||
methods(cd)
|
|
||||||
|
|
||||||
sum isa Function
|
|
||||||
|
|
||||||
typeof(sum)
|
|
||||||
typeof(sum) == Function
|
|
||||||
|
|
||||||
supertype(typeof(sum))
|
|
||||||
|
|
||||||
"""
    traverse(T)

Print `T` and then each successive `supertype`, one per line, stopping once
`Any` has been printed. Returns `nothing`.
"""
function traverse(T)
    current = T
    while true
        println(current)
        # `Any` is the last type printed; do not climb further.
        current == Any && break
        current = supertype(current)
    end
    return nothing
end
|
|
||||||
traverse(Int64)
|
|
||||||
|
|
||||||
"""
    print_subtypes(T, indent_level=0)

Print `T` preceded by `indent_level` spaces, then recursively print every type
returned by `subtypes(T)`, indenting each nesting level by two more spaces.
Returns `nothing`.
"""
function print_subtypes(T, indent_level=0)
    padding = repeat(" ", indent_level)
    println(padding, T)
    foreach(subtypes(T)) do S
        print_subtypes(S, indent_level + 2)
    end
    return nothing
end
|
|
||||||
print_subtypes(Integer)
|
|
||||||
|
|
||||||
traverse(typeof([1.0, 2.0, 3.0]))
|
|
||||||
traverse(typeof(1:3))
|
|
||||||
|
|
||||||
AbstractVector
|
|
||||||
|
|
||||||
typejoin(typeof([1.0, 2.0, 3.0]), typeof(1:3))
|
|
||||||
|
|
||||||
# Code from section 2.7
|
|
||||||
|
|
||||||
# Fallback method: selected when no more specific method matches the argument.
function fun(x)
    return println("unsupported type")
end

# Method for any `Number` that has no more specific method of its own.
function fun(x::Number)
    return println("a number was passed")
end

# Most specific method: selected only for `Float64` arguments.
function fun(x::Float64)
    return println("a Float64 value")
end
|
|
||||||
methods(fun)
|
|
||||||
|
|
||||||
fun("hello!")
|
|
||||||
fun(1)
|
|
||||||
fun(1.0)
|
|
||||||
|
|
||||||
# Fallback method: neither argument is constrained.
function bar(x, y)
    return "no numbers passed"
end

# Selected when only the first argument is a `Number`.
function bar(x::Number, y)
    return "first argument is a number"
end

# Selected when only the second argument is a `Number`.
# Note: with just these three methods, a call with two numbers matches both
# single-`Number` methods equally well, so dispatch is ambiguous.
function bar(x, y::Number)
    return "second argument is a number"
end
|
|
||||||
bar("hello", "world")
|
|
||||||
bar(1, "world")
|
|
||||||
bar("hello", 2)
|
|
||||||
bar(1, 2)
|
|
||||||
|
|
||||||
# Method taking a `Number` in both positions.
function bar(x::Number, y::Number)
    return "both arguments are numbers"
end
|
|
||||||
bar(1, 2)
|
|
||||||
methods(bar)
|
|
||||||
|
|
||||||
"""
    winsorized_mean(x::AbstractVector, k::Integer)

Return the mean of a sorted copy of `x` in which the `k` smallest values are
replaced by the `(k+1)`-th smallest and the `k` largest values by the
`(k+1)`-th largest. `x` itself is not modified.

Throws `ArgumentError` if `k` is negative or if `length(x) <= 2k`.
"""
function winsorized_mean(x::AbstractVector, k::Integer)
    if k < 0
        throw(ArgumentError("k must be non-negative"))
    end
    if length(x) <= 2 * k
        throw(ArgumentError("k is too large"))
    end
    # `collect` makes a fresh mutable copy, so sorting in place is safe.
    sorted = sort!(collect(x))
    # The loop below never writes to these two positions, so read them once.
    low = sorted[k + 1]
    high = sorted[end - k]
    for i in 1:k
        sorted[i] = low
        sorted[end - i + 1] = high
    end
    return sum(sorted) / length(sorted)
end
|
|
||||||
|
|
||||||
winsorized_mean([8, 3, 1, 5, 7], 1)
|
|
||||||
winsorized_mean(1:10, 2)
|
|
||||||
winsorized_mean(1:10, "a")
|
|
||||||
winsorized_mean(10, 1)
|
|
||||||
|
|
||||||
winsorized_mean(1:10, -1)
|
|
||||||
winsorized_mean(1:10, 5)
|
|
||||||
|
|
||||||
# Code from section 2.8
|
|
||||||
|
|
||||||
module ExampleModule

# Print a fixed greeting to standard output.
example() = println("Hello")

end # ExampleModule
|
|
||||||
|
|
||||||
import Statistics
|
|
||||||
x = [1, 2, 3]
|
|
||||||
mean(x)
|
|
||||||
Statistics.mean(x)
|
|
||||||
|
|
||||||
using Statistics
|
|
||||||
mean(x)
|
|
||||||
|
|
||||||
# start a fresh Julia session before running this code
|
|
||||||
mean = 1
|
|
||||||
using Statistics
|
|
||||||
mean
|
|
||||||
|
|
||||||
# start a fresh Julia session before running this code
|
|
||||||
using Statistics
|
|
||||||
mean([1, 2, 3])
|
|
||||||
mean = 1
|
|
||||||
|
|
||||||
# start a fresh Julia session before running this code
|
|
||||||
using Statistics
|
|
||||||
mean = 1
|
|
||||||
mean([1, 2, 3])
|
|
||||||
|
|
||||||
# start a fresh Julia session before running this code
|
|
||||||
using Statistics
|
|
||||||
using StatsBase
|
|
||||||
?winsor
|
|
||||||
mean(winsor([8, 3, 1, 5, 7], count=1))
|
|
||||||
|
|
||||||
# Code from section 2.9
|
|
||||||
|
|
||||||
@time 1 + 2
|
|
||||||
|
|
||||||
@time(1 + 2)
|
|
||||||
|
|
||||||
@assert 1 == 2 "1 is not equal 2"
|
|
||||||
@assert(1 == 2, "1 is not equal 2")
|
|
||||||
|
|
||||||
@macroexpand @assert(1 == 2, "1 is not equal 2")
|
|
||||||
|
|
||||||
@macroexpand @time 1 + 2
|
|
||||||
|
|
||||||
# before running the code below,
|
|
||||||
# define the winsorized_mean function using the code from section 2.7
|
|
||||||
|
|
||||||
using BenchmarkTools
|
|
||||||
x = rand(10^6);
|
|
||||||
@benchmark winsorized_mean($x, 10^5)
|
|
||||||
using Statistics, StatsBase
|
|
||||||
@benchmark mean(winsor($x; count=10^5))
|
|
||||||
|
|
||||||
@edit winsor(x, count=10^5)
|
|
||||||
|
|||||||
475
ch03.jl
475
ch03.jl
@@ -2,358 +2,141 @@
|
|||||||
|
|
||||||
# Codes for chapter 3
|
# Codes for chapter 3
|
||||||
|
|
||||||
# Code for listing 3.1
|
# Code from section 3.1
|
||||||
|
|
||||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
methods(cd)
|
||||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
|
||||||
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
|
||||||
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
|
||||||
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
|
||||||
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
|
||||||
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
|
||||||
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
|
||||||
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
|
||||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
|
||||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
|
||||||
|
|
||||||
# Code for checking size of a matrix
|
sum isa Function
|
||||||
|
|
||||||
size(aq)
|
typeof(sum)
|
||||||
size(aq, 1)
|
typeof(sum) == Function
|
||||||
size(aq, 2)
|
|
||||||
|
|
||||||
# Code comparing tuple to a vector
|
supertype(typeof(sum))
|
||||||
|
|
||||||
v = [1, 2, 3]
|
function traverse(T)
|
||||||
t = (1, 2, 3)
|
println(T)
|
||||||
v[1]
|
T == Any || traverse(supertype(T))
|
||||||
t[1]
|
return nothing
|
||||||
v[1] = 10
|
end
|
||||||
v
|
traverse(Int64)
|
||||||
t[1] = 10
|
|
||||||
|
|
||||||
# Code for figure 3.2
|
function print_subtypes(T, indent_level=0)
|
||||||
|
println(" " ^ indent_level, T)
|
||||||
|
for S in subtypes(T)
|
||||||
|
print_subtypes(S, indent_level + 2)
|
||||||
|
end
|
||||||
|
return nothing
|
||||||
|
end
|
||||||
|
print_subtypes(Integer)
|
||||||
|
|
||||||
|
traverse(typeof([1.0, 2.0, 3.0]))
|
||||||
|
traverse(typeof(1:3))
|
||||||
|
|
||||||
|
AbstractVector
|
||||||
|
|
||||||
|
typejoin(typeof([1.0, 2.0, 3.0]), typeof(1:3))
|
||||||
|
|
||||||
|
# Code from section 3.2
|
||||||
|
|
||||||
|
fun(x) = println("unsupported type")
|
||||||
|
fun(x::Number) = println("a number was passed")
|
||||||
|
fun(x::Float64) = println("a Float64 value")
|
||||||
|
methods(fun)
|
||||||
|
|
||||||
|
fun("hello!")
|
||||||
|
fun(1)
|
||||||
|
fun(1.0)
|
||||||
|
|
||||||
|
bar(x, y) = "no numbers passed"
|
||||||
|
bar(x::Number, y) = "first argument is a number"
|
||||||
|
bar(x, y::Number) = "second argument is a number"
|
||||||
|
bar("hello", "world")
|
||||||
|
bar(1, "world")
|
||||||
|
bar("hello", 2)
|
||||||
|
bar(1, 2)
|
||||||
|
|
||||||
|
bar(x::Number, y::Number) = "both arguments are numbers"
|
||||||
|
bar(1, 2)
|
||||||
|
methods(bar)
|
||||||
|
|
||||||
|
function winsorized_mean(x::AbstractVector, k::Integer)
|
||||||
|
k >= 0 || throw(ArgumentError("k must be non-negative"))
|
||||||
|
length(x) > 2 * k || throw(ArgumentError("k is too large"))
|
||||||
|
y = sort!(collect(x))
|
||||||
|
for i in 1:k
|
||||||
|
y[i] = y[k + 1]
|
||||||
|
y[end - i + 1] = y[end - k]
|
||||||
|
end
|
||||||
|
return sum(y) / length(y)
|
||||||
|
end
|
||||||
|
|
||||||
|
winsorized_mean([8, 3, 1, 5, 7], 1)
|
||||||
|
winsorized_mean(1:10, 2)
|
||||||
|
winsorized_mean(1:10, "a")
|
||||||
|
winsorized_mean(10, 1)
|
||||||
|
|
||||||
|
winsorized_mean(1:10, -1)
|
||||||
|
winsorized_mean(1:10, 5)
|
||||||
|
|
||||||
|
# Code from section 3.3
|
||||||
|
|
||||||
|
module ExampleModule
|
||||||
|
|
||||||
|
function example()
|
||||||
|
println("Hello")
|
||||||
|
end
|
||||||
|
|
||||||
|
end # ExampleModule
|
||||||
|
|
||||||
|
import Statistics
|
||||||
|
x = [1, 2, 3]
|
||||||
|
mean(x)
|
||||||
|
Statistics.mean(x)
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
mean(x)
|
||||||
|
|
||||||
|
# start a fresh Julia session before running this code
|
||||||
|
mean = 1
|
||||||
|
using Statistics
|
||||||
|
mean
|
||||||
|
|
||||||
|
# start a fresh Julia session before running this code
|
||||||
|
using Statistics
|
||||||
|
mean([1, 2, 3])
|
||||||
|
mean = 1
|
||||||
|
|
||||||
|
# start a fresh Julia session before running this code
|
||||||
|
using Statistics
|
||||||
|
mean = 1
|
||||||
|
mean([1, 2, 3])
|
||||||
|
|
||||||
|
# start a fresh Julia session before running this code
|
||||||
|
using Statistics
|
||||||
|
using StatsBase
|
||||||
|
?winsor
|
||||||
|
mean(winsor([8, 3, 1, 5, 7], count=1))
|
||||||
|
|
||||||
|
# Code from section 3.4
|
||||||
|
|
||||||
|
@time 1 + 2
|
||||||
|
|
||||||
|
@time(1 + 2)
|
||||||
|
|
||||||
|
@assert 1 == 2 "1 is not equal 2"
|
||||||
|
@assert(1 == 2, "1 is not equal 2")
|
||||||
|
|
||||||
|
@macroexpand @assert(1 == 2, "1 is not equal 2")
|
||||||
|
|
||||||
|
@macroexpand @time 1 + 2
|
||||||
|
|
||||||
|
# before running these codes
|
||||||
|
# define the winsorized_mean function using the code from section 3.1
|
||||||
|
|
||||||
using BenchmarkTools
|
using BenchmarkTools
|
||||||
@benchmark (1, 2, 3)
|
x = rand(10^6);
|
||||||
@benchmark [1, 2, 3]
|
@benchmark winsorized_mean($x, 10^5)
|
||||||
|
using Statistics, StatsBase
|
||||||
|
@benchmark mean(winsor($x; count=10^5))
|
||||||
|
|
||||||
# Code for section 3.1.2
|
@edit winsor(x, count=10^5)
|
||||||
|
|
||||||
using Statistics
|
|
||||||
mean(aq; dims=1)
|
|
||||||
std(aq; dims=1)
|
|
||||||
|
|
||||||
map(mean, eachcol(aq))
|
|
||||||
map(std, eachcol(aq))
|
|
||||||
|
|
||||||
map(eachcol(aq)) do col
|
|
||||||
mean(col)
|
|
||||||
end
|
|
||||||
|
|
||||||
[mean(col) for col in eachcol(aq)]
|
|
||||||
[std(col) for col in eachcol(aq)]
|
|
||||||
|
|
||||||
# Code for section 3.1.3
|
|
||||||
|
|
||||||
[mean(aq[:, j]) for j in axes(aq, 2)]
|
|
||||||
[std(aq[:, j]) for j in axes(aq, 2)]
|
|
||||||
|
|
||||||
axes(aq, 2)
|
|
||||||
?Base.OneTo
|
|
||||||
|
|
||||||
[mean(view(aq, :, j)) for j in axes(aq, 2)]
|
|
||||||
[std(@view aq[:, j]) for j in axes(aq, 2)]
|
|
||||||
|
|
||||||
# Code for section 3.1.4
|
|
||||||
|
|
||||||
using BenchmarkTools
|
|
||||||
x = ones(10^7, 10)
|
|
||||||
@benchmark [mean(@view $x[:, j]) for j in axes($x, 2)]
|
|
||||||
@benchmark [mean($x[:, j]) for j in axes($x, 2)]
|
|
||||||
@benchmark mean($x, dims=1)
|
|
||||||
|
|
||||||
# Code for section 3.1.5
|
|
||||||
|
|
||||||
[cor(aq[:, i], aq[:, i+1]) for i in 1:2:7]
|
|
||||||
collect(1:2:7)
|
|
||||||
|
|
||||||
# Code for section 3.1.6
|
|
||||||
|
|
||||||
y = aq[:, 2]
|
|
||||||
X = [ones(11) aq[:, 1]]
|
|
||||||
X \ y
|
|
||||||
[[ones(11) aq[:, i]] \ aq[:, i+1] for i in 1:2:7]
|
|
||||||
|
|
||||||
"""
    R²(x, y)

Fit `y ≈ a + b * x` by least squares and return the coefficient of
determination, `1 - SS_res / SS_tot`.

`x` and `y` are vectors of equal length. The intercept column is sized from
`x`, so any number of observations is accepted.
"""
function R²(x, y)
    # Design matrix: a column of ones (intercept) next to the feature column.
    X = [ones(length(x)) x]
    model = X \ y
    prediction = X * model
    # Named `residuals` so the local does not shadow `Base.error`.
    residuals = y - prediction
    SS_res = sum(v -> v ^ 2, residuals)
    mean_y = mean(y)
    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
    return 1 - SS_res / SS_tot
end
|
|
||||||
[R²(aq[:, i], aq[:, i+1]) for i in 1:2:7]
|
|
||||||
|
|
||||||
?²
|
|
||||||
|
|
||||||
# Code for section 3.1.7
|
|
||||||
|
|
||||||
using Plots
|
|
||||||
scatter(aq[:, 1], aq[:, 2]; legend=false)
|
|
||||||
|
|
||||||
plot(scatter(aq[:, 1], aq[:, 2]; legend=false),
|
|
||||||
scatter(aq[:, 3], aq[:, 4]; legend=false),
|
|
||||||
scatter(aq[:, 5], aq[:, 6]; legend=false),
|
|
||||||
scatter(aq[:, 7], aq[:, 8]; legend=false))
|
|
||||||
|
|
||||||
plot([scatter(aq[:, i], aq[:, i+1]; legend=false)
|
|
||||||
for i in 1:2:7]...)
|
|
||||||
|
|
||||||
# Code for section 3.2
|
|
||||||
|
|
||||||
two_standard = Dict{Int, Int}()
|
|
||||||
for i in [1, 2, 3, 4, 5, 6]
|
|
||||||
for j in [1, 2, 3, 4, 5, 6]
|
|
||||||
s = i + j
|
|
||||||
if haskey(two_standard, s)
|
|
||||||
two_standard[s] += 1
|
|
||||||
else
|
|
||||||
two_standard[s] = 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
two_standard
|
|
||||||
|
|
||||||
keys(two_standard)
|
|
||||||
values(two_standard)
|
|
||||||
|
|
||||||
using Plots
|
|
||||||
scatter(collect(keys(two_standard)), collect(values(two_standard));
|
|
||||||
legend=false, xaxis=2:12)
|
|
||||||
|
|
||||||
all_dice = [[1, x2, x3, x4, x5, x6]
|
|
||||||
for x2 in 2:11
|
|
||||||
for x3 in x2:11
|
|
||||||
for x4 in x3:11
|
|
||||||
for x5 in x4:11
|
|
||||||
for x6 in x5:11]
|
|
||||||
|
|
||||||
for d1 in all_dice, d2 in all_dice
|
|
||||||
test = Dict{Int, Int}()
|
|
||||||
for i in d1, j in d2
|
|
||||||
s = i + j
|
|
||||||
if haskey(test, s)
|
|
||||||
test[s] += 1
|
|
||||||
else
|
|
||||||
test[s] = 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
if test == two_standard
|
|
||||||
println(d1, " ", d2)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Code for section 3.3
|
|
||||||
|
|
||||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
|
||||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
|
||||||
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
|
||||||
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
|
||||||
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
|
||||||
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
|
||||||
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
|
||||||
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
|
||||||
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
|
||||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
|
||||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
|
||||||
|
|
||||||
dataset1 = (x=aq[:, 1], y=aq[:, 2])
|
|
||||||
|
|
||||||
dataset1[1]
|
|
||||||
dataset1.x
|
|
||||||
|
|
||||||
# Code for listing 3.2
|
|
||||||
|
|
||||||
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
|
||||||
set2=(x=aq[:, 3], y=aq[:, 4]),
|
|
||||||
set3=(x=aq[:, 5], y=aq[:, 6]),
|
|
||||||
set4=(x=aq[:, 7], y=aq[:, 8]))
|
|
||||||
|
|
||||||
# Code for section 3.3.2
|
|
||||||
|
|
||||||
using Statistics
|
|
||||||
map(s -> mean(s.x), data)
|
|
||||||
|
|
||||||
map(s -> cor(s.x, s.y), data)
|
|
||||||
|
|
||||||
using GLM
|
|
||||||
model = lm(@formula(y ~ x), data.set1)
|
|
||||||
|
|
||||||
r2(model)
|
|
||||||
|
|
||||||
# Code for section 3.3.3
|
|
||||||
|
|
||||||
model.mm
|
|
||||||
|
|
||||||
x = [3, 1, 2]
|
|
||||||
sort(x)
|
|
||||||
x
|
|
||||||
sort!(x)
|
|
||||||
x
|
|
||||||
|
|
||||||
empty_field!(nt, i) = empty!(nt[i])
|
|
||||||
nt = (dict = Dict("a" => 1, "b" => 2), int=10)
|
|
||||||
empty_field!(nt, 1)
|
|
||||||
nt
|
|
||||||
|
|
||||||
# Code for section 3.4.1
|
|
||||||
|
|
||||||
x = [1 2 3]
|
|
||||||
y = [1, 2, 3]
|
|
||||||
x * y
|
|
||||||
|
|
||||||
a = [1, 2, 3]
|
|
||||||
b = [4, 5, 6]
|
|
||||||
a * b
|
|
||||||
|
|
||||||
a .* b
|
|
||||||
|
|
||||||
map(*, a, b)
|
|
||||||
[a[i] * b[i] for i in eachindex(a, b)]
|
|
||||||
|
|
||||||
eachindex(a, b)
|
|
||||||
|
|
||||||
eachindex([1, 2, 3], [4, 5])
|
|
||||||
|
|
||||||
map(*, [1, 2, 3], [4, 5])
|
|
||||||
|
|
||||||
[1, 2, 3] .* [4, 5]
|
|
||||||
|
|
||||||
# Code for section 3.4.2
|
|
||||||
|
|
||||||
[1, 2, 3] .* [4]
|
|
||||||
|
|
||||||
[1, 2, 3] .^ 2
|
|
||||||
|
|
||||||
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] .* [1 2 3 4 5 6 7 8 9 10]
|
|
||||||
|
|
||||||
["x", "y", "z"] .=> [sum minimum maximum]
|
|
||||||
|
|
||||||
abs.([1, -2, 3, -4])
|
|
||||||
|
|
||||||
abs([1, 2, 3])
|
|
||||||
|
|
||||||
string(1, 2, 3)
|
|
||||||
|
|
||||||
string.("x", 1:10)
|
|
||||||
|
|
||||||
f(i::Int) = string("got integer ", i)
|
|
||||||
f(s::String) = string("got string ", s)
|
|
||||||
f.([1, "1"])
|
|
||||||
|
|
||||||
# Code for section 3.4.3
|
|
||||||
|
|
||||||
in(1, [1, 2, 3])
|
|
||||||
in(4, [1, 2, 3])
|
|
||||||
|
|
||||||
in([1, 3, 5, 7, 9], [1, 2, 3, 4])
|
|
||||||
|
|
||||||
in.([1, 3, 5, 7, 9], [1, 2, 3, 4])
|
|
||||||
|
|
||||||
in.([1, 3, 5, 7, 9], Ref([1, 2, 3, 4]))
|
|
||||||
|
|
||||||
# Code for section 3.4.4
|
|
||||||
|
|
||||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
|
||||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
|
||||||
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
|
||||||
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
|
||||||
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
|
||||||
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
|
||||||
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
|
||||||
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
|
||||||
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
|
||||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
|
||||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
|
||||||
using Statistics
|
|
||||||
|
|
||||||
mean.(eachcol(aq))
|
|
||||||
|
|
||||||
mean(eachcol(aq))
|
|
||||||
|
|
||||||
"""
    R²(x, y)

Fit `y ≈ a + b * x` by least squares and return the coefficient of
determination, `1 - SS_res / SS_tot`.

`x` and `y` are vectors of equal length. The intercept column is sized from
`x`, so any number of observations is accepted.
"""
function R²(x, y)
    # Design matrix: a column of ones (intercept) next to the feature column.
    X = [ones(length(x)) x]
    model = X \ y
    prediction = X * model
    # Named `residuals` so the local does not shadow `Base.error`.
    residuals = y - prediction
    SS_res = sum(v -> v ^ 2, residuals)
    mean_y = mean(y)
    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
    return 1 - SS_res / SS_tot
end
|
|
||||||
|
|
||||||
"""
    R²(x, y)

Broadcasting variant: fit `y ≈ a + b * x` by least squares and return the
coefficient of determination, `1 - SS_res / SS_tot`.

`x` and `y` are vectors of equal length. The intercept column is sized from
`x`, so any number of observations is accepted.
"""
function R²(x, y)
    # Design matrix: a column of ones (intercept) next to the feature column.
    X = [ones(length(x)) x]
    model = X \ y
    prediction = X * model
    SS_res = sum((y .- prediction) .^ 2)
    SS_tot = sum((y .- mean(y)) .^ 2)
    return 1 - SS_res / SS_tot
end
|
|
||||||
|
|
||||||
# Code for section 3.5
|
|
||||||
|
|
||||||
[]
|
|
||||||
Dict()
|
|
||||||
|
|
||||||
Float64[1, 2, 3]
|
|
||||||
|
|
||||||
Dict{UInt8, Float64}(0 => 0, 1 => 1)
|
|
||||||
|
|
||||||
UInt32(200)
|
|
||||||
|
|
||||||
Real[1, 1.0, 0x3]
|
|
||||||
|
|
||||||
v1 = Any[1, 2, 3]
|
|
||||||
eltype(v1)
|
|
||||||
v2 = Float64[1, 2, 3]
|
|
||||||
eltype(v2)
|
|
||||||
v3 = [1, 2, 3]
# Inspect the element type of the vector just created (not `v2` again).
eltype(v3)
|
|
||||||
d1 = Dict()
|
|
||||||
eltype(d1)
|
|
||||||
d2 = Dict(1 => 2, 3 => 4)
|
|
||||||
eltype(d2)
|
|
||||||
|
|
||||||
p = 1 => 2
|
|
||||||
typeof(p)
|
|
||||||
|
|
||||||
# Code for section 3.5.1
|
|
||||||
|
|
||||||
[1, 2, 3] isa AbstractVector{Int}
|
|
||||||
[1, 2, 3] isa AbstractVector{Real}
|
|
||||||
|
|
||||||
AbstractVector{<:Real}
|
|
||||||
|
|
||||||
# Code for section 3.5.2
|
|
||||||
|
|
||||||
using Statistics
|
|
||||||
"""
    ourcov(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})

Return the sum of the products of the deviations of `x` and `y` from their
means, divided by `length(x) - 1`.

Asserts that both vectors have the same, non-zero length.
"""
function ourcov(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})
    len = length(x)
    @assert len == length(y) > 0
    # Deviations of each vector from its own mean.
    x_dev = x .- mean(x)
    y_dev = y .- mean(y)
    return sum(x_dev .* y_dev) / (len - 1)
end
|
|
||||||
|
|
||||||
ourcov(1:4, [1.0, 3.0, 2.0, 4.0])
|
|
||||||
cov(1:4, [1.0, 3.0, 2.0, 4.0])
|
|
||||||
|
|
||||||
ourcov(1:4, Any[1.0, 3.0, 2.0, 4.0])
|
|
||||||
|
|
||||||
x = Any[1, 2, 3]
|
|
||||||
identity.(x)
|
|
||||||
y = Any[1, 2.0]
|
|
||||||
identity.(y)
|
|
||||||
|
|||||||
224
ch04.jl
224
ch04.jl
@@ -1,224 +0,0 @@
|
|||||||
# Bogumił Kamiński, 2022
|
|
||||||
|
|
||||||
# Codes for chapter 4
|
|
||||||
|
|
||||||
# Code for listing 4.1
|
|
||||||
|
|
||||||
import Downloads
|
|
||||||
Downloads.download("https://raw.githubusercontent.com/" *
|
|
||||||
"sidooms/MovieTweetings/" *
|
|
||||||
"44c525d0c766944910686c60697203cda39305d6/" *
|
|
||||||
"snapshots/10K/movies.dat",
|
|
||||||
"movies.dat")
|
|
||||||
|
|
||||||
# Code for string interpolation examples
|
|
||||||
|
|
||||||
x = 10
|
|
||||||
"I have $x apples"
|
|
||||||
|
|
||||||
"I have \$100."
|
|
||||||
"I have $100."
|
|
||||||
|
|
||||||
# Code for multiline strings
|
|
||||||
|
|
||||||
Downloads.download("https://raw.githubusercontent.com/\
|
|
||||||
sidooms/MovieTweetings/\
|
|
||||||
44c525d0c766944910686c60697203cda39305d6/\
|
|
||||||
snapshots/10K/movies.dat",
|
|
||||||
"movies.dat")
|
|
||||||
|
|
||||||
"a\
|
|
||||||
b\
|
|
||||||
c"
|
|
||||||
|
|
||||||
# Code for raw strings
|
|
||||||
|
|
||||||
"C:\my_folder\my_file.txt"
|
|
||||||
|
|
||||||
raw"C:\my_folder\my_file.txt"
|
|
||||||
|
|
||||||
# Code for listing 4.2
|
|
||||||
|
|
||||||
movies = readlines("movies.dat")
|
|
||||||
|
|
||||||
# Code for section 4.2
|
|
||||||
|
|
||||||
movie1 = first(movies)
|
|
||||||
|
|
||||||
movie1_parts = split(movie1, "::")
|
|
||||||
|
|
||||||
supertype(String)
|
|
||||||
supertype(SubString{String})
|
|
||||||
|
|
||||||
# Code for section 4.3
|
|
||||||
|
|
||||||
movie1_parts[2]
|
|
||||||
|
|
||||||
rx = r"(.*) \((\d{4})\)$"
|
|
||||||
|
|
||||||
m = match(rx, movie1_parts[2])
|
|
||||||
|
|
||||||
m[1]
|
|
||||||
m[2]
|
|
||||||
|
|
||||||
parse(Int, m[2])
|
|
||||||
|
|
||||||
# Code for listing 4.3
|
|
||||||
|
|
||||||
"""
    parseline(line::String)

Split `line` on `"::"` and return a named tuple `(id, name, year, genres)`.

The second field must contain text followed by a space and a four-digit number
in parentheses; the text becomes `name` and the number is parsed into `year`.
The third field is split on `"|"` into `genres`.

Throws `ArgumentError` if `line` has fewer than three `"::"`-separated fields
or if the second field does not contain the `"name (year)"` pattern.
"""
function parseline(line::String)
    parts = split(line, "::")
    # Fail early with a clear message instead of a BoundsError on `parts[3]`.
    length(parts) >= 3 ||
        throw(ArgumentError("expected at least 3 \"::\"-separated fields in: " * line))
    m = match(r"(.*) \((\d{4})\)", parts[2])
    # `match` returns `nothing` when the pattern is absent; indexing it would
    # otherwise raise an uninformative MethodError.
    m === nothing &&
        throw(ArgumentError("no \"name (year)\" pattern found in: " * parts[2]))
    return (id=parts[1],
            name=m[1],
            year=parse(Int, m[2]),
            genres=split(parts[3], "|"))
end
|
|
||||||
|
|
||||||
# Code for parsing one line of movies data
|
|
||||||
|
|
||||||
record1 = parseline(movie1)
|
|
||||||
|
|
||||||
# Code for listing 4.4
|
|
||||||
|
|
||||||
codeunits("a")
|
|
||||||
codeunits("ε")
|
|
||||||
codeunits("∀")
|
|
||||||
|
|
||||||
# Codes for different patterns of string subsetting
|
|
||||||
|
|
||||||
word = first(record1.name, 8)
|
|
||||||
|
|
||||||
record1.name[1:8]
|
|
||||||
|
|
||||||
for i in eachindex(word)
|
|
||||||
println(i, ": ", word[i])
|
|
||||||
end
|
|
||||||
|
|
||||||
codeunits("ô")
|
|
||||||
|
|
||||||
codeunits("Fantômas")
|
|
||||||
|
|
||||||
isascii("Hello world!")
|
|
||||||
isascii("∀ x: x≥0")
|
|
||||||
|
|
||||||
word[1]
|
|
||||||
word[5]
|
|
||||||
|
|
||||||
# Code for section 4.5
|
|
||||||
|
|
||||||
records = parseline.(movies)
|
|
||||||
|
|
||||||
genres = String[]
|
|
||||||
for record in records
|
|
||||||
append!(genres, record.genres)
|
|
||||||
end
|
|
||||||
genres
|
|
||||||
|
|
||||||
using FreqTables
|
|
||||||
table = freqtable(genres)
|
|
||||||
sort!(table)
|
|
||||||
|
|
||||||
years = [record.year for record in records]
|
|
||||||
has_drama = ["Drama" in record.genres for record in records]
|
|
||||||
drama_prop = proptable(years, has_drama; margins=1)
|
|
||||||
|
|
||||||
# Code for listing 4.5
|
|
||||||
|
|
||||||
using Plots
|
|
||||||
|
|
||||||
plot(names(drama_prop, 1), drama_prop[:, 2]; legend=false,
|
|
||||||
xlabel="year", ylabel="Drama probability")
|
|
||||||
|
|
||||||
# Code for section 4.6.1
|
|
||||||
|
|
||||||
s1 = Symbol("x")
|
|
||||||
s2 = Symbol("hello world!")
|
|
||||||
s3 = Symbol("x", 1)
|
|
||||||
|
|
||||||
typeof(s1)
|
|
||||||
typeof(s2)
|
|
||||||
typeof(s3)
|
|
||||||
|
|
||||||
Symbol("1")
|
|
||||||
|
|
||||||
:x
|
|
||||||
:x1
|
|
||||||
|
|
||||||
:hello world
|
|
||||||
:1
|
|
||||||
|
|
||||||
# Code for section 4.6.2
|
|
||||||
|
|
||||||
supertype(Symbol)
|
|
||||||
|
|
||||||
:x == :x
|
|
||||||
:x == :y
|
|
||||||
|
|
||||||
# Code for listing 4.6
|
|
||||||
|
|
||||||
using BenchmarkTools
|
|
||||||
str = string.("x", 1:10^6)
|
|
||||||
symb = Symbol.(str)
|
|
||||||
@benchmark "x" in $str
|
|
||||||
@benchmark :x in $symb
|
|
||||||
|
|
||||||
# Code for section 4.7
|
|
||||||
|
|
||||||
using InlineStrings
|
|
||||||
s1 = InlineString("x")
|
|
||||||
typeof(s1)
|
|
||||||
s2 = InlineString("∀")
|
|
||||||
typeof(s2)
|
|
||||||
sv = inlinestrings(["The", "quick", "brown", "fox", "jumps",
|
|
||||||
"over", "the", "lazy", "dog"])
|
|
||||||
|
|
||||||
# Code for listing 4.7
|
|
||||||
|
|
||||||
using Random
|
|
||||||
using BenchmarkTools
|
|
||||||
Random.seed!(1234);
|
|
||||||
s1 = [randstring(3) for i in 1:10^6]
|
|
||||||
s2 = inlinestrings(s1)
|
|
||||||
|
|
||||||
# Code for analyzing properties of InlineStrings.jl
|
|
||||||
|
|
||||||
Base.summarysize(s1)
|
|
||||||
Base.summarysize(s2)
|
|
||||||
|
|
||||||
@benchmark sort($s1)
|
|
||||||
@benchmark sort($s2)
|
|
||||||
|
|
||||||
# Code for listing 4.8
|
|
||||||
|
|
||||||
open("iris.txt", "w") do io
|
|
||||||
for i in 1:10^6
|
|
||||||
println(io, "Iris setosa")
|
|
||||||
println(io, "Iris virginica")
|
|
||||||
println(io, "Iris versicolor")
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Code for section 4.8.2
|
|
||||||
|
|
||||||
uncompressed = readlines("iris.txt")
|
|
||||||
|
|
||||||
using PooledArrays
|
|
||||||
compressed = PooledArray(uncompressed)
|
|
||||||
|
|
||||||
Base.summarysize(uncompressed)
|
|
||||||
Base.summarysize(compressed)
|
|
||||||
|
|
||||||
# Code for section 4.8.3
|
|
||||||
|
|
||||||
compressed.invpool
|
|
||||||
compressed.pool
|
|
||||||
|
|
||||||
compressed[10]
|
|
||||||
compressed.pool[compressed.refs[10]]
|
|
||||||
|
|
||||||
Base.summarysize.(compressed.pool)
|
|
||||||
|
|
||||||
v1 = string.("x", 1:10^6)
|
|
||||||
v2 = PooledArray(v1)
|
|
||||||
Base.summarysize(v1)
|
|
||||||
Base.summarysize(v2)
|
|
||||||
359
ch045.jl
Normal file
359
ch045.jl
Normal file
@@ -0,0 +1,359 @@
|
|||||||
|
# Bogumił Kamiński, 2021
|
||||||
|
|
||||||
|
# Codes for chapter 3
|
||||||
|
|
||||||
|
# Code for listing 3.1
|
||||||
|
|
||||||
|
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||||
|
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||||
|
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||||
|
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||||
|
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||||
|
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||||
|
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||||
|
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||||
|
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||||
|
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||||
|
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||||
|
|
||||||
|
# Code for checking size of a matrix
|
||||||
|
|
||||||
|
size(aq)
|
||||||
|
size(aq, 1)
|
||||||
|
size(aq, 2)
|
||||||
|
|
||||||
|
# Code comparing tuple to a vector
|
||||||
|
|
||||||
|
v = [1, 2, 3]
|
||||||
|
t = (1, 2, 3)
|
||||||
|
v[1]
|
||||||
|
t[1]
|
||||||
|
v[1] = 10
|
||||||
|
v
|
||||||
|
t[1] = 10
|
||||||
|
|
||||||
|
# Code for figure 3.2
|
||||||
|
|
||||||
|
using BenchmarkTools
|
||||||
|
@benchmark (1, 2, 3)
|
||||||
|
@benchmark [1, 2, 3]
|
||||||
|
|
||||||
|
# Code for section 3.1.2
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
mean(aq; dims=1)
|
||||||
|
std(aq; dims=1)
|
||||||
|
|
||||||
|
map(mean, eachcol(aq))
|
||||||
|
map(std, eachcol(aq))
|
||||||
|
|
||||||
|
map(eachcol(aq)) do col
|
||||||
|
mean(col)
|
||||||
|
end
|
||||||
|
|
||||||
|
[mean(col) for col in eachcol(aq)]
|
||||||
|
[std(col) for col in eachcol(aq)]
|
||||||
|
|
||||||
|
# Code for section 3.1.3
|
||||||
|
|
||||||
|
[mean(aq[:, j]) for j in axes(aq, 2)]
|
||||||
|
[std(aq[:, j]) for j in axes(aq, 2)]
|
||||||
|
|
||||||
|
axes(aq, 2)
|
||||||
|
?Base.OneTo
|
||||||
|
|
||||||
|
[mean(view(aq, :, j)) for j in axes(aq, 2)]
|
||||||
|
[std(@view aq[:, j]) for j in axes(aq, 2)]
|
||||||
|
|
||||||
|
# Code for section 3.1.4
|
||||||
|
|
||||||
|
using BenchmarkTools
|
||||||
|
x = ones(10^7, 10)
|
||||||
|
@benchmark [mean(@view $x[:, j]) for j in axes($x, 2)]
|
||||||
|
@benchmark [mean($x[:, j]) for j in axes($x, 2)]
|
||||||
|
@benchmark mean($x, dims=1)
|
||||||
|
|
||||||
|
# Code for section 3.1.5
|
||||||
|
|
||||||
|
[cor(aq[:, i], aq[:, i+1]) for i in 1:2:7]
|
||||||
|
collect(1:2:7)
|
||||||
|
|
||||||
|
# Code for section 3.1.6
|
||||||
|
|
||||||
|
y = aq[:, 2]
|
||||||
|
X = [ones(11) aq[:, 1]]
|
||||||
|
X \ y
|
||||||
|
[[ones(11) aq[:, i]] \ aq[:, i+1] for i in 1:2:7]
|
||||||
|
|
||||||
|
"""
    R²(x, y)

Fit `y ≈ a + b * x` by least squares and return the coefficient of
determination, `1 - SS_res / SS_tot`.

`x` and `y` are vectors of equal length. The intercept column is sized from
`x`, so any number of observations is accepted.
"""
function R²(x, y)
    # Design matrix: a column of ones (intercept) next to the feature column.
    X = [ones(length(x)) x]
    model = X \ y
    prediction = X * model
    # Named `residuals` so the local does not shadow `Base.error`.
    residuals = y - prediction
    SS_res = sum(v -> v ^ 2, residuals)
    mean_y = mean(y)
    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
    return 1 - SS_res / SS_tot
end
|
||||||
|
[R²(aq[:, i], aq[:, i+1]) for i in 1:2:7]
|
||||||
|
|
||||||
|
?²
|
||||||
|
|
||||||
|
# Code for section 3.1.7
|
||||||
|
|
||||||
|
using Plots
|
||||||
|
scatter(aq[:, 1], aq[:, 2]; legend=false)
|
||||||
|
|
||||||
|
plot(scatter(aq[:, 1], aq[:, 2]; legend=false),
|
||||||
|
scatter(aq[:, 3], aq[:, 4]; legend=false),
|
||||||
|
scatter(aq[:, 5], aq[:, 6]; legend=false),
|
||||||
|
scatter(aq[:, 7], aq[:, 8]; legend=false))
|
||||||
|
|
||||||
|
plot([scatter(aq[:, i], aq[:, i+1]; legend=false)
|
||||||
|
for i in 1:2:7]...)
|
||||||
|
|
||||||
|
# Code for section 3.2
|
||||||
|
|
||||||
|
two_standard = Dict{Int, Int}()
|
||||||
|
for i in [1, 2, 3, 4, 5, 6]
|
||||||
|
for j in [1, 2, 3, 4, 5, 6]
|
||||||
|
s = i + j
|
||||||
|
if haskey(two_standard, s)
|
||||||
|
two_standard[s] += 1
|
||||||
|
else
|
||||||
|
two_standard[s] = 1
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
two_standard
|
||||||
|
|
||||||
|
keys(two_standard)
|
||||||
|
values(two_standard)
|
||||||
|
|
||||||
|
using Plots
|
||||||
|
scatter(collect(keys(two_standard)), collect(values(two_standard));
|
||||||
|
legend=false, xaxis=2:12)
|
||||||
|
|
||||||
|
all_dice = [[1, x2, x3, x4, x5, x6]
|
||||||
|
for x2 in 2:11
|
||||||
|
for x3 in x2:11
|
||||||
|
for x4 in x3:11
|
||||||
|
for x5 in x4:11
|
||||||
|
for x6 in x5:11]
|
||||||
|
|
||||||
|
for d1 in all_dice, d2 in all_dice
|
||||||
|
test = Dict{Int, Int}()
|
||||||
|
for i in d1, j in d2
|
||||||
|
s = i + j
|
||||||
|
if haskey(test, s)
|
||||||
|
test[s] += 1
|
||||||
|
else
|
||||||
|
test[s] = 1
|
||||||
|
end
|
||||||
|
end
|
||||||
|
if test == two_standard
|
||||||
|
println(d1, " ", d2)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code for section 3.3
|
||||||
|
|
||||||
|
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||||
|
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||||
|
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||||
|
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||||
|
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||||
|
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||||
|
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||||
|
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||||
|
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||||
|
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||||
|
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||||
|
|
||||||
|
dataset1 = (x=aq[:, 1], y=aq[:, 2])
|
||||||
|
|
||||||
|
dataset1[1]
|
||||||
|
dataset1.x
|
||||||
|
|
||||||
|
# Code for listing 3.2
|
||||||
|
|
||||||
|
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
||||||
|
set2=(x=aq[:, 3], y=aq[:, 4]),
|
||||||
|
set3=(x=aq[:, 5], y=aq[:, 6]),
|
||||||
|
set4=(x=aq[:, 7], y=aq[:, 8]))
|
||||||
|
|
||||||
|
# Code for section 3.3.2
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
map(s -> mean(s.x), data)
|
||||||
|
|
||||||
|
map(s -> cor(s.x, s.y), data)
|
||||||
|
|
||||||
|
using GLM
|
||||||
|
model = lm(@formula(y ~ x), data.set1)
|
||||||
|
|
||||||
|
r2(model)
|
||||||
|
|
||||||
|
# Code for section 3.3.3
|
||||||
|
|
||||||
|
model.mm
|
||||||
|
|
||||||
|
x = [3, 1, 2]
|
||||||
|
sort(x)
|
||||||
|
x
|
||||||
|
sort!(x)
|
||||||
|
x
|
||||||
|
|
||||||
|
empty_field!(nt, i) = empty!(nt[i])
|
||||||
|
nt = (dict = Dict("a" => 1, "b" => 2), int=10)
|
||||||
|
empty_field!(nt, 1)
|
||||||
|
nt
|
||||||
|
|
||||||
|
# Code for section 3.4.1
|
||||||
|
|
||||||
|
x = [1 2 3]
|
||||||
|
y = [1, 2, 3]
|
||||||
|
x * y
|
||||||
|
|
||||||
|
a = [1, 2, 3]
|
||||||
|
b = [4, 5, 6]
|
||||||
|
a * b
|
||||||
|
|
||||||
|
a .* b
|
||||||
|
|
||||||
|
map(*, a, b)
|
||||||
|
[a[i] * b[i] for i in eachindex(a, b)]
|
||||||
|
|
||||||
|
eachindex(a, b)
|
||||||
|
|
||||||
|
eachindex([1, 2, 3], [4, 5])
|
||||||
|
|
||||||
|
map(*, [1, 2, 3], [4, 5])
|
||||||
|
|
||||||
|
[1, 2, 3] .* [4, 5]
|
||||||
|
|
||||||
|
# Code for section 3.4.2
|
||||||
|
|
||||||
|
[1, 2, 3] .* [4]
|
||||||
|
|
||||||
|
[1, 2, 3] .^ 2
|
||||||
|
|
||||||
|
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] .* [1 2 3 4 5 6 7 8 9 10]
|
||||||
|
|
||||||
|
["x", "y", "z"] .=> [sum minimum maximum]
|
||||||
|
|
||||||
|
abs.([1, -2, 3, -4])
|
||||||
|
|
||||||
|
abs([1, 2, 3])
|
||||||
|
|
||||||
|
string(1, 2, 3)
|
||||||
|
|
||||||
|
string.("x", 1:10)
|
||||||
|
|
||||||
|
f(i::Int) = string("got integer ", i)
|
||||||
|
f(s::String) = string("got string ", s)
|
||||||
|
f.([1, "1"])
|
||||||
|
|
||||||
|
# Code for section 3.4.3
|
||||||
|
|
||||||
|
in(1, [1, 2, 3])
|
||||||
|
in(4, [1, 2, 3])
|
||||||
|
|
||||||
|
in([1, 3, 5, 7, 9], [1, 2, 3, 4])
|
||||||
|
|
||||||
|
in.([1, 3, 5, 7, 9], [1, 2, 3, 4])
|
||||||
|
|
||||||
|
in.([1, 3, 5, 7, 9], Ref([1, 2, 3, 4]))
|
||||||
|
|
||||||
|
# Code for section 3.4.4
|
||||||
|
|
||||||
|
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||||
|
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||||
|
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||||
|
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||||
|
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||||
|
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||||
|
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||||
|
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||||
|
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||||
|
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||||
|
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||||
|
using Statistics
|
||||||
|
|
||||||
|
mean.(eachcol(aq))
|
||||||
|
|
||||||
|
mean(eachcol(aq))
|
||||||
|
|
||||||
|
# Coefficient of determination of a simple linear regression of `y` on `x`.
# `x` and `y` are vectors with the same number of observations; the model
# includes an intercept term. Returns 1 - SS_res / SS_tot.
function R²(x, y)
    # Intercept column sized from the data instead of a hard-coded 11 rows,
    # so the function works for any number of observations.
    X = [ones(length(x)) x]
    model = X \ y                  # least-squares coefficients
    prediction = X * model
    residuals = y - prediction     # named so it does not shadow Base.error
    SS_res = sum(v -> v ^ 2, residuals)
    mean_y = mean(y)
    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
    return 1 - SS_res / SS_tot
end
|
||||||
|
|
||||||
|
# Coefficient of determination of a simple linear regression of `y` on `x`,
# written with broadcasting. `x` and `y` are vectors with the same number of
# observations; the model includes an intercept term.
function R²(x, y)
    # Intercept column sized from the data instead of a hard-coded 11 rows,
    # so the function works for any number of observations.
    X = [ones(length(x)) x]
    model = X \ y                  # least-squares coefficients
    prediction = X * model
    SS_res = sum(abs2, y .- prediction)
    SS_tot = sum(abs2, y .- mean(y))
    return 1 - SS_res / SS_tot
end
|
||||||
|
|
||||||
|
# Code for section 3.5
|
||||||
|
|
||||||
|
[]
|
||||||
|
Dict()
|
||||||
|
|
||||||
|
Float64[1, 2, 3]
|
||||||
|
|
||||||
|
Dict{UInt8, Float64}(0 => 0, 1 => 1)
|
||||||
|
|
||||||
|
UInt32(200)
|
||||||
|
|
||||||
|
Real[1, 1.0, 0x3]
|
||||||
|
|
||||||
|
v1 = Any[1, 2, 3]
|
||||||
|
eltype(v1)
|
||||||
|
v2 = Float64[1, 2, 3]
|
||||||
|
eltype(v2)
|
||||||
|
v3 = [1, 2, 3]
eltype(v3)  # element type inferred from the literal (previously re-queried v2)
|
||||||
|
d1 = Dict()
|
||||||
|
eltype(d1)
|
||||||
|
d2 = Dict(1 => 2, 3 => 4)
|
||||||
|
eltype(d2)
|
||||||
|
|
||||||
|
p = 1 => 2
|
||||||
|
typeof(p)
|
||||||
|
|
||||||
|
# Code for section 3.5.1
|
||||||
|
|
||||||
|
[1, 2, 3] isa AbstractVector{Int}
|
||||||
|
[1, 2, 3] isa AbstractVector{Real}
|
||||||
|
|
||||||
|
AbstractVector{<:Real}
|
||||||
|
|
||||||
|
# Code for section 3.5.2
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
# Sample covariance of two real-valued vectors of equal, non-zero length.
# The sum of products of deviations from the means is divided by `len - 1`.
function ourcov(x::AbstractVector{<:Real},
                y::AbstractVector{<:Real})
    len = length(x)
    # both vectors must be non-empty and of the same length
    @assert len == length(y) > 0
    x_centered = x .- mean(x)
    y_centered = y .- mean(y)
    cross_sum = sum(x_centered .* y_centered)
    return cross_sum / (len - 1)
end
|
||||||
|
|
||||||
|
ourcov(1:4, [1.0, 3.0, 2.0, 4.0])
|
||||||
|
cov(1:4, [1.0, 3.0, 2.0, 4.0])
|
||||||
|
|
||||||
|
ourcov(1:4, Any[1.0, 3.0, 2.0, 4.0])
|
||||||
|
|
||||||
|
x = Any[1, 2, 3]
|
||||||
|
identity.(x)
|
||||||
|
y = Any[1, 2.0]
|
||||||
|
identity.(y)
|
||||||
214
ch05.jl
214
ch05.jl
@@ -1,214 +0,0 @@
|
|||||||
# Bogumił Kamiński, 2022
|
|
||||||
|
|
||||||
# Codes for chapter 5
|
|
||||||
|
|
||||||
# Code for listing 5.1
|
|
||||||
|
|
||||||
using HTTP
|
|
||||||
using JSON3
|
|
||||||
query = "https://api.nbp.pl/api/exchangerates/rates/a/usd/" *
|
|
||||||
"2020-06-01/?format=json"
|
|
||||||
response = HTTP.get(query)
|
|
||||||
json = JSON3.read(response.body)
|
|
||||||
|
|
||||||
# Code for the remainder of section 5.1.2
|
|
||||||
|
|
||||||
response.body
|
|
||||||
|
|
||||||
String(response.body)
|
|
||||||
|
|
||||||
response.body
|
|
||||||
|
|
||||||
json.table
|
|
||||||
json.currency
|
|
||||||
json.code
|
|
||||||
json.rates
|
|
||||||
|
|
||||||
json.rates[1].mid
|
|
||||||
|
|
||||||
only(json.rates).mid
|
|
||||||
|
|
||||||
only([])
|
|
||||||
only([1, 2])
|
|
||||||
|
|
||||||
# Code for listing 5.2
|
|
||||||
|
|
||||||
query = "https://api.nbp.pl/api/exchangerates/rates/a/usd/" *
|
|
||||||
"2020-06-06/?format=json"
|
|
||||||
response = HTTP.get(query)
|
|
||||||
|
|
||||||
# Code for listing 5.3
|
|
||||||
|
|
||||||
query = "https://api.nbp.pl/api/exchangerates/rates/a/usd/" *
|
|
||||||
"2020-06-01/?format=json"
|
|
||||||
try
|
|
||||||
response = HTTP.get(query)
|
|
||||||
json = JSON3.read(response.body)
|
|
||||||
only(json.rates).mid
|
|
||||||
catch e
|
|
||||||
if e isa HTTP.ExceptionRequest.StatusError
|
|
||||||
missing
|
|
||||||
else
|
|
||||||
rethrow(e)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
query = "https://api.nbp.pl/api/exchangerates/rates/a/usd/" *
|
|
||||||
"2020-06-06/?format=json"
|
|
||||||
try
|
|
||||||
response = HTTP.get(query)
|
|
||||||
json = JSON3.read(response.body)
|
|
||||||
only(json.rates).mid
|
|
||||||
catch e
|
|
||||||
if e isa HTTP.ExceptionRequest.StatusError
|
|
||||||
missing
|
|
||||||
else
|
|
||||||
rethrow(e)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Code for section 5.2
|
|
||||||
|
|
||||||
ismissing(missing)
|
|
||||||
ismissing(1)
|
|
||||||
|
|
||||||
1 + missing
|
|
||||||
sin(missing)
|
|
||||||
|
|
||||||
1 == missing
|
|
||||||
1 > missing
|
|
||||||
1 < missing
|
|
||||||
|
|
||||||
if missing
|
|
||||||
print("this is not printed")
|
|
||||||
end
|
|
||||||
missing && true
|
|
||||||
|
|
||||||
coalesce(missing, true)
|
|
||||||
coalesce(missing, false)
|
|
||||||
|
|
||||||
isequal(1, missing)
|
|
||||||
isequal(missing, missing)
|
|
||||||
isless(1, missing)
|
|
||||||
isless(missing, missing)
|
|
||||||
|
|
||||||
isless(Inf, missing)
|
|
||||||
|
|
||||||
a = [1]
|
|
||||||
b = [1]
|
|
||||||
isequal(a, b)
|
|
||||||
a === b
|
|
||||||
|
|
||||||
x = [1, missing, 3, 4, missing]
|
|
||||||
|
|
||||||
coalesce.(x, 0)
|
|
||||||
|
|
||||||
sum(x)
|
|
||||||
|
|
||||||
y = skipmissing(x)
|
|
||||||
|
|
||||||
sum(y)
|
|
||||||
|
|
||||||
sum(skipmissing(x))
|
|
||||||
|
|
||||||
fun(x::Int, y::Int) = x + y
|
|
||||||
fun(1, 2)
|
|
||||||
fun(1, missing)
|
|
||||||
|
|
||||||
using Missings
|
|
||||||
fun2 = passmissing(fun)
|
|
||||||
fun2(1, 2)
|
|
||||||
fun2(1, missing)
|
|
||||||
|
|
||||||
# Code for section 5.3
|
|
||||||
|
|
||||||
using Dates
|
|
||||||
d = Date("2020-06-01")
|
|
||||||
|
|
||||||
typeof(d)
|
|
||||||
year(d)
|
|
||||||
month(d)
|
|
||||||
day(d)
|
|
||||||
|
|
||||||
dayofweek(d)
|
|
||||||
dayname(d)
|
|
||||||
|
|
||||||
Date(2020, 6, 1)
|
|
||||||
|
|
||||||
dates = Date.(2020, 6, 1:30)
|
|
||||||
|
|
||||||
Day(1)
|
|
||||||
|
|
||||||
d
|
|
||||||
d + Day(1)
|
|
||||||
|
|
||||||
Date(2020, 5, 20):Day(1):Date(2020, 7, 5)
|
|
||||||
|
|
||||||
collect(Date(2020, 5, 20):Day(1):Date(2020, 7, 5))
|
|
||||||
|
|
||||||
# Code for listing 5.6
|
|
||||||
|
|
||||||
# Query the NBP Web API for the USD exchange rate (table "a") on `date`.
# Returns the `mid` value of the single rate entry in the reply, or `missing`
# when the request fails with an HTTP status error. Any other exception is
# propagated to the caller unchanged.
function get_rate(date::Date)
    query = "https://api.nbp.pl/api/exchangerates/rates/" *
            "a/usd/$date/?format=json"
    try
        response = HTTP.get(query)
        json = JSON3.read(response.body)
        # `only` throws unless the reply holds exactly one rate entry
        return only(json.rates).mid
    catch e
        # Not an HTTP status error: re-raise the exception currently being
        # handled (argument-free `rethrow()` is the recommended form).
        e isa HTTP.ExceptionRequest.StatusError || rethrow()
        return missing
    end
end
|
|
||||||
|
|
||||||
# Code for showing how string interpolation works
|
|
||||||
|
|
||||||
"https://api.nbp.pl/api/exchangerates/rates/" *
|
|
||||||
"a/usd/$(dates[1])/?format=json"
|
|
||||||
|
|
||||||
"https://api.nbp.pl/api/exchangerates/rates/" *
|
|
||||||
"a/usd/$dates[1]/?format=json"
|
|
||||||
|
|
||||||
# Code for listing 5.7
|
|
||||||
|
|
||||||
rates = get_rate.(dates)
|
|
||||||
|
|
||||||
# Code for section 5.4
|
|
||||||
|
|
||||||
using Statistics
|
|
||||||
mean(rates)
|
|
||||||
std(rates)
|
|
||||||
|
|
||||||
mean(skipmissing(rates))
|
|
||||||
std(skipmissing(rates))
|
|
||||||
|
|
||||||
# Code for listing 5.8
|
|
||||||
|
|
||||||
using FreqTables
|
|
||||||
proptable(dayname.(dates), ismissing.(rates); margins=1)
|
|
||||||
|
|
||||||
# Code showing how to specify a complex condition using broadcasting
|
|
||||||
|
|
||||||
dayname.(dates) .== "Thursday" .&& ismissing.(rates)
|
|
||||||
|
|
||||||
# Code for listing 5.9
|
|
||||||
|
|
||||||
dates[dayname.(dates) .== "Thursday" .&& ismissing.(rates)]
|
|
||||||
|
|
||||||
# Codes for plotting exchange rate data
|
|
||||||
|
|
||||||
using Plots
|
|
||||||
plot(dates, rates; xlabel="day", ylabel="PLN/USD", legend=false)
|
|
||||||
|
|
||||||
rates_ok = .!ismissing.(rates)
|
|
||||||
|
|
||||||
plot(dates[rates_ok], rates[rates_ok];
|
|
||||||
xlabel="day", ylabel="PLN/USD", legend=false)
|
|
||||||
|
|
||||||
using Impute
|
|
||||||
rates_filled = Impute.interp(rates)
|
|
||||||
|
|
||||||
scatter!(dates, rates_filled)
|
|
||||||
370
ch06.jl
370
ch06.jl
@@ -1,248 +1,224 @@
|
|||||||
# Bogumił Kamiński, 2022
|
# Bogumił Kamiński, 2022
|
||||||
|
|
||||||
# Codes for chapter 6
|
# Codes for chapter 4
|
||||||
|
|
||||||
# Code for section 6.1
|
# Code for listing 4.1
|
||||||
|
|
||||||
if isfile("puzzles.csv.bz2")
|
import Downloads
|
||||||
@info "file already present"
|
Downloads.download("https://raw.githubusercontent.com/" *
|
||||||
else
|
"sidooms/MovieTweetings/" *
|
||||||
@info "fetching file"
|
"44c525d0c766944910686c60697203cda39305d6/" *
|
||||||
download("https://database.lichess.org/" *
|
"snapshots/10K/movies.dat",
|
||||||
"lichess_db_puzzle.csv.bz2",
|
"movies.dat")
|
||||||
"puzzles.csv.bz2")
|
|
||||||
|
# Code for string interpolation examples
|
||||||
|
|
||||||
|
x = 10
|
||||||
|
"I have $x apples"
|
||||||
|
|
||||||
|
"I have \$100."
|
||||||
|
"I have $100."
|
||||||
|
|
||||||
|
# Code for multiline strings
|
||||||
|
|
||||||
|
Downloads.download("https://raw.githubusercontent.com/\
|
||||||
|
sidooms/MovieTweetings/\
|
||||||
|
44c525d0c766944910686c60697203cda39305d6/\
|
||||||
|
snapshots/10K/movies.dat",
|
||||||
|
"movies.dat")
|
||||||
|
|
||||||
|
"a\
|
||||||
|
b\
|
||||||
|
c"
|
||||||
|
|
||||||
|
# Code for raw strings
|
||||||
|
|
||||||
|
"C:\my_folder\my_file.txt"
|
||||||
|
|
||||||
|
raw"C:\my_folder\my_file.txt"
|
||||||
|
|
||||||
|
# Code for listing 4.2
|
||||||
|
|
||||||
|
movies = readlines("movies.dat")
|
||||||
|
|
||||||
|
# Code for section 4.2
|
||||||
|
|
||||||
|
movie1 = first(movies)
|
||||||
|
|
||||||
|
movie1_parts = split(movie1, "::")
|
||||||
|
|
||||||
|
supertype(String)
|
||||||
|
supertype(SubString{String})
|
||||||
|
|
||||||
|
# Code for section 4.3
|
||||||
|
|
||||||
|
movie1_parts[2]
|
||||||
|
|
||||||
|
rx = r"(.*) \((\d{4})\)$"
|
||||||
|
|
||||||
|
m = match(rx, movie1_parts[2])
|
||||||
|
|
||||||
|
m[1]
|
||||||
|
m[2]
|
||||||
|
|
||||||
|
parse(Int, m[2])
|
||||||
|
|
||||||
|
# Code for listing 4.3
|
||||||
|
|
||||||
|
function parseline(line::String)
|
||||||
|
parts = split(line, "::")
|
||||||
|
m = match(r"(.*) \((\d{4})\)", parts[2])
|
||||||
|
return (id=parts[1],
|
||||||
|
name=m[1],
|
||||||
|
year=parse(Int, m[2]),
|
||||||
|
genres=split(parts[3], "|"))
|
||||||
end
|
end
|
||||||
|
|
||||||
using CodecBzip2
|
# Code for parsing one line of movies data
|
||||||
compressed = read("puzzles.csv.bz2")
|
|
||||||
plain = transcode(Bzip2Decompressor, compressed)
|
|
||||||
|
|
||||||
open("puzzles.csv", "w") do io
|
record1 = parseline(movie1)
|
||||||
println(io, "PuzzleId,FEN,Moves,Rating,RatingDeviation," *
|
|
||||||
"Popularity,NbPlays,Themes,GameUrl")
|
# Code for listing 4.4
|
||||||
write(io, plain)
|
|
||||||
|
codeunits("a")
|
||||||
|
codeunits("ε")
|
||||||
|
codeunits("∀")
|
||||||
|
|
||||||
|
# Codes for different patterns of string subsetting
|
||||||
|
|
||||||
|
word = first(record1.name, 8)
|
||||||
|
|
||||||
|
record1.name[1:8]
|
||||||
|
|
||||||
|
for i in eachindex(word)
|
||||||
|
println(i, ": ", word[i])
|
||||||
end
|
end
|
||||||
|
|
||||||
readlines("puzzles.csv")
|
codeunits("ô")
|
||||||
|
|
||||||
# Code for section 6.2
|
codeunits("Fantômas")
|
||||||
|
|
||||||
using CSV
|
isascii("Hello world!")
|
||||||
using DataFrames
|
isascii("∀ x: x≥0")
|
||||||
puzzles = CSV.read("puzzles.csv", DataFrame);
|
|
||||||
|
|
||||||
CSV.read(plain, DataFrame);
|
word[1]
|
||||||
|
word[5]
|
||||||
|
|
||||||
compressed = nothing
|
# Code for section 4.5
|
||||||
plain = nothing
|
|
||||||
|
|
||||||
# Code for listing 6.1
|
records = parseline.(movies)
|
||||||
|
|
||||||
puzzles
|
genres = String[]
|
||||||
|
for record in records
|
||||||
|
append!(genres, record.genres)
|
||||||
|
end
|
||||||
|
genres
|
||||||
|
|
||||||
# Code for listing 6.2
|
using FreqTables
|
||||||
|
table = freqtable(genres)
|
||||||
|
sort!(table)
|
||||||
|
|
||||||
describe(puzzles)
|
years = [record.year for record in records]
|
||||||
|
has_drama = ["Drama" in record.genres for record in records]
|
||||||
|
drama_prop = proptable(years, has_drama; margins=1)
|
||||||
|
|
||||||
# Code for getting basic information about a data frame
|
# Code for listing 4.5
|
||||||
|
|
||||||
ncol(puzzles)
|
|
||||||
|
|
||||||
nrow(puzzles)
|
|
||||||
|
|
||||||
names(puzzles)
|
|
||||||
|
|
||||||
# Code for section 6.3
|
|
||||||
|
|
||||||
puzzles.Rating
|
|
||||||
|
|
||||||
using BenchmarkTools
|
|
||||||
@benchmark $puzzles.Rating
|
|
||||||
|
|
||||||
puzzles.Rating == copy(puzzles.Rating)
|
|
||||||
|
|
||||||
puzzles.Rating === copy(puzzles.Rating)
|
|
||||||
|
|
||||||
puzzles.Rating === puzzles.Rating
|
|
||||||
|
|
||||||
copy(puzzles.Rating) === copy(puzzles.Rating)
|
|
||||||
|
|
||||||
puzzles."Rating"
|
|
||||||
|
|
||||||
col = "Rating"
|
|
||||||
|
|
||||||
data_frame_name[selected_rows, selected_columns]
|
|
||||||
|
|
||||||
puzzles[:, "Rating"]
|
|
||||||
puzzles[:, :Rating]
|
|
||||||
puzzles[:, 4]
|
|
||||||
puzzles[:, col]
|
|
||||||
|
|
||||||
columnindex(puzzles, "Rating")
|
|
||||||
|
|
||||||
columnindex(puzzles, "Some fancy column name")
|
|
||||||
|
|
||||||
hasproperty(puzzles, "Rating")
|
|
||||||
hasproperty(puzzles, "Some fancy column name")
|
|
||||||
|
|
||||||
@benchmark $puzzles[:, :Rating]
|
|
||||||
|
|
||||||
puzzles[!, "Rating"]
|
|
||||||
puzzles[!, :Rating]
|
|
||||||
puzzles[!, 4]
|
|
||||||
puzzles[!, col]
|
|
||||||
|
|
||||||
using Plots
|
using Plots
|
||||||
plot(histogram(puzzles.Rating, label="Rating"),
|
|
||||||
histogram(puzzles.RatingDeviation, label="RatingDeviation"),
|
|
||||||
histogram(puzzles.Popularity, label="Popularity"),
|
|
||||||
histogram(puzzles.NbPlays, label="NbPlays"))
|
|
||||||
|
|
||||||
plot([histogram(puzzles[!, col]; label=col) for
|
plot(names(drama_prop, 1), drama_prop[:, 2]; legend=false,
|
||||||
col in ["Rating", "RatingDeviation",
|
xlabel="year", ylabel="Drama probability")
|
||||||
"Popularity", "NbPlays"]]...)
|
|
||||||
|
|
||||||
# Code for section 6.4
|
# Code for section 4.6.1
|
||||||
|
|
||||||
using Statistics
|
s1 = Symbol("x")
|
||||||
plays_lo = median(puzzles.NbPlays)
|
s2 = Symbol("hello world!")
|
||||||
puzzles.NbPlays .> plays_lo
|
s3 = Symbol("x", 1)
|
||||||
|
|
||||||
puzzles.NbPlays > plays_lo
|
typeof(s1)
|
||||||
|
typeof(s2)
|
||||||
|
typeof(s3)
|
||||||
|
|
||||||
rating_lo = 1500
|
Symbol("1")
|
||||||
rating_hi = quantile(puzzles.Rating, 0.99)
|
|
||||||
rating_lo .< puzzles.Rating .< rating_hi
|
|
||||||
|
|
||||||
row_selector = (puzzles.NbPlays .> plays_lo) .&&
|
:x
|
||||||
(rating_lo .< puzzles.Rating .< rating_hi)
|
:x1
|
||||||
|
|
||||||
sum(row_selector)
|
:hello world
|
||||||
count(row_selector)
|
:1
|
||||||
|
|
||||||
# Code for listing 6.3
|
# Code for section 4.6.2
|
||||||
|
|
||||||
good = puzzles[row_selector, ["Rating", "Popularity"]]
|
supertype(Symbol)
|
||||||
|
|
||||||
# Code for plotting histograms
|
:x == :x
|
||||||
|
:x == :y
|
||||||
|
|
||||||
plot(histogram(good.Rating; label="Rating"),
|
# Code for listing 4.6
|
||||||
histogram(good.Popularity; label="Popularity"))
|
|
||||||
|
|
||||||
# Code for column selectors
|
using BenchmarkTools
|
||||||
|
str = string.("x", 1:10^6)
|
||||||
|
symb = Symbol.(str)
|
||||||
|
@benchmark "x" in $str
|
||||||
|
@benchmark :x in $symb
|
||||||
|
|
||||||
puzzles[1, "Rating"]
|
# Code for section 4.7
|
||||||
|
|
||||||
puzzles[:, "Rating"]
|
using InlineStrings
|
||||||
|
s1 = InlineString("x")
|
||||||
|
typeof(s1)
|
||||||
|
s2 = InlineString("∀")
|
||||||
|
typeof(s2)
|
||||||
|
sv = inlinestrings(["The", "quick", "brown", "fox", "jumps",
|
||||||
|
"over", "the", "lazy", "dog"])
|
||||||
|
|
||||||
row1 = puzzles[1, ["Rating", "Popularity"]]
|
# Code for listing 4.7
|
||||||
|
|
||||||
row1["Rating"]
|
using Random
|
||||||
row1[:Rating]
|
using BenchmarkTools
|
||||||
row1[1]
|
Random.seed!(1234);
|
||||||
row1.Rating
|
s1 = [randstring(3) for i in 1:10^6]
|
||||||
row1."Rating"
|
s2 = inlinestrings(s1)
|
||||||
|
|
||||||
good = puzzles[row_selector, ["Rating", "Popularity"]]
|
# Code for analyzing properties of InlineStrings.jl
|
||||||
|
|
||||||
good[1, "Rating"]
|
Base.summarysize(s1)
|
||||||
good[1, :]
|
Base.summarysize(s2)
|
||||||
good[:, "Rating"]
|
|
||||||
good[:, :]
|
|
||||||
|
|
||||||
names(puzzles, ["Rating", "Popularity"])
|
@benchmark sort($s1)
|
||||||
names(puzzles, [:Rating, :Popularity])
|
@benchmark sort($s2)
|
||||||
names(puzzles, [4, 6])
|
|
||||||
names(puzzles, [false, false, false, true, false, true, false, false, false])
|
|
||||||
names(puzzles, r"Rating")
|
|
||||||
names(puzzles, Not([4, 6]))
|
|
||||||
names(puzzles, Not(r"Rating"))
|
|
||||||
names(puzzles, Between("Rating", "Popularity"))
|
|
||||||
names(puzzles, :)
|
|
||||||
names(puzzles, All())
|
|
||||||
names(puzzles, Cols(r"Rating", "NbPlays"))
|
|
||||||
names(puzzles, Cols(startswith("P")))
|
|
||||||
|
|
||||||
names(puzzles, startswith("P"))
|
# Code for listing 4.8
|
||||||
|
|
||||||
names(puzzles, Real)
|
open("iris.txt", "w") do io
|
||||||
|
for i in 1:10^6
|
||||||
names(puzzles, AbstractString)
|
println(io, "Iris setosa")
|
||||||
|
println(io, "Iris virginica")
|
||||||
puzzles[:, names(puzzles, Real)]
|
println(io, "Iris versicolor")
|
||||||
|
|
||||||
# Code for row subsetting
|
|
||||||
|
|
||||||
df1 = puzzles[:, ["Rating", "Popularity"]];
|
|
||||||
df2 = puzzles[!, ["Rating", "Popularity"]];
|
|
||||||
|
|
||||||
df1 == df2
|
|
||||||
df1 == puzzles
|
|
||||||
df2 == puzzles
|
|
||||||
|
|
||||||
df1.Rating === puzzles.Rating
|
|
||||||
df1.Popularity === puzzles.Popularity
|
|
||||||
df2.Rating === puzzles.Rating
|
|
||||||
df2.Popularity === puzzles.Popularity
|
|
||||||
|
|
||||||
@benchmark $puzzles[:, ["Rating", "Popularity"]]
|
|
||||||
@benchmark $puzzles[!, ["Rating", "Popularity"]]
|
|
||||||
|
|
||||||
puzzles[1, 1]
|
|
||||||
puzzles[[1], 1]
|
|
||||||
puzzles[1, [1]]
|
|
||||||
puzzles[[1], [1]]
|
|
||||||
|
|
||||||
# Code for making views
|
|
||||||
|
|
||||||
@view puzzles[1, 1]
|
|
||||||
|
|
||||||
@view puzzles[[1], 1]
|
|
||||||
|
|
||||||
@view puzzles[1, [1]]
|
|
||||||
|
|
||||||
@view puzzles[[1], [1]]
|
|
||||||
|
|
||||||
@btime $puzzles[$row_selector, ["Rating", "Popularity"]];
|
|
||||||
@btime @view $puzzles[$row_selector, ["Rating", "Popularity"]];
|
|
||||||
|
|
||||||
parentindices(@view puzzles[row_selector, ["Rating", "Popularity"]])
|
|
||||||
|
|
||||||
# Code for section 6.5
|
|
||||||
|
|
||||||
describe(good)
|
|
||||||
|
|
||||||
rating_mapping = Dict{Int, Vector{Int}}()
|
|
||||||
for (i, rating) in enumerate(good.Rating)
|
|
||||||
if haskey(rating_mapping, rating)
|
|
||||||
push!(rating_mapping[rating], i)
|
|
||||||
else
|
|
||||||
rating_mapping[rating] = [i]
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
rating_mapping
|
|
||||||
|
|
||||||
good[rating_mapping[2108], :]
|
# Code for section 4.8.2
|
||||||
|
|
||||||
unique(good[rating_mapping[2108], :].Rating)
|
uncompressed = readlines("iris.txt")
|
||||||
|
|
||||||
using Statistics
|
using PooledArrays
|
||||||
mean(good[rating_mapping[2108], "Popularity"])
|
compressed = PooledArray(uncompressed)
|
||||||
|
|
||||||
ratings = unique(good.Rating)
|
Base.summarysize(uncompressed)
|
||||||
|
Base.summarysize(compressed)
|
||||||
|
|
||||||
mean_popularities = map(ratings) do rating
|
# Code for section 4.8.3
|
||||||
indices = rating_mapping[rating]
|
|
||||||
popularities = good[indices, "Popularity"]
|
|
||||||
return mean(popularities)
|
|
||||||
end
|
|
||||||
|
|
||||||
scatter(ratings, mean_popularities;
|
compressed.invpool
|
||||||
xlabel="rating", ylabel="mean popularity", legend=false)
|
compressed.pool
|
||||||
|
|
||||||
import Loess
|
compressed[10]
|
||||||
model = Loess.loess(ratings, mean_popularities);
|
compressed.pool[compressed.refs[10]]
|
||||||
ratings_predict = float.(sort(ratings))
|
|
||||||
popularity_predict = Loess.predict(model, ratings_predict)
|
|
||||||
|
|
||||||
plot!(ratings_predict, popularity_predict; width=5, color="black")
|
Base.summarysize.(compressed.pool)
|
||||||
|
|
||||||
|
v1 = string.("x", 1:10^6)
|
||||||
|
v2 = PooledArray(v1)
|
||||||
|
Base.summarysize(v1)
|
||||||
|
Base.summarysize(v2)
|
||||||
|
|||||||
431
ch07.jl
431
ch07.jl
@@ -1,279 +1,214 @@
|
|||||||
# Bogumił Kamiński, 2022
|
# Bogumił Kamiński, 2022
|
||||||
|
|
||||||
# Codes for chapter 7
|
# Codes for chapter 5
|
||||||
|
|
||||||
# Code for section 7.1
|
# Code for listing 5.1
|
||||||
|
|
||||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
using HTTP
|
||||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
using JSON3
|
||||||
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
query = "https://api.nbp.pl/api/exchangerates/rates/a/usd/" *
|
||||||
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
"2020-06-01/?format=json"
|
||||||
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
response = HTTP.get(query)
|
||||||
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
json = JSON3.read(response.body)
|
||||||
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
|
||||||
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
|
||||||
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
|
||||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
|
||||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89];
|
|
||||||
|
|
||||||
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
# Code for the remainder of section 5.1.2
|
||||||
set2=(x=aq[:, 3], y=aq[:, 4]),
|
|
||||||
set3=(x=aq[:, 5], y=aq[:, 6]),
|
|
||||||
set4=(x=aq[:, 7], y=aq[:, 8]));
|
|
||||||
|
|
||||||
using DataFrames
|
response.body
|
||||||
|
|
||||||
# Code for listing 7.1
|
String(response.body)
|
||||||
|
|
||||||
aq1 = DataFrame(aq, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
response.body
|
||||||
DataFrame(aq, [:x1, :y1, :x2, :y2, :x3, :y3, :x4, :y4])
|
|
||||||
|
|
||||||
# Code for creating DataFrame with automatic column names
|
json.table
|
||||||
|
json.currency
|
||||||
|
json.code
|
||||||
|
json.rates
|
||||||
|
|
||||||
DataFrame(aq, :auto)
|
json.rates[1].mid
|
||||||
|
|
||||||
# Codes for creating DataFrame from vector of vectors
|
only(json.rates).mid
|
||||||
|
|
||||||
aq_vec = collect(eachcol(aq))
|
only([])
|
||||||
DataFrame(aq_vec, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
only([1, 2])
|
||||||
DataFrame(aq_vec, :auto)
|
|
||||||
|
|
||||||
# Codes for section 7.1.2
|
# Code for listing 5.2
|
||||||
|
|
||||||
data.set1.x
|
query = "https://api.nbp.pl/api/exchangerates/rates/a/usd/" *
|
||||||
|
"2020-06-06/?format=json"
|
||||||
|
response = HTTP.get(query)
|
||||||
|
|
||||||
DataFrame(x1=data.set1.x, y1=data.set1.y,
|
# Code for listing 5.3
|
||||||
x2=data.set2.x, y2=data.set2.y,
|
|
||||||
x3=data.set3.x, y3=data.set3.y,
|
|
||||||
x4=data.set4.x, y4=data.set4.y)
|
|
||||||
|
|
||||||
DataFrame(:x1 => data.set1.x, :y1 => data.set1.y,
|
query = "https://api.nbp.pl/api/exchangerates/rates/a/usd/" *
|
||||||
:x2 => data.set2.x, :y2 => data.set2.y,
|
"2020-06-01/?format=json"
|
||||||
:x3 => data.set3.x, :y3 => data.set3.y,
|
try
|
||||||
:x4 => data.set4.x, :y4 => data.set4.y)
|
response = HTTP.get(query)
|
||||||
|
json = JSON3.read(response.body)
|
||||||
DataFrame([:x1 => data.set1.x, :y1 => data.set1.y,
|
only(json.rates).mid
|
||||||
:x2 => data.set2.x, :y2 => data.set2.y,
|
catch e
|
||||||
:x3 => data.set3.x, :y3 => data.set3.y,
|
if e isa HTTP.ExceptionRequest.StatusError
|
||||||
:x4 => data.set4.x, :y4 => data.set4.y]);
|
missing
|
||||||
|
else
|
||||||
[(i, v) for i in 1:4 for v in [:x, :y]]
|
rethrow(e)
|
||||||
|
end
|
||||||
[string(v, i) for i in 1:4 for v in [:x, :y]]
|
|
||||||
|
|
||||||
[string(v, i) => getproperty(data[i], v)
|
|
||||||
for i in 1:4 for v in [:x, :y]]
|
|
||||||
|
|
||||||
DataFrame([string(v, i) => getproperty(data[i], v)
|
|
||||||
for i in 1:4 for v in [:x, :y]]);
|
|
||||||
|
|
||||||
data_dict = Dict([string(v, i) => getproperty(data[i], v)
|
|
||||||
for i in 1:4 for v in [:x, :y]])
|
|
||||||
collect(data_dict)
|
|
||||||
|
|
||||||
DataFrame(data_dict)
|
|
||||||
|
|
||||||
df1 = DataFrame(x1=data.set1.x)
|
|
||||||
df1.x1 === data.set1.x
|
|
||||||
|
|
||||||
df2 = DataFrame(x1=data.set1.x; copycols=false)
|
|
||||||
df2.x1 === data.set1.x
|
|
||||||
|
|
||||||
df = DataFrame(x=1:3, y=1)
|
|
||||||
df.x
|
|
||||||
|
|
||||||
DataFrame(x=[1], y=[1, 2, 3])
|
|
||||||
|
|
||||||
# Codes for section 7.1.3
|
|
||||||
|
|
||||||
data.set1
|
|
||||||
DataFrame(data.set1)
|
|
||||||
|
|
||||||
DataFrame([(a=1, b=2), (a=3, b=4), (a=5, b=6)])
|
|
||||||
|
|
||||||
data
|
|
||||||
|
|
||||||
# Code for listing 7.2
|
|
||||||
|
|
||||||
aq2 = DataFrame(data)
|
|
||||||
|
|
||||||
# Codes for listing 7.3
|
|
||||||
|
|
||||||
data_dfs = map(DataFrame, data)
|
|
||||||
|
|
||||||
# Codes for vertical concatenation examples
|
|
||||||
|
|
||||||
vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4)
|
|
||||||
|
|
||||||
vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4;
|
|
||||||
source="source_id")
|
|
||||||
|
|
||||||
vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4;
|
|
||||||
source="source_id"=>string.("set", 1:4))
|
|
||||||
|
|
||||||
reduce(vcat, collect(data_dfs);
|
|
||||||
source="source_id"=>string.("set", 1:4))
|
|
||||||
|
|
||||||
# Code for listing 7.4
|
|
||||||
|
|
||||||
df1 = DataFrame(a=1:3, b=11:13)
|
|
||||||
df2 = DataFrame(a=4:6, c=24:26)
|
|
||||||
vcat(df1, df2)
|
|
||||||
vcat(df1, df2; cols=:union)
|
|
||||||
|
|
||||||
# Code for listing 7.5
|
|
||||||
|
|
||||||
df_agg = DataFrame()
|
|
||||||
append!(df_agg, data_dfs.set1)
|
|
||||||
append!(df_agg, data_dfs.set2)
|
|
||||||
|
|
||||||
# Code for appending tables to a data frame
|
|
||||||
|
|
||||||
df_agg = DataFrame()
|
|
||||||
append!(df_agg, data.set1)
|
|
||||||
append!(df_agg, data.set2)
|
|
||||||
|
|
||||||
# Code for promote keyword argument
|
|
||||||
|
|
||||||
df1 = DataFrame(a=1:3, b=11:13)
|
|
||||||
df2 = DataFrame(a=4:6, b=[14, missing, 16])
|
|
||||||
append!(df1, df2)
|
|
||||||
append!(df1, df2; promote=true)
|
|
||||||
|
|
||||||
# Code for section 7.2.3
|
|
||||||
|
|
||||||
df = DataFrame()
|
|
||||||
push!(df, (a=1, b=2))
|
|
||||||
push!(df, (a=3, b=4))
|
|
||||||
|
|
||||||
df = DataFrame(a=Int[], b=Int[])
|
|
||||||
push!(df, [1, 2])
|
|
||||||
push!(df, [3, 4])
|
|
||||||
|
|
||||||
# One step of a random walk on the integer grid: pick one of the four
# unit moves at random and return the new position as an (x, y) named tuple.
function sim_step(current)
    shift = rand(((1,0), (-1,0), (0,1), (0,-1)))
    new_x = current.x + first(shift)
    new_y = current.y + last(shift)
    return (x=new_x, y=new_y)
end
|
end
|
||||||
|
|
||||||
using BenchmarkTools
|
query = "https://api.nbp.pl/api/exchangerates/rates/a/usd/" *
|
||||||
@btime rand(((1,0), (-1,0), (0,1), (0,-1)));
|
"2020-06-06/?format=json"
|
||||||
|
try
|
||||||
|
response = HTTP.get(query)
|
||||||
|
json = JSON3.read(response.body)
|
||||||
|
only(json.rates).mid
|
||||||
|
catch e
|
||||||
|
if e isa HTTP.ExceptionRequest.StatusError
|
||||||
|
missing
|
||||||
|
else
|
||||||
|
rethrow(e)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
dx, dy = (10, 20)
|
# Code for section 5.2
|
||||||
dx
|
|
||||||
dy
|
ismissing(missing)
|
||||||
|
ismissing(1)
|
||||||
|
|
||||||
|
1 + missing
|
||||||
|
sin(missing)
|
||||||
|
|
||||||
|
1 == missing
|
||||||
|
1 > missing
|
||||||
|
1 < missing
|
||||||
|
|
||||||
|
if missing
|
||||||
|
print("this is not printed")
|
||||||
|
end
|
||||||
|
missing && true
|
||||||
|
|
||||||
|
coalesce(missing, true)
|
||||||
|
coalesce(missing, false)
|
||||||
|
|
||||||
|
isequal(1, missing)
|
||||||
|
isequal(missing, missing)
|
||||||
|
isless(1, missing)
|
||||||
|
isless(missing, missing)
|
||||||
|
|
||||||
|
isless(Inf, missing)
|
||||||
|
|
||||||
|
a = [1]
|
||||||
|
b = [1]
|
||||||
|
isequal(a, b)
|
||||||
|
a === b
|
||||||
|
|
||||||
|
x = [1, missing, 3, 4, missing]
|
||||||
|
|
||||||
|
coalesce.(x, 0)
|
||||||
|
|
||||||
|
sum(x)
|
||||||
|
|
||||||
|
y = skipmissing(x)
|
||||||
|
|
||||||
|
sum(y)
|
||||||
|
|
||||||
|
sum(skipmissing(x))
|
||||||
|
|
||||||
|
fun(x::Int, y::Int) = x + y
|
||||||
|
fun(1, 2)
|
||||||
|
fun(1, missing)
|
||||||
|
|
||||||
|
using Missings
|
||||||
|
fun2 = passmissing(fun)
|
||||||
|
fun2(1, 2)
|
||||||
|
fun2(1, missing)
|
||||||
|
|
||||||
|
# Code for section 5.3
|
||||||
|
|
||||||
|
using Dates
|
||||||
|
d = Date("2020-06-01")
|
||||||
|
|
||||||
|
typeof(d)
|
||||||
|
year(d)
|
||||||
|
month(d)
|
||||||
|
day(d)
|
||||||
|
|
||||||
|
dayofweek(d)
|
||||||
|
dayname(d)
|
||||||
|
|
||||||
|
Date(2020, 6, 1)
|
||||||
|
|
||||||
|
dates = Date.(2020, 6, 1:30)
|
||||||
|
|
||||||
|
Day(1)
|
||||||
|
|
||||||
|
d
|
||||||
|
d + Day(1)
|
||||||
|
|
||||||
|
Date(2020, 5, 20):Day(1):Date(2020, 7, 5)
|
||||||
|
|
||||||
|
collect(Date(2020, 5, 20):Day(1):Date(2020, 7, 5))
|
||||||
|
|
||||||
|
# Code for listing 5.6
|
||||||
|
|
||||||
|
# Query the NBP Web API for the USD exchange rate (table "a") on `date`.
# Returns the `mid` value of the single rate entry in the reply, or `missing`
# when the request fails with an HTTP status error. Any other exception is
# propagated to the caller unchanged.
function get_rate(date::Date)
    query = "https://api.nbp.pl/api/exchangerates/rates/" *
            "a/usd/$date/?format=json"
    try
        response = HTTP.get(query)
        json = JSON3.read(response.body)
        # `only` throws unless the reply holds exactly one rate entry
        return only(json.rates).mid
    catch e
        # Not an HTTP status error: re-raise the exception currently being
        # handled (argument-free `rethrow()` is the recommended form).
        e isa HTTP.ExceptionRequest.StatusError || rethrow()
        return missing
    end
end
|
||||||
|
|
||||||
|
# Code for showing how string interpolation works
|
||||||
|
|
||||||
|
"https://api.nbp.pl/api/exchangerates/rates/" *
|
||||||
|
"a/usd/$(dates[1])/?format=json"
|
||||||
|
|
||||||
|
"https://api.nbp.pl/api/exchangerates/rates/" *
|
||||||
|
"a/usd/$dates[1]/?format=json"
|
||||||
|
|
||||||
|
# Code for listing 5.7
|
||||||
|
|
||||||
|
rates = get_rate.(dates)
|
||||||
|
|
||||||
|
# Code for section 5.4
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
mean(rates)
|
||||||
|
std(rates)
|
||||||
|
|
||||||
|
mean(skipmissing(rates))
|
||||||
|
std(skipmissing(rates))
|
||||||
|
|
||||||
|
# Code for listing 5.8
|
||||||
|
|
||||||
using FreqTables
|
using FreqTables
|
||||||
using Random
|
proptable(dayname.(dates), ismissing.(rates); margins=1)
|
||||||
Random.seed!(1234);
|
|
||||||
proptable([rand(((1,0), (-1,0), (0,1), (0,-1))) for _ in 1:10^7])
|
|
||||||
|
|
||||||
using Random
|
# Code showing how to specify a complex condition using broadcasting
|
||||||
Random.seed!(6);
|
|
||||||
walk = DataFrame(x=0, y=0)
|
|
||||||
for _ in 1:10
|
|
||||||
current = walk[end, :]
|
|
||||||
push!(walk, sim_step(current))
|
|
||||||
end
|
|
||||||
walk
|
|
||||||
|
|
||||||
plot(walk.x, walk.y;
|
dayname.(dates) .== "Thursday" .&& ismissing.(rates)
|
||||||
legend=false,
|
|
||||||
series_annotations=1:11,
|
|
||||||
xticks=range(extrema(walk.x)...),
|
|
||||||
yticks=range(extrema(walk.y)...))
|
|
||||||
|
|
||||||
extrema(walk.y)
|
# Code for listing 5.9
|
||||||
|
|
||||||
range(1, 5)
|
dates[dayname.(dates) .== "Thursday" .&& ismissing.(rates)]
|
||||||
|
|
||||||
(3/4)^9
|
# Codes for plotting exchange rate data
|
||||||
|
|
||||||
# Code for listing 7.6
|
using Plots
|
||||||
|
plot(dates, rates; xlabel="day", ylabel="PLN/USD", legend=false)
|
||||||
|
|
||||||
function walk_unique() #A
|
rates_ok = .!ismissing.(rates)
|
||||||
walk = DataFrame(x=0, y=0)
|
|
||||||
for _ in 1:10
|
|
||||||
current = walk[end, :]
|
|
||||||
push!(walk, sim_step(current))
|
|
||||||
end
|
|
||||||
return nrow(unique(walk)) == nrow(walk) #B
|
|
||||||
end
|
|
||||||
Random.seed!(2);
|
|
||||||
proptable([walk_unique() for _ in 1:10^5])
|
|
||||||
|
|
||||||
# Code for a note on conversion
|
plot(dates[rates_ok], rates[rates_ok];
|
||||||
|
xlabel="day", ylabel="PLN/USD", legend=false)
|
||||||
|
|
||||||
x = [1.5]
|
using Impute
|
||||||
x[1] = 1
|
rates_filled = Impute.interp(rates)
|
||||||
x
|
|
||||||
|
|
||||||
# Code from section 7.3.1
|
scatter!(dates, rates_filled)
|
||||||
|
|
||||||
Matrix(walk)
|
|
||||||
Matrix{Any}(walk)
|
|
||||||
Matrix{String}(walk)
|
|
||||||
|
|
||||||
plot(walk)
|
|
||||||
|
|
||||||
plot(Matrix(walk); labels=["x" "y"] , legend=:topleft)
|
|
||||||
|
|
||||||
# Code from section 7.3.2
|
|
||||||
|
|
||||||
Tables.columntable(walk)
|
|
||||||
|
|
||||||
using BenchmarkTools
|
|
||||||
# Add up the values stored in the `x` column/property of `table`.
# The accumulator starts as the integer 0 and the column is read via
# `table.x` inside this function; the script benchmarks this exact shape.
function mysum(table)
    total = 0
    for value in table.x
        total += value
    end
    return total
end
|
|
||||||
df = DataFrame(x=1:1_000_000);
|
|
||||||
@btime mysum($df)
|
|
||||||
|
|
||||||
tab = Tables.columntable(df);
|
|
||||||
@btime mysum($tab)
|
|
||||||
|
|
||||||
@code_warntype mysum(df)
|
|
||||||
|
|
||||||
@code_warntype mysum(tab)
|
|
||||||
|
|
||||||
typeof(tab)
|
|
||||||
|
|
||||||
function barrier_mysum2(x)
|
|
||||||
s = 0
|
|
||||||
for v in x
|
|
||||||
s += v
|
|
||||||
end
|
|
||||||
return s
|
|
||||||
end
|
|
||||||
mysum2(table) = barrier_mysum2(table.x)
|
|
||||||
@btime mysum2($df)
|
|
||||||
|
|
||||||
df = DataFrame(a=[1, 1, 2], b=[1, 1, 2])
|
|
||||||
unique(df)
|
|
||||||
|
|
||||||
tab = Tables.columntable(df)
|
|
||||||
unique(tab)
|
|
||||||
|
|
||||||
# Code from section 7.3.3
|
|
||||||
|
|
||||||
Tables.rowtable(walk)
|
|
||||||
|
|
||||||
nti = Tables.namedtupleiterator(walk)
|
|
||||||
for v in nti
|
|
||||||
println(v)
|
|
||||||
end
|
|
||||||
|
|
||||||
er = eachrow(walk)
|
|
||||||
er[1]
|
|
||||||
er[end]
|
|
||||||
ec = eachcol(walk)
|
|
||||||
ec[1]
|
|
||||||
ec[end]
|
|
||||||
|
|
||||||
identity.(eachcol(walk))
|
|
||||||
|
|
||||||
df = DataFrame(x=1:2, b=["a", "b"])
|
|
||||||
identity.(eachcol(df))
|
|
||||||
|
|||||||
420
ch08.jl
420
ch08.jl
@@ -1,284 +1,248 @@
|
|||||||
# Bogumił Kamiński, 2022
|
# Bogumił Kamiński, 2022
|
||||||
|
|
||||||
# Codes for chapter 8
|
# Codes for chapter 6
|
||||||
|
|
||||||
# Codes for section 8.1
|
# Code for section 6.1
|
||||||
|
|
||||||
# Code for listing 8.1
|
if isfile("puzzles.csv.bz2")
|
||||||
|
@info "file already present"
|
||||||
import Downloads
|
else
|
||||||
using SHA
|
@info "fetching file"
|
||||||
git_zip = "git_web_ml.zip"
|
download("https://database.lichess.org/" *
|
||||||
if !isfile(git_zip)
|
"lichess_db_puzzle.csv.bz2",
|
||||||
Downloads.download("https://snap.stanford.edu/data/" *
|
"puzzles.csv.bz2")
|
||||||
"git_web_ml.zip",
|
|
||||||
git_zip)
|
|
||||||
end
|
|
||||||
isfile(git_zip)
|
|
||||||
open(sha256, git_zip) == [0x56, 0xc0, 0xc1, 0xc2,
|
|
||||||
0xc4, 0x60, 0xdc, 0x4c,
|
|
||||||
0x7b, 0xf8, 0x93, 0x57,
|
|
||||||
0xb1, 0xfe, 0xc0, 0x20,
|
|
||||||
0xf4, 0x5e, 0x2e, 0xce,
|
|
||||||
0xba, 0xb8, 0x1d, 0x13,
|
|
||||||
0x1d, 0x07, 0x3b, 0x10,
|
|
||||||
0xe2, 0x8e, 0xc0, 0x31]
|
|
||||||
|
|
||||||
# Code for opening a zip archive
|
|
||||||
|
|
||||||
import ZipFile
|
|
||||||
git_archive = ZipFile.Reader(git_zip)
|
|
||||||
|
|
||||||
# Code for listing 8.2
|
|
||||||
|
|
||||||
function ingest_to_df(archive::ZipFile.Reader, filename::AbstractString)
|
|
||||||
idx = only(findall(x -> x.name == filename, archive.files))
|
|
||||||
return CSV.read(read(archive.files[idx]), DataFrame)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Code for working with zip archive
|
using CodecBzip2
|
||||||
|
compressed = read("puzzles.csv.bz2")
|
||||||
|
plain = transcode(Bzip2Decompressor, compressed)
|
||||||
|
|
||||||
git_archive.files
|
open("puzzles.csv", "w") do io
|
||||||
|
println(io, "PuzzleId,FEN,Moves,Rating,RatingDeviation," *
|
||||||
|
"Popularity,NbPlays,Themes,GameUrl")
|
||||||
|
write(io, plain)
|
||||||
|
end
|
||||||
|
|
||||||
git_archive.files[2].name
|
readlines("puzzles.csv")
|
||||||
|
|
||||||
findall(x -> x.name == "git_web_ml/musae_git_edges.csv", git_archive.files)
|
# Code for section 6.2
|
||||||
findall(x -> x.name == "", git_archive.files)
|
|
||||||
|
|
||||||
only(findall(x -> x.name == "git_web_ml/musae_git_edges.csv", git_archive.files))
|
|
||||||
only(findall(x -> x.name == "", git_archive.files))
|
|
||||||
|
|
||||||
# Code for listing 8.3
|
|
||||||
|
|
||||||
using CSV
|
using CSV
|
||||||
using DataFrames
|
using DataFrames
|
||||||
edges_df = ingest_to_df(git_archive, "git_web_ml/musae_git_edges.csv");
|
puzzles = CSV.read("puzzles.csv", DataFrame);
|
||||||
classes_df = ingest_to_df(git_archive, "git_web_ml/musae_git_target.csv");
|
|
||||||
close(git_archive)
|
|
||||||
summary(edges_df)
|
|
||||||
describe(edges_df, :min, :max, :mean, :nmissing, :eltype)
|
|
||||||
summary(classes_df)
|
|
||||||
describe(classes_df, :min, :max, :mean, :nmissing, :eltype)
|
|
||||||
|
|
||||||
# Code for updating data frame columns using broadcasting
|
CSV.read(plain, DataFrame);
|
||||||
|
|
||||||
edges_df .+= 1
|
compressed = nothing
|
||||||
classes_df.id .+= 1
|
plain = nothing
|
||||||
|
|
||||||
# Code for examples of data frame broadcasting
|
# Code for listing 6.1
|
||||||
|
|
||||||
df = DataFrame(a=1:3, b=[4, missing, 5])
|
puzzles
|
||||||
df .^ 2
|
|
||||||
coalesce.(df, 0)
|
|
||||||
df .+ [10, 11, 12]
|
|
||||||
|
|
||||||
# Code for checking the order of :id column in a data frame
|
# Code for listing 6.2
|
||||||
|
|
||||||
classes_df.id == axes(classes_df, 1)
|
describe(puzzles)
|
||||||
|
|
||||||
# Code for the difference between ! and : in broadcasting assignment
|
# Code for getting basic information about a data frame
|
||||||
|
|
||||||
df = DataFrame(a=1:3, b=1:3)
|
ncol(puzzles)
|
||||||
df[!, :a] .= "x"
|
|
||||||
df[:, :b] .= "x"
|
|
||||||
df
|
|
||||||
|
|
||||||
# Code for the difference between ! and : in assignment
|
nrow(puzzles)
|
||||||
|
|
||||||
df = DataFrame(a=1:3, b=1:3, c=1:3)
|
names(puzzles)
|
||||||
df[!, :a] = ["x", "y", "z"]
|
|
||||||
df[:, :b] = ["x", "y", "z"]
|
|
||||||
df[:, :c] = [11, 12, 13]
|
|
||||||
df
|
|
||||||
|
|
||||||
# Codes for section 8.2
|
# Code for section 6.3
|
||||||
|
|
||||||
# Code from listing 8.4
|
puzzles.Rating
|
||||||
|
|
||||||
using Graphs
|
using BenchmarkTools
|
||||||
gh = SimpleGraph(nrow(classes_df))
|
@benchmark $puzzles.Rating
|
||||||
for (from, to) in eachrow(edges_df)
|
|
||||||
add_edge!(gh, from, to)
|
|
||||||
end
|
|
||||||
gh
|
|
||||||
ne(gh)
|
|
||||||
nv(gh)
|
|
||||||
|
|
||||||
# Code for iterator destructuring in iteration specification
|
puzzles.Rating == copy(puzzles.Rating)
|
||||||
|
|
||||||
mat = [1 2; 3 4; 5 6]
|
puzzles.Rating === copy(puzzles.Rating)
|
||||||
for (x1, x2) in eachrow(mat)
|
|
||||||
@show x1, x2
|
|
||||||
end
|
|
||||||
|
|
||||||
# Code for getting degrees of nodes in the graph
|
puzzles.Rating === puzzles.Rating
|
||||||
|
|
||||||
degree(gh)
|
copy(puzzles.Rating) === copy(puzzles.Rating)
|
||||||
|
|
||||||
# Code for adding a column to a data frame
|
puzzles."Rating"
|
||||||
|
|
||||||
classes_df.deg = degree(gh)
|
col = "Rating"
|
||||||
|
|
||||||
# Code for the difference between ! and : when adding a column
|
data_frame_name[selected_rows, selected_columns]
|
||||||
|
|
||||||
df = DataFrame()
|
puzzles[:, "Rating"]
|
||||||
x = [1, 2, 3]
|
puzzles[:, :Rating]
|
||||||
df[!, :x1] = x
|
puzzles[:, 4]
|
||||||
df[:, :x2] = x
|
puzzles[:, col]
|
||||||
df
|
|
||||||
df.x1 === x
|
|
||||||
df.x2 === x
|
|
||||||
df.x2 == x
|
|
||||||
|
|
||||||
# Code for creating a column using broadcasting
|
columnindex(puzzles, "Rating")
|
||||||
|
|
||||||
df.x3 .= 1
|
columnindex(puzzles, "Some fancy column name")
|
||||||
df
|
|
||||||
|
|
||||||
# Code for edge iterator of a graph
|
hasproperty(puzzles, "Rating")
|
||||||
|
hasproperty(puzzles, "Some fancy column name")
|
||||||
|
|
||||||
edges(gh)
|
@benchmark $puzzles[:, :Rating]
|
||||||
|
|
||||||
e1 = first(edges(gh))
|
puzzles[!, "Rating"]
|
||||||
dump(e1)
|
puzzles[!, :Rating]
|
||||||
e1.src
|
puzzles[!, 4]
|
||||||
e1.dst
|
puzzles[!, col]
|
||||||
|
|
||||||
# Code for listing 8.5
|
|
||||||
|
|
||||||
function deg_class(gh, class)
|
|
||||||
deg_ml = zeros(Int, length(class))
|
|
||||||
deg_web = zeros(Int, length(class))
|
|
||||||
for edge in edges(gh)
|
|
||||||
a, b = edge.src, edge.dst
|
|
||||||
if class[b] == 1
|
|
||||||
deg_ml[a] += 1
|
|
||||||
else
|
|
||||||
deg_web[a] += 1
|
|
||||||
end
|
|
||||||
if class[a] == 1
|
|
||||||
deg_ml[b] += 1
|
|
||||||
else
|
|
||||||
deg_web[b] += 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
return (deg_ml, deg_web)
|
|
||||||
end
|
|
||||||
|
|
||||||
# Code for computing machine learning and web neighbors for gh graph
|
|
||||||
|
|
||||||
classes_df.deg_ml, classes_df.deg_web =
|
|
||||||
deg_class(gh, classes_df.ml_target)
|
|
||||||
|
|
||||||
# Code for checking type stability of deg_class function
|
|
||||||
|
|
||||||
@time deg_class(gh, classes_df.ml_target);
|
|
||||||
@code_warntype deg_class(gh, classes_df.ml_target)
|
|
||||||
|
|
||||||
# Code for checking the classes_df summary statistics
|
|
||||||
|
|
||||||
describe(classes_df, :min, :max, :mean, :std)
|
|
||||||
|
|
||||||
# Code for average degree of node in the graph
|
|
||||||
|
|
||||||
2 * ne(gh) / nv(gh)
|
|
||||||
|
|
||||||
# Code for checking correctness of computations
|
|
||||||
|
|
||||||
classes_df.deg_ml + classes_df.deg_web == classes_df.deg
|
|
||||||
|
|
||||||
# Code for showing that DataFrames.jl checks consistency of stored objects
|
|
||||||
|
|
||||||
df = DataFrame(a=1, b=11)
|
|
||||||
push!(df.a, 2)
|
|
||||||
df
|
|
||||||
|
|
||||||
# Codes for section 8.3
|
|
||||||
|
|
||||||
# Code for computing groupwise means of columns
|
|
||||||
|
|
||||||
using Statistics
|
|
||||||
for type in [0, 1], col in ["deg_ml", "deg_web"]
|
|
||||||
println((type, col, mean(classes_df[classes_df.ml_target .== type, col])))
|
|
||||||
end
|
|
||||||
|
|
||||||
gdf = groupby(classes_df, :ml_target)
|
|
||||||
combine(gdf,
|
|
||||||
:deg_ml => mean => :mean_deg_ml,
|
|
||||||
:deg_web => mean => :mean_deg_web)
|
|
||||||
|
|
||||||
using DataFramesMeta
|
|
||||||
@combine(gdf,
|
|
||||||
:mean_deg_ml = mean(:deg_ml),
|
|
||||||
:mean_deg_web = mean(:deg_web))
|
|
||||||
|
|
||||||
# Code for simple plotting of relationship between developer degree and type
|
|
||||||
|
|
||||||
using Plots
|
using Plots
|
||||||
scatter(classes_df.deg_ml, classes_df.deg_web;
|
plot(histogram(puzzles.Rating, label="Rating"),
|
||||||
color=[x == 1 ? "black" : "gray" for x in classes_df.ml_target],
|
histogram(puzzles.RatingDeviation, label="RatingDeviation"),
|
||||||
xlabel="degree ml", ylabel="degree web", labels=false)
|
histogram(puzzles.Popularity, label="Popularity"),
|
||||||
|
histogram(puzzles.NbPlays, label="NbPlays"))
|
||||||
|
|
||||||
# Code for aggregation of degree data
|
plot([histogram(puzzles[!, col]; label=col) for
|
||||||
|
col in ["Rating", "RatingDeviation",
|
||||||
|
"Popularity", "NbPlays"]]...)
|
||||||
|
|
||||||
agg_df = combine(groupby(classes_df, [:deg_ml, :deg_web]),
|
# Code for section 6.4
|
||||||
:ml_target => (x -> 1 - mean(x)) => :web_mean)
|
|
||||||
|
|
||||||
# Code for comparison how Julia parses expressions
|
using Statistics
|
||||||
|
plays_lo = median(puzzles.NbPlays)
|
||||||
|
puzzles.NbPlays .> plays_lo
|
||||||
|
|
||||||
:ml_target => (x -> 1 - mean(x)) => :web_mean
|
puzzles.NbPlays > plays_lo
|
||||||
:ml_target => x -> 1 - mean(x) => :web_mean
|
|
||||||
|
|
||||||
# Code for aggregation using DataFramesMeta.jl
|
rating_lo = 1500
|
||||||
|
rating_hi = quantile(puzzles.Rating, 0.99)
|
||||||
|
rating_lo .< puzzles.Rating .< rating_hi
|
||||||
|
|
||||||
@combine(groupby(classes_df, [:deg_ml, :deg_web]),
|
row_selector = (puzzles.NbPlays .> plays_lo) .&&
|
||||||
:web_mean = 1 - mean(:ml_target))
|
(rating_lo .< puzzles.Rating .< rating_hi)
|
||||||
|
|
||||||
# Code for getting summary information about the aggregated data frame
|
sum(row_selector)
|
||||||
|
count(row_selector)
|
||||||
|
|
||||||
describe(agg_df)
|
# Code for listing 6.3
|
||||||
|
|
||||||
# Code for log1p function
|
good = puzzles[row_selector, ["Rating", "Popularity"]]
|
||||||
|
|
||||||
log1p(0)
|
# Code for plotting histograms
|
||||||
|
|
||||||
# Code for listing 8.6
|
plot(histogram(good.Rating; label="Rating"),
|
||||||
|
histogram(good.Popularity; label="Popularity"))
|
||||||
|
|
||||||
function gen_ticks(maxv)
|
# Code for column selectors
|
||||||
max2 = round(Int, log2(maxv))
|
|
||||||
tick = [0; 2 .^ (0:max2)]
|
puzzles[1, "Rating"]
|
||||||
return (log1p.(tick), tick)
|
|
||||||
|
puzzles[:, "Rating"]
|
||||||
|
|
||||||
|
row1 = puzzles[1, ["Rating", "Popularity"]]
|
||||||
|
|
||||||
|
row1["Rating"]
|
||||||
|
row1[:Rating]
|
||||||
|
row1[1]
|
||||||
|
row1.Rating
|
||||||
|
row1."Rating"
|
||||||
|
|
||||||
|
good = puzzles[row_selector, ["Rating", "Popularity"]]
|
||||||
|
|
||||||
|
good[1, "Rating"]
|
||||||
|
good[1, :]
|
||||||
|
good[:, "Rating"]
|
||||||
|
good[:, :]
|
||||||
|
|
||||||
|
names(puzzles, ["Rating", "Popularity"])
|
||||||
|
names(puzzles, [:Rating, :Popularity])
|
||||||
|
names(puzzles, [4, 6])
|
||||||
|
names(puzzles, [false, false, false, true, false, true, false, false, false])
|
||||||
|
names(puzzles, r"Rating")
|
||||||
|
names(puzzles, Not([4, 6]))
|
||||||
|
names(puzzles, Not(r"Rating"))
|
||||||
|
names(puzzles, Between("Rating", "Popularity"))
|
||||||
|
names(puzzles, :)
|
||||||
|
names(puzzles, All())
|
||||||
|
names(puzzles, Cols(r"Rating", "NbPlays"))
|
||||||
|
names(puzzles, Cols(startswith("P")))
|
||||||
|
|
||||||
|
names(puzzles, startswith("P"))
|
||||||
|
|
||||||
|
names(puzzles, Real)
|
||||||
|
|
||||||
|
names(puzzles, AbstractString)
|
||||||
|
|
||||||
|
puzzles[:, names(puzzles, Real)]
|
||||||
|
|
||||||
|
# Code for row subsetting
|
||||||
|
|
||||||
|
df1 = puzzles[:, ["Rating", "Popularity"]];
|
||||||
|
df2 = puzzles[!, ["Rating", "Popularity"]];
|
||||||
|
|
||||||
|
df1 == df2
|
||||||
|
df1 == puzzles
|
||||||
|
df2 == puzzles
|
||||||
|
|
||||||
|
df1.Rating === puzzles.Rating
|
||||||
|
df1.Popularity === puzzles.Popularity
|
||||||
|
df2.Rating === puzzles.Rating
|
||||||
|
df2.Popularity === puzzles.Popularity
|
||||||
|
|
||||||
|
@benchmark $puzzles[:, ["Rating", "Popularity"]]
|
||||||
|
@benchmark $puzzles[!, ["Rating", "Popularity"]]
|
||||||
|
|
||||||
|
puzzles[1, 1]
|
||||||
|
puzzles[[1], 1]
|
||||||
|
puzzles[1, [1]]
|
||||||
|
puzzles[[1], [1]]
|
||||||
|
|
||||||
|
# Code for making views
|
||||||
|
|
||||||
|
@view puzzles[1, 1]
|
||||||
|
|
||||||
|
@view puzzles[[1], 1]
|
||||||
|
|
||||||
|
@view puzzles[1, [1]]
|
||||||
|
|
||||||
|
@view puzzles[[1], [1]]
|
||||||
|
|
||||||
|
@btime $puzzles[$row_selector, ["Rating", "Popularity"]];
|
||||||
|
@btime @view $puzzles[$row_selector, ["Rating", "Popularity"]];
|
||||||
|
|
||||||
|
parentindices(@view puzzles[row_selector, ["Rating", "Popularity"]])
|
||||||
|
|
||||||
|
# Code for section 6.5
|
||||||
|
|
||||||
|
describe(good)
|
||||||
|
|
||||||
|
rating_mapping = Dict{Int, Vector{Int}}()
|
||||||
|
for (i, rating) in enumerate(good.Rating)
|
||||||
|
if haskey(rating_mapping, rating)
|
||||||
|
push!(rating_mapping[rating], i)
|
||||||
|
else
|
||||||
|
rating_mapping[rating] = [i]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
rating_mapping
|
||||||
|
|
||||||
|
good[rating_mapping[2108], :]
|
||||||
|
|
||||||
|
unique(good[rating_mapping[2108], :].Rating)
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
mean(good[rating_mapping[2108], "Popularity"])
|
||||||
|
|
||||||
|
ratings = unique(good.Rating)
|
||||||
|
|
||||||
|
mean_popularities = map(ratings) do rating
|
||||||
|
indices = rating_mapping[rating]
|
||||||
|
popularities = good[indices, "Popularity"]
|
||||||
|
return mean(popularities)
|
||||||
end
|
end
|
||||||
|
|
||||||
log1pjitter(x) = log1p(x) - 0.05 + rand() / 10
|
scatter(ratings, mean_popularities;
|
||||||
|
xlabel="rating", ylabel="mean popularity", legend=false)
|
||||||
|
|
||||||
using Random
|
import Loess
|
||||||
Random.seed!(1234);
|
model = Loess.loess(ratings, mean_popularities);
|
||||||
scatter(log1pjitter.(agg_df.deg_ml),
|
ratings_predict = float.(sort(ratings))
|
||||||
log1pjitter.(agg_df.deg_web);
|
popularity_predict = Loess.predict(model, ratings_predict)
|
||||||
zcolor=agg_df.web_mean,
|
|
||||||
xlabel="degree ml", ylabel="degree web",
|
|
||||||
markersize=2, markerstrokewidth=0, markeralpha=0.8,
|
|
||||||
legend=:topleft, labels = "fraction web",
|
|
||||||
xticks=gen_ticks(maximum(classes_df.deg_ml)),
|
|
||||||
yticks=gen_ticks(maximum(classes_df.deg_web)))
|
|
||||||
|
|
||||||
# Code for fitting logistic regression model
|
plot!(ratings_predict, popularity_predict; width=5, color="black")
|
||||||
|
|
||||||
using GLM
|
|
||||||
glm(@formula(ml_target~log1p(deg_ml)+log1p(deg_web)), classes_df, Binomial(), LogitLink())
|
|
||||||
|
|
||||||
# Code for inspecting @formula result
|
|
||||||
|
|
||||||
@formula(ml_target~log1p(deg_ml)+log1p(deg_web))
|
|
||||||
|
|
||||||
# Code for inserting columns to a data frame
|
|
||||||
|
|
||||||
df = DataFrame(x=1:3)
|
|
||||||
insertcols!(df, :y => 4:6)
|
|
||||||
insertcols!(df, :y => 4:6)
|
|
||||||
insertcols!(df, :z => 1)
|
|
||||||
|
|
||||||
insertcols!(df, 1, :a => 0)
|
|
||||||
insertcols!(df, :x, :pre_x => 2)
|
|
||||||
insertcols!(df, :x, :post_x => 3, after=true)
|
|
||||||
|
|||||||
279
ch09.jl
Normal file
279
ch09.jl
Normal file
@@ -0,0 +1,279 @@
|
|||||||
|
# Bogumił Kamiński, 2022
|
||||||
|
|
||||||
|
# Codes for chapter 7
|
||||||
|
|
||||||
|
# Code for section 7.1
|
||||||
|
|
||||||
|
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||||
|
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||||
|
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||||
|
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||||
|
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||||
|
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||||
|
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||||
|
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||||
|
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||||
|
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||||
|
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89];
|
||||||
|
|
||||||
|
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
||||||
|
set2=(x=aq[:, 3], y=aq[:, 4]),
|
||||||
|
set3=(x=aq[:, 5], y=aq[:, 6]),
|
||||||
|
set4=(x=aq[:, 7], y=aq[:, 8]));
|
||||||
|
|
||||||
|
using DataFrames
|
||||||
|
|
||||||
|
# Code for listing 7.1
|
||||||
|
|
||||||
|
# Build a data frame from the matrix `aq`, giving the columns explicit names.
aq1 = DataFrame(aq, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
||||||
|
DataFrame(aq, [:x1, :y1, :x2, :y2, :x3, :y3, :x4, :y4])
|
||||||
|
|
||||||
|
# Code for creating DataFrame with automatic column names
|
||||||
|
|
||||||
|
DataFrame(aq, :auto)
|
||||||
|
|
||||||
|
# Codes for creating DataFrame from vector of vectors
|
||||||
|
|
||||||
|
aq_vec = collect(eachcol(aq))
|
||||||
|
DataFrame(aq_vec, ["x1", "y1", "x2", "y2", "x3", "y3", "x4", "y4"])
|
||||||
|
DataFrame(aq_vec, :auto)
|
||||||
|
|
||||||
|
# Codes for section 7.1.2
|
||||||
|
|
||||||
|
data.set1.x
|
||||||
|
|
||||||
|
DataFrame(x1=data.set1.x, y1=data.set1.y,
|
||||||
|
x2=data.set2.x, y2=data.set2.y,
|
||||||
|
x3=data.set3.x, y3=data.set3.y,
|
||||||
|
x4=data.set4.x, y4=data.set4.y)
|
||||||
|
|
||||||
|
DataFrame(:x1 => data.set1.x, :y1 => data.set1.y,
|
||||||
|
:x2 => data.set2.x, :y2 => data.set2.y,
|
||||||
|
:x3 => data.set3.x, :y3 => data.set3.y,
|
||||||
|
:x4 => data.set4.x, :y4 => data.set4.y)
|
||||||
|
|
||||||
|
DataFrame([:x1 => data.set1.x, :y1 => data.set1.y,
|
||||||
|
:x2 => data.set2.x, :y2 => data.set2.y,
|
||||||
|
:x3 => data.set3.x, :y3 => data.set3.y,
|
||||||
|
:x4 => data.set4.x, :y4 => data.set4.y]);
|
||||||
|
|
||||||
|
[(i, v) for i in 1:4 for v in [:x, :y]]
|
||||||
|
|
||||||
|
[string(v, i) for i in 1:4 for v in [:x, :y]]
|
||||||
|
|
||||||
|
[string(v, i) => getproperty(data[i], v)
|
||||||
|
for i in 1:4 for v in [:x, :y]]
|
||||||
|
|
||||||
|
DataFrame([string(v, i) => getproperty(data[i], v)
|
||||||
|
for i in 1:4 for v in [:x, :y]]);
|
||||||
|
|
||||||
|
data_dict = Dict([string(v, i) => getproperty(data[i], v)
|
||||||
|
for i in 1:4 for v in [:x, :y]])
|
||||||
|
collect(data_dict)
|
||||||
|
|
||||||
|
DataFrame(data_dict)
|
||||||
|
|
||||||
|
df1 = DataFrame(x1=data.set1.x)
|
||||||
|
df1.x1 === data.set1.x
|
||||||
|
|
||||||
|
df2 = DataFrame(x1=data.set1.x; copycols=false)
|
||||||
|
df2.x1 === data.set1.x
|
||||||
|
|
||||||
|
df = DataFrame(x=1:3, y=1)
|
||||||
|
df.x
|
||||||
|
|
||||||
|
DataFrame(x=[1], y=[1, 2, 3])
|
||||||
|
|
||||||
|
# Codes for section 7.1.3
|
||||||
|
|
||||||
|
data.set1
|
||||||
|
DataFrame(data.set1)
|
||||||
|
|
||||||
|
DataFrame([(a=1, b=2), (a=3, b=4), (a=5, b=6)])
|
||||||
|
|
||||||
|
data
|
||||||
|
|
||||||
|
# Code for listing 7.2
|
||||||
|
|
||||||
|
aq2 = DataFrame(data)
|
||||||
|
|
||||||
|
# Codes for listing 7.3
|
||||||
|
|
||||||
|
data_dfs = map(DataFrame, data)
|
||||||
|
|
||||||
|
# Codes for vertical concatenation examples
|
||||||
|
|
||||||
|
vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4)
|
||||||
|
|
||||||
|
vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4;
|
||||||
|
source="source_id")
|
||||||
|
|
||||||
|
vcat(data_dfs.set1, data_dfs.set2, data_dfs.set3, data_dfs.set4;
|
||||||
|
source="source_id"=>string.("set", 1:4))
|
||||||
|
|
||||||
|
reduce(vcat, collect(data_dfs);
|
||||||
|
source="source_id"=>string.("set", 1:4))
|
||||||
|
|
||||||
|
# Code for listing 7.4
|
||||||
|
|
||||||
|
df1 = DataFrame(a=1:3, b=11:13)
|
||||||
|
df2 = DataFrame(a=4:6, c=24:26)
|
||||||
|
vcat(df1, df2)
|
||||||
|
vcat(df1, df2; cols=:union)
|
||||||
|
|
||||||
|
# Code for listing 7.5
|
||||||
|
|
||||||
|
df_agg = DataFrame()
|
||||||
|
append!(df_agg, data_dfs.set1)
|
||||||
|
append!(df_agg, data_dfs.set2)
|
||||||
|
|
||||||
|
# Code for appending tables to a data frame
|
||||||
|
|
||||||
|
df_agg = DataFrame()
|
||||||
|
append!(df_agg, data.set1)
|
||||||
|
append!(df_agg, data.set2)
|
||||||
|
|
||||||
|
# Code for promote keyword argument
|
||||||
|
|
||||||
|
df1 = DataFrame(a=1:3, b=11:13)
|
||||||
|
df2 = DataFrame(a=4:6, b=[14, missing, 16])
|
||||||
|
append!(df1, df2)
|
||||||
|
append!(df1, df2; promote=true)
|
||||||
|
|
||||||
|
# Code for section 7.2.3
|
||||||
|
|
||||||
|
df = DataFrame()
|
||||||
|
push!(df, (a=1, b=2))
|
||||||
|
push!(df, (a=3, b=4))
|
||||||
|
|
||||||
|
df = DataFrame(a=Int[], b=Int[])
|
||||||
|
push!(df, [1, 2])
|
||||||
|
push!(df, [3, 4])
|
||||||
|
|
||||||
|
# Take one random unit step from `current` (anything with `x` and `y`
# properties) and return the new position as a NamedTuple `(x=..., y=...)`.
function sim_step(current)
    # the four axis-aligned unit moves; one is drawn with a single `rand` call
    moves = ((1,0), (-1,0), (0,1), (0,-1))
    step = rand(moves)
    return (x=current.x + step[1], y=current.y + step[2])
end
|
||||||
|
|
||||||
|
using BenchmarkTools
|
||||||
|
@btime rand(((1,0), (-1,0), (0,1), (0,-1)));
|
||||||
|
|
||||||
|
dx, dy = (10, 20)
|
||||||
|
dx
|
||||||
|
dy
|
||||||
|
|
||||||
|
using FreqTables
|
||||||
|
using Random
|
||||||
|
Random.seed!(1234);
|
||||||
|
proptable([rand(((1,0), (-1,0), (0,1), (0,-1))) for _ in 1:10^7])
|
||||||
|
|
||||||
|
using Random
|
||||||
|
Random.seed!(6);
|
||||||
|
walk = DataFrame(x=0, y=0)
|
||||||
|
for _ in 1:10
|
||||||
|
current = walk[end, :]
|
||||||
|
push!(walk, sim_step(current))
|
||||||
|
end
|
||||||
|
walk
|
||||||
|
|
||||||
|
plot(walk.x, walk.y;
|
||||||
|
legend=false,
|
||||||
|
series_annotations=1:11,
|
||||||
|
xticks=range(extrema(walk.x)...),
|
||||||
|
yticks=range(extrema(walk.y)...))
|
||||||
|
|
||||||
|
extrema(walk.y)
|
||||||
|
|
||||||
|
range(1, 5)
|
||||||
|
|
||||||
|
(3/4)^9
|
||||||
|
|
||||||
|
# Code for listing 7.6
|
||||||
|
|
||||||
|
# Simulate a 10-step walk that starts at (0, 0) and return `true` when all
# visited points (rows of the walk data frame) are distinct.
function walk_unique() #A
    path = DataFrame(x=0, y=0)
    for _ in 1:10
        # the last row is the current position; append the next position
        push!(path, sim_step(path[end, :]))
    end
    # dropping duplicate rows changes the row count only if a point repeats
    return nrow(path) == nrow(unique(path)) #B
end
|
||||||
|
Random.seed!(2);
|
||||||
|
proptable([walk_unique() for _ in 1:10^5])
|
||||||
|
|
||||||
|
# Code for a note on conversion
|
||||||
|
|
||||||
|
x = [1.5]
|
||||||
|
x[1] = 1
|
||||||
|
x
|
||||||
|
|
||||||
|
# Code from section 7.3.1
|
||||||
|
|
||||||
|
Matrix(walk)
|
||||||
|
Matrix{Any}(walk)
|
||||||
|
Matrix{String}(walk)
|
||||||
|
|
||||||
|
plot(walk)
|
||||||
|
|
||||||
|
plot(Matrix(walk); labels=["x" "y"] , legend=:topleft)
|
||||||
|
|
||||||
|
# Code from section 7.3.2
|
||||||
|
|
||||||
|
Tables.columntable(walk)
|
||||||
|
|
||||||
|
using BenchmarkTools
|
||||||
|
# Sum the elements of column `x` of `table` (any object with an `x` property).
# The accumulator deliberately starts as the integer literal 0; this function
# is benchmarked and inspected with @code_warntype further down the script.
function mysum(table)
    total = 0 #A
    column = table.x #B
    for value in column
        total += value
    end
    return total
end
|
||||||
|
df = DataFrame(x=1:1_000_000);
|
||||||
|
@btime mysum($df)
|
||||||
|
|
||||||
|
tab = Tables.columntable(df);
|
||||||
|
@btime mysum($tab)
|
||||||
|
|
||||||
|
@code_warntype mysum(df)
|
||||||
|
|
||||||
|
@code_warntype mysum(tab)
|
||||||
|
|
||||||
|
typeof(tab)
|
||||||
|
|
||||||
|
# Kernel of `mysum2`: add up the elements of the iterable `x` from left to
# right, starting from the integer 0.
function barrier_mysum2(x)
    return foldl(+, x; init=0)
end

# Sum column `x` of `table` by extracting the column first and handing it to
# the kernel function above (a function barrier).
function mysum2(table)
    column = table.x
    return barrier_mysum2(column)
end
|
||||||
|
@btime mysum2($df)
|
||||||
|
|
||||||
|
df = DataFrame(a=[1, 1, 2], b=[1, 1, 2])
|
||||||
|
unique(df)
|
||||||
|
|
||||||
|
tab = Tables.columntable(df)
|
||||||
|
unique(tab)
|
||||||
|
|
||||||
|
# Code from section 7.3.3
|
||||||
|
|
||||||
|
Tables.rowtable(walk)
|
||||||
|
|
||||||
|
nti = Tables.namedtupleiterator(walk)
|
||||||
|
for v in nti
|
||||||
|
println(v)
|
||||||
|
end
|
||||||
|
|
||||||
|
er = eachrow(walk)
|
||||||
|
er[1]
|
||||||
|
er[end]
|
||||||
|
ec = eachcol(walk)
|
||||||
|
ec[1]
|
||||||
|
ec[end]
|
||||||
|
|
||||||
|
identity.(eachcol(walk))
|
||||||
|
|
||||||
|
df = DataFrame(x=1:2, b=["a", "b"])
|
||||||
|
identity.(eachcol(df))
|
||||||
284
ch10.jl
Normal file
284
ch10.jl
Normal file
@@ -0,0 +1,284 @@
|
|||||||
|
# Bogumił Kamiński, 2022
|
||||||
|
|
||||||
|
# Codes for chapter 8
|
||||||
|
|
||||||
|
# Codes for section 8.1
|
||||||
|
|
||||||
|
# Code for listing 8.1
|
||||||
|
|
||||||
|
import Downloads
|
||||||
|
using SHA
|
||||||
|
git_zip = "git_web_ml.zip"
|
||||||
|
if !isfile(git_zip)
|
||||||
|
Downloads.download("https://snap.stanford.edu/data/" *
|
||||||
|
"git_web_ml.zip",
|
||||||
|
git_zip)
|
||||||
|
end
|
||||||
|
isfile(git_zip)
|
||||||
|
open(sha256, git_zip) == [0x56, 0xc0, 0xc1, 0xc2,
|
||||||
|
0xc4, 0x60, 0xdc, 0x4c,
|
||||||
|
0x7b, 0xf8, 0x93, 0x57,
|
||||||
|
0xb1, 0xfe, 0xc0, 0x20,
|
||||||
|
0xf4, 0x5e, 0x2e, 0xce,
|
||||||
|
0xba, 0xb8, 0x1d, 0x13,
|
||||||
|
0x1d, 0x07, 0x3b, 0x10,
|
||||||
|
0xe2, 0x8e, 0xc0, 0x31]
|
||||||
|
|
||||||
|
# Code for opening a zip archive
|
||||||
|
|
||||||
|
import ZipFile
|
||||||
|
git_archive = ZipFile.Reader(git_zip)
|
||||||
|
|
||||||
|
# Code for listing 8.2
|
||||||
|
|
||||||
|
# Read the entry named `filename` from the zip `archive` and parse its
# contents as CSV into a DataFrame.
# `only` throws unless exactly one archive entry has that name.
function ingest_to_df(archive::ZipFile.Reader, filename::AbstractString)
    matches = findall(entry -> entry.name == filename, archive.files)
    entry = archive.files[only(matches)]
    return CSV.read(read(entry), DataFrame)
end
|
||||||
|
|
||||||
|
# Code for working with zip archive
|
||||||
|
|
||||||
|
git_archive.files
|
||||||
|
|
||||||
|
git_archive.files[2].name
|
||||||
|
|
||||||
|
findall(x -> x.name == "git_web_ml/musae_git_edges.csv", git_archive.files)
|
||||||
|
findall(x -> x.name == "", git_archive.files)
|
||||||
|
|
||||||
|
only(findall(x -> x.name == "git_web_ml/musae_git_edges.csv", git_archive.files))
|
||||||
|
only(findall(x -> x.name == "", git_archive.files))
|
||||||
|
|
||||||
|
# Code for listing 8.3
|
||||||
|
|
||||||
|
using CSV
|
||||||
|
using DataFrames
|
||||||
|
edges_df = ingest_to_df(git_archive, "git_web_ml/musae_git_edges.csv");
|
||||||
|
classes_df = ingest_to_df(git_archive, "git_web_ml/musae_git_target.csv");
|
||||||
|
close(git_archive)
|
||||||
|
summary(edges_df)
|
||||||
|
describe(edges_df, :min, :max, :mean, :nmissing, :eltype)
|
||||||
|
summary(classes_df)
|
||||||
|
describe(classes_df, :min, :max, :mean, :nmissing, :eltype)
|
||||||
|
|
||||||
|
# Code for updating data frame columns using broadcasting
|
||||||
|
|
||||||
|
edges_df .+= 1
|
||||||
|
classes_df.id .+= 1
|
||||||
|
|
||||||
|
# Code for examples of data frame broadcasting
|
||||||
|
|
||||||
|
df = DataFrame(a=1:3, b=[4, missing, 5])
|
||||||
|
df .^ 2
|
||||||
|
coalesce.(df, 0)
|
||||||
|
df .+ [10, 11, 12]
|
||||||
|
|
||||||
|
# Code for checking the order of :id column in a data frame
|
||||||
|
|
||||||
|
classes_df.id == axes(classes_df, 1)
|
||||||
|
|
||||||
|
# Code for the difference between ! and : in broadcasting assignment
|
||||||
|
|
||||||
|
df = DataFrame(a=1:3, b=1:3)
|
||||||
|
df[!, :a] .= "x"
|
||||||
|
df[:, :b] .= "x"
|
||||||
|
df
|
||||||
|
|
||||||
|
# Code for the difference between ! and : in assignment
|
||||||
|
|
||||||
|
df = DataFrame(a=1:3, b=1:3, c=1:3)
|
||||||
|
df[!, :a] = ["x", "y", "z"]
|
||||||
|
df[:, :b] = ["x", "y", "z"]
|
||||||
|
df[:, :c] = [11, 12, 13]
|
||||||
|
df
|
||||||
|
|
||||||
|
# Codes for section 8.2
|
||||||
|
|
||||||
|
# Code from listing 8.4
|
||||||
|
|
||||||
|
using Graphs
|
||||||
|
gh = SimpleGraph(nrow(classes_df))
|
||||||
|
for (from, to) in eachrow(edges_df)
|
||||||
|
add_edge!(gh, from, to)
|
||||||
|
end
|
||||||
|
gh
|
||||||
|
ne(gh)
|
||||||
|
nv(gh)
|
||||||
|
|
||||||
|
# Code for iterator destructuring in iteration specification
|
||||||
|
|
||||||
|
mat = [1 2; 3 4; 5 6]
|
||||||
|
for (x1, x2) in eachrow(mat)
|
||||||
|
@show x1, x2
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code for getting degrees of nodes in the graph
|
||||||
|
|
||||||
|
degree(gh)
|
||||||
|
|
||||||
|
# Code for adding a column to a data frame
|
||||||
|
|
||||||
|
classes_df.deg = degree(gh)
|
||||||
|
|
||||||
|
# Code for the difference between ! and : when adding a column
|
||||||
|
|
||||||
|
df = DataFrame()
|
||||||
|
x = [1, 2, 3]
|
||||||
|
df[!, :x1] = x
|
||||||
|
df[:, :x2] = x
|
||||||
|
df
|
||||||
|
df.x1 === x
|
||||||
|
df.x2 === x
|
||||||
|
df.x2 == x
|
||||||
|
|
||||||
|
# Code for creating a column using broadcasting
|
||||||
|
|
||||||
|
df.x3 .= 1
|
||||||
|
df
|
||||||
|
|
||||||
|
# Code for edge iterator of a graph
|
||||||
|
|
||||||
|
edges(gh)
|
||||||
|
|
||||||
|
e1 = first(edges(gh))
|
||||||
|
dump(e1)
|
||||||
|
e1.src
|
||||||
|
e1.dst
|
||||||
|
|
||||||
|
# Code for listing 8.5
|
||||||
|
|
||||||
|
# For every node of graph `gh`, count its neighbors with class 1 (`deg_ml`)
# and its neighbors with any other class (`deg_web`).
# `class` is indexed by node number; the result is the tuple
# `(deg_ml, deg_web)` of integer vectors of length `length(class)`.
function deg_class(gh, class)
    n = length(class)
    deg_ml, deg_web = zeros(Int, n), zeros(Int, n)
    for e in edges(gh)
        u, v = e.src, e.dst
        # each edge is visited once, so both endpoints are updated here:
        # the class of one endpoint selects which counter of the other grows
        (class[v] == 1 ? deg_ml : deg_web)[u] += 1
        (class[u] == 1 ? deg_ml : deg_web)[v] += 1
    end
    return (deg_ml, deg_web)
end
|
||||||
|
|
||||||
|
# Code for computing machine learning and web neighbors for gh graph
|
||||||
|
|
||||||
|
classes_df.deg_ml, classes_df.deg_web =
|
||||||
|
deg_class(gh, classes_df.ml_target)
|
||||||
|
|
||||||
|
# Code for checking type stability of deg_class function
|
||||||
|
|
||||||
|
@time deg_class(gh, classes_df.ml_target);
|
||||||
|
@code_warntype deg_class(gh, classes_df.ml_target)
|
||||||
|
|
||||||
|
# Code for checking the classes_df summary statistics
|
||||||
|
|
||||||
|
describe(classes_df, :min, :max, :mean, :std)
|
||||||
|
|
||||||
|
# Code for average degree of node in the graph
|
||||||
|
|
||||||
|
2 * ne(gh) / nv(gh)
|
||||||
|
|
||||||
|
# Code for checking correctness of computations
|
||||||
|
|
||||||
|
classes_df.deg_ml + classes_df.deg_web == classes_df.deg
|
||||||
|
|
||||||
|
# Code for showing that DataFrames.jl checks consistency of stored objects
|
||||||
|
|
||||||
|
df = DataFrame(a=1, b=11)
|
||||||
|
push!(df.a, 2)
|
||||||
|
df
|
||||||
|
|
||||||
|
# Codes for section 8.3
|
||||||
|
|
||||||
|
# Code for computing groupwise means of columns
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
for type in [0, 1], col in ["deg_ml", "deg_web"]
|
||||||
|
println((type, col, mean(classes_df[classes_df.ml_target .== type, col])))
|
||||||
|
end
|
||||||
|
|
||||||
|
gdf = groupby(classes_df, :ml_target)
|
||||||
|
combine(gdf,
|
||||||
|
:deg_ml => mean => :mean_deg_ml,
|
||||||
|
:deg_web => mean => :mean_deg_web)
|
||||||
|
|
||||||
|
using DataFramesMeta
|
||||||
|
@combine(gdf,
|
||||||
|
:mean_deg_ml = mean(:deg_ml),
|
||||||
|
:mean_deg_web = mean(:deg_web))
|
||||||
|
|
||||||
|
# Code for simple plotting of relationship between developer degree and type
|
||||||
|
|
||||||
|
using Plots
|
||||||
|
scatter(classes_df.deg_ml, classes_df.deg_web;
|
||||||
|
color=[x == 1 ? "black" : "gray" for x in classes_df.ml_target],
|
||||||
|
xlabel="degree ml", ylabel="degree web", labels=false)
|
||||||
|
|
||||||
|
# Code for aggregation of degree data
|
||||||
|
|
||||||
|
agg_df = combine(groupby(classes_df, [:deg_ml, :deg_web]),
|
||||||
|
:ml_target => (x -> 1 - mean(x)) => :web_mean)
|
||||||
|
|
||||||
|
# Code for comparing how Julia parses expressions
|
||||||
|
|
||||||
|
# Parenthesized anonymous function: `=>` is right-associative, so this is
# `:ml_target => ((x -> 1 - mean(x)) => :web_mean)` — source column, function,
# and target column name.
:ml_target => (x -> 1 - mean(x)) => :web_mean
|
||||||
|
# Without the parentheses the body of `->` extends to the end of the expression,
# so this is `:ml_target => (x -> ((1 - mean(x)) => :web_mean))`: the
# `=> :web_mean` part becomes the value returned by the anonymous function.
:ml_target => x -> 1 - mean(x) => :web_mean
|
||||||
|
|
||||||
|
# Code for aggregation using DataFramesMeta.jl
|
||||||
|
|
||||||
|
@combine(groupby(classes_df, [:deg_ml, :deg_web]),
|
||||||
|
:web_mean = 1 - mean(:ml_target))
|
||||||
|
|
||||||
|
# Code for getting summary information about the aggregated data frame
|
||||||
|
|
||||||
|
describe(agg_df)
|
||||||
|
|
||||||
|
# Code for log1p function
|
||||||
|
|
||||||
|
log1p(0)
|
||||||
|
|
||||||
|
# Code for listing 8.6
|
||||||
|
|
||||||
|
"""
    gen_ticks(maxv)

Return a tuple `(positions, labels)` describing axis ticks for data plotted on a
`log1p` scale.

`labels` is `0` followed by the powers of two `2^0, 2^1, …, 2^k`, where
`k = round(Int, log2(maxv))`; `positions` holds `log1p` of each label.
"""
function gen_ticks(maxv)
    top_exp = round(Int, log2(maxv))
    labels = vcat(0, 2 .^ (0:top_exp))
    positions = map(log1p, labels)
    return (positions, labels)
end
|
||||||
|
|
||||||
|
"""
    log1pjitter(x)

Return `log1p(x)` displaced by a random offset: `0.05` is subtracted and
`rand() / 10` is added, so the result lies within `0.05` of `log1p(x)`.
"""
function log1pjitter(x)
    shifted = log1p(x) - 0.05
    return shifted + rand() / 10
end
|
||||||
|
|
||||||
|
using Random
|
||||||
|
Random.seed!(1234);
|
||||||
|
scatter(log1pjitter.(agg_df.deg_ml),
|
||||||
|
log1pjitter.(agg_df.deg_web);
|
||||||
|
zcolor=agg_df.web_mean,
|
||||||
|
xlabel="degree ml", ylabel="degree web",
|
||||||
|
markersize=2, markerstrokewidth=0, markeralpha=0.8,
|
||||||
|
legend=:topleft, labels = "fraction web",
|
||||||
|
xticks=gen_ticks(maximum(classes_df.deg_ml)),
|
||||||
|
yticks=gen_ticks(maximum(classes_df.deg_web)))
|
||||||
|
|
||||||
|
# Code for fitting logistic regression model
|
||||||
|
|
||||||
|
using GLM
|
||||||
|
glm(@formula(ml_target~log1p(deg_ml)+log1p(deg_web)), classes_df, Binomial(), LogitLink())
|
||||||
|
|
||||||
|
# Code for inspecting @formula result
|
||||||
|
|
||||||
|
@formula(ml_target~log1p(deg_ml)+log1p(deg_web))
|
||||||
|
|
||||||
|
# Code for inserting columns into a data frame
|
||||||
|
|
||||||
|
# Start from a data frame with a single column `x` holding 1, 2, 3.
df = DataFrame(x=1:3)
|
||||||
|
# Add a column `y` with values 4, 5, 6 (no position given).
insertcols!(df, :y => 4:6)
|
||||||
|
# The same call repeated on purpose.
# NOTE(review): `y` already exists at this point, so this presumably demonstrates
# the duplicate-column-name error — confirm against DataFrames.jl docs.
insertcols!(df, :y => 4:6)
|
||||||
|
# Add a column `z` from the scalar 1.
# NOTE(review): presumably the scalar is repeated for every row — confirm.
insertcols!(df, :z => 1)
|
||||||
|
|
||||||
|
# Pass an integer position (1) before the new column `a`.
insertcols!(df, 1, :a => 0)
|
||||||
|
# Pass an existing column name (`x`) as the position for the new column `pre_x`;
# the column name suggests it lands before `x` — TODO confirm the default.
insertcols!(df, :x, :pre_x => 2)
|
||||||
|
# Same positional form with `after=true`, placing `post_x` relative to `x`
# on the other side.
insertcols!(df, :x, :post_x => 3, after=true)
|
||||||
18
chXXX_client.jl
Normal file
18
chXXX_client.jl
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
using HTTP
|
||||||
|
using JSON3
|
||||||
|
using DataFrames
|
||||||
|
using Plots
|
||||||
|
|
||||||
|
# One request per K in 30, 32, …, 80, each carrying max_time = 0.25.
df = DataFrame(K=30:2:80, max_time=0.25)

# For every row, POST `{"K": ..., "max_time": ...}` as JSON to the local server
# and keep the parsed JSON reply in the `data` column.
df.data = map(df.K, df.max_time) do K, max_time
    @show K
    # `@time` prints how long the round trip took.
    @time resp = HTTP.request("POST", "http://127.0.0.1:8000",
                              ["Content-Type" => "application/json"],
                              JSON3.write((; K, max_time)))
    return JSON3.read(resp.body)
end

# Stop if any reply does not carry status "OK". An explicit check is used
# instead of `@assert` because assertions may be disabled and give no detail
# about which request failed.
failed_K = df.K[[reply.status != "OK" for reply in df.data]]
isempty(failed_K) || error("server did not return status OK for K = $(failed_K)")

# Expand the `value` object of every reply into separate columns next to `K`.
df2 = select(df, :K, :data => ByRow(x -> x.value) => AsTable)

# Two panels: the `mv` field and the `zero` field of the replies, each against K.
plot(plot(df2.K, df2.mv; legend=false, xlabel="K", ylabel="expected value"),
     plot(df2.K, df2.zero; legend=false, xlabel="K", ylabel="probability of zero"))
|
||||||
45
chXXX_server.jl
Normal file
45
chXXX_server.jl
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
using Genie
|
||||||
|
using Statistics
|
||||||
|
using ThreadsX
|
||||||
|
|
||||||
|
"""
    v_asian_sample(T, X0, K, r, sd, m)::Float64

Simulate one random path and return its discounted payoff.

Starting from `X0`, the value is multiplied `m` times by
`exp((r - sd^2/2) * d + sd * sqrt(d) * randn())` with `d = T / m`. The payoff is
`max(avg - K, 0)`, where `avg` is the mean of the `m + 1` values on the path
(including `X0`), and it is discounted by `exp(-r * T)`.

`T / m` must be non-negative, otherwise `sqrt` throws a `DomainError`.
"""
function v_asian_sample(T, X0, K, r, sd, m)::Float64
    d = T / m
    # Both terms are the same for every step, so compute them once.
    drift = (r - sd^2 / 2) * d
    vol = sd * sqrt(d)
    # Start from a floating-point value so `X` and `sumX` keep one type even
    # when `X0` is passed as an integer.
    X = float(X0)
    sumX = X
    for _ in 1:m
        X *= exp(drift + vol * randn())
        sumX += X
    end
    return exp(-r * T) * max(sumX / (m + 1) - K, 0)
end
|
||||||
|
|
||||||
|
"""
    v_asian_value(T, X0, K, r, sd, m, max_time)

Draw batches of 10,000 `v_asian_sample(T, X0, K, r, sd, m)` values with
`ThreadsX.map` until `max_time` seconds have elapsed, then summarize them.

At least one batch is always drawn, so the statistics are never computed on an
empty sample (which would give `NaN`), even when `max_time` is zero, negative
or `NaN`.

Returns a named tuple with fields
- `n`: number of samples,
- `mv`: sample mean,
- `sdv`: sample standard deviation,
- `lo95`, `hi95`: `mv ∓ 1.96 * sdv / sqrt(n)`,
- `zero`: fraction of samples equal to `0`.
"""
function v_asian_value(T, X0, K, r, sd, m, max_time)
    result = Float64[]
    start_time = time()
    while true
        append!(result, ThreadsX.map(_ -> v_asian_sample(T, X0, K, r, sd, m), 1:10_000))
        # Check the time budget only after a batch, so one batch always runs.
        time() - start_time < max_time || break
    end
    n = length(result)
    mv = mean(result)
    sdv = std(result)
    half_width = 1.96 * sdv / sqrt(n)
    lo95 = mv - half_width
    hi95 = mv + half_width
    # Kept under the field name `zero` in the result; the local is named
    # differently to avoid shadowing `Base.zero`.
    zero_share = mean(==(0), result)
    return (; n, mv, sdv, lo95, hi95, zero=zero_share)
end
|
||||||
|
|
||||||
|
Genie.config.run_as_server = true
|
||||||
|
|
||||||
|
# POST "/" handler: read `K` and `max_time` from the JSON payload, run
# `v_asian_value` with the remaining parameters fixed (T=1.0, X0=50.0, r=0.05,
# sd=0.3, m=200) and reply with `{"status": "OK", "value": ...}`.
# Any exception raised while reading the fields or computing the value produces
# `{"status": "ERROR", "value": ""}` instead.
# NOTE(review): `max_time` is taken from the request as-is; consider capping it
# so a single request cannot occupy the server for an arbitrary time.
Genie.Router.route("/", method=POST) do
    message = Genie.Requests.jsonpayload()
    return try
        K = float(message["K"])
        max_time = float(message["max_time"])
        value = v_asian_value(1.0, 50.0, K, 0.05, 0.3, 200, max_time)
        Genie.Renderer.Json.json((status="OK", value=value))
    catch err
        # Record what went wrong on the server side; the client still only
        # receives the generic ERROR reply.
        @error "failed to process pricing request" exception=(err, catch_backtrace())
        Genie.Renderer.Json.json((status="ERROR", value=""))
    end
end
|
||||||
|
|
||||||
|
Genie.startup()
|
||||||
Reference in New Issue
Block a user