Codes for chapters 1, 2, and 3
This commit is contained in:
parent
f4c3f0f754
commit
b82be9e882
1030
Manifest.toml
Normal file
1030
Manifest.toml
Normal file
File diff suppressed because it is too large
Load Diff
8
Project.toml
Normal file
8
Project.toml
Normal file
@ -0,0 +1,8 @@
|
||||
[deps]
|
||||
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
|
||||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
|
||||
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
|
||||
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
|
||||
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
|
37
README.md
37
README.md
@ -1,2 +1,35 @@
|
||||
# JuliaForDataAnalysis
|
||||
Codes for the book "Julia for Data Analysis"
|
||||
This repository contains source codes for the "Julia for Data Analysis" book
|
||||
that is written by Bogumił Kamiński and is planned to be published in 2022 by
|
||||
[Manning Publications Co.](https://www.manning.com/).
|
||||
|
||||
In order to prepare the Julia environment before working with the materials
|
||||
presented in the book please perform the following setup steps:
|
||||
* [download](https://julialang.org/downloads/) and
|
||||
[install](https://julialang.org/downloads/platform/)
|
||||
[Julia](https://julialang.org/);
|
||||
all the codes were tested under Julia 1.7;
|
||||
* make sure you can start Julia by running the `julia` command in your system shell
|
||||
(alternative ways to use Julia are described in Appendix A to the book);
|
||||
* download [this repository](https://github.com/bkamins/JuliaForDataAnalysis)
|
||||
to a local folder on your computer;
|
||||
* start Julia in a folder containing the downloaded material using the command
|
||||
`julia --project`; the folder must
|
||||
contain the Project.toml and Manifest.toml files prepared for this book
|
||||
(an explanation of what these files do and why they are required is given in
|
||||
Appendix A to the book);
|
||||
* press *]*, write `instantiate` and press *Enter* (this process will ensure
|
||||
that Julia properly configures the working environment for working with
|
||||
the codes from the book);
|
||||
* press *Backspace*, write `exit()` and press *Enter*; now you should exit Julia
|
||||
and everything is set up to work with the materials presented in the book.
|
||||
|
||||
The codes for each chapter are stored in files named *chXX.jl*, where *XX* is
|
||||
the chapter number.
|
||||
|
||||
To work with codes from some given chapter:
|
||||
* start a fresh Julia session using the `julia --project` command in a folder
|
||||
containing the downloaded material;
|
||||
* execute the commands sequentially as they appear in the file;
|
||||
the codes were prepared in a way that you do not need to restart Julia
|
||||
when working with material from a single chapter, unless it is explicitly
|
||||
written in the instructions to restart Julia (some of the codes require this).
|
||||
|
25
ch01.jl
Normal file
25
ch01.jl
Normal file
@ -0,0 +1,25 @@
|
||||
# Bogumił Kamiński, 2021
|
||||
|
||||
# Codes for chapter 1
|
||||
|
||||
# Code from section 1.2.1
|
||||
|
||||
# Return the sum of the values in `1:n`, accumulated one term at a time in an
# explicit loop (the loop itself is what the `@time` call below measures).
function f(n)
    total = 0
    for term in 1:n
        total += term
    end
    return total
end
|
||||
|
||||
@time f(1_000_000_000)
|
||||
|
||||
# Code that reproduces the data frame presented in section 1.3
|
||||
|
||||
using DataFrames
|
||||
|
||||
DataFrame(a=1:3, name=["Alice", "Bob", "Clyde"],
|
||||
age=[19, 24, 21], friends=[[2], [1, 3], [2]],
|
||||
location=[(city="Atlanta", state="GA"),
|
||||
(city="Boston", state="MA"),
|
||||
(city="Austin", state="TX")])
|
442
ch02.jl
Normal file
442
ch02.jl
Normal file
@ -0,0 +1,442 @@
|
||||
# Bogumił Kamiński, 2021
|
||||
|
||||
# Codes for chapter 2
|
||||
|
||||
# Code for listing 2.1
|
||||
|
||||
1
|
||||
true
|
||||
"Hello world!"
|
||||
0.1
|
||||
[1, 2, 3]
|
||||
|
||||
# Code for listing 2.2
|
||||
|
||||
typeof(1)
|
||||
typeof(true)
|
||||
typeof("Hello world!")
|
||||
typeof(0.1)
|
||||
typeof([1, 2, 3])
|
||||
|
||||
# Code for showing bit representation of numbers
|
||||
|
||||
bitstring(1)
|
||||
bitstring(1.0)
|
||||
bitstring(Int8(1))
|
||||
|
||||
# Code showing what the Int alias expands to
|
||||
|
||||
Int
|
||||
|
||||
# Code for checking if value is of some type
|
||||
|
||||
[1, 2, 3] isa Vector{Int}
|
||||
[1, 2, 3] isa Array{Int64, 1}
|
||||
|
||||
# Code for section 2.2
|
||||
|
||||
x = 1
|
||||
y = [1, 2, 3]
|
||||
|
||||
x = 1
|
||||
x
|
||||
typeof(x)
|
||||
x = 0.1
|
||||
x
|
||||
typeof(x)
|
||||
|
||||
Kamiński = 1
|
||||
x₁ = 0.5
|
||||
ε = 0.0001
|
||||
|
||||
?₁
|
||||
?ε
|
||||
|
||||
# Code for listing 2.3
|
||||
|
||||
x = -7
|
||||
if x > 0
|
||||
println("positive")
|
||||
elseif x < 0
|
||||
println("negative")
|
||||
elseif x == 0
|
||||
println("zero")
|
||||
else
|
||||
println("unexpected condition")
|
||||
end
|
||||
|
||||
# Code showing that logical condition must be Bool
|
||||
|
||||
x = -7
|
||||
if x
|
||||
println("condition was true")
|
||||
end
|
||||
|
||||
# Code showing comparisons against NaN
|
||||
|
||||
NaN > 0
|
||||
NaN >= 0
|
||||
NaN < 0
|
||||
NaN <= 0
|
||||
NaN == 0
|
||||
|
||||
NaN != 0
|
||||
NaN != NaN
|
||||
|
||||
# Code showing that floating point arithmetic is only approximate
|
||||
|
||||
0.1 + 0.2 == 0.3
|
||||
|
||||
0.1 + 0.2
|
||||
|
||||
isapprox(0.1 + 0.2, 0.3)
|
||||
|
||||
0.1 + 0.2 ≈ 0.3
|
||||
|
||||
# Code showing combining conditions
|
||||
|
||||
x = -7
|
||||
x > 0 && x < 10
|
||||
x < 0 || log(x) > 10
|
||||
|
||||
x = -7
|
||||
log(x)
|
||||
|
||||
# Code showing typical one-line conditional execution expressions
|
||||
|
||||
x = -7
|
||||
x < 0 && println(x^2)
|
||||
iseven(x) || println("x is odd")
|
||||
|
||||
x = -7
|
||||
if x < 0
|
||||
println(x^2)
|
||||
end
|
||||
if !iseven(x)
|
||||
println("x is odd")
|
||||
end
|
||||
|
||||
x = -7
|
||||
if x < 0 && x^2
|
||||
println("inside if")
|
||||
end
|
||||
|
||||
# Code showing ternary operator
|
||||
|
||||
x = -7
|
||||
x > 0 ? println("x is positive") : println("x is not positive")
|
||||
|
||||
# Code from listing 2.4
|
||||
|
||||
for i in [1, 2, 3]
|
||||
println(i, " is ", isodd(i) ? "odd" : "even")
|
||||
end
|
||||
|
||||
# Code from listing 2.5
|
||||
|
||||
i = 1
|
||||
while i < 4
|
||||
println(i, " is ", isodd(i) ? "odd" : "even")
|
||||
global i += 1
|
||||
end
|
||||
|
||||
# Code showing break and continue keywords
|
||||
|
||||
i = 0
|
||||
while true
|
||||
global i += 1
|
||||
i > 6 && break
|
||||
isodd(i) && continue
|
||||
println(i, " is even")
|
||||
end
|
||||
|
||||
# Code from listing 2.6
|
||||
|
||||
x = -7
|
||||
x < 0 && begin
|
||||
println(x)
|
||||
x += 1
|
||||
println(x)
|
||||
2 * x
|
||||
end
|
||||
x > 0 ? (println(x); x) : (x += 1; println(x); x)
|
||||
|
||||
# Code from section 2.3.4
|
||||
|
||||
x = [8, 3, 1, 5, 7]
|
||||
k = 1
|
||||
|
||||
y = sort(x)
|
||||
|
||||
for i in 1:k
|
||||
y[i] = y[k + 1]
|
||||
y[end - i + 1] = y[end - k]
|
||||
end
|
||||
y
|
||||
|
||||
s = 0
|
||||
for v in y
|
||||
s += v
|
||||
end
|
||||
s
|
||||
s / length(y)
|
||||
|
||||
# Code from listing 2.7
|
||||
|
||||
# Return `x` multiplied by two.
function times_two(x)
    doubled = 2x
    return doubled
end
|
||||
times_two(10)
|
||||
|
||||
# Code from listing 2.8
|
||||
|
||||
# Return the four arguments as a tuple `(x, y, a, b)`.
# `x` is a required positional argument, `y` an optional positional argument,
# `a` a required keyword argument and `b` an optional keyword argument.
function compose(x, y=10; a, b=10)
    packed = (x, y, a, b)
    return packed
end
|
||||
compose(1, 2; a=3, b=4)
|
||||
compose(1, 2; a=3)
|
||||
compose(1; a=3)
|
||||
compose(1)
|
||||
compose(; a=3)
|
||||
|
||||
# Code from listing 2.9
|
||||
|
||||
# Short (assignment-form) definitions equivalent to the `function ... end`
# versions of `times_two` and `compose`.
times_two(x) = 2x
compose(x, y=10; a, b=10) = (x, y, a, b)
|
||||
|
||||
# Code showing the use of map function
|
||||
|
||||
map(times_two, [1, 2, 3])
|
||||
|
||||
# Code from listing 2.10
|
||||
|
||||
map(x -> 2 * x, [1, 2, 3])
|
||||
|
||||
# Code showing sum taking a function as a first argument
|
||||
|
||||
sum(x -> x ^ 2, [1, 2, 3])
|
||||
|
||||
# Code showing do-end syntax
|
||||
|
||||
sum([1, 2, 3]) do x
|
||||
println("processing ", x)
|
||||
return x ^ 2
|
||||
end
|
||||
|
||||
# Code showing the difference between sort and sort!
|
||||
|
||||
x = [5, 1, 3, 2]
|
||||
sort(x)
|
||||
x
|
||||
sort!(x)
|
||||
x
|
||||
|
||||
# Code showing a simple implementation of winsorized_mean function
|
||||
|
||||
# Compute the k-times winsorized mean of `x`.
# A sorted copy of `x` is created (so `x` itself is not modified); its first
# `k` entries are overwritten with the entry at position `k + 1`, its last `k`
# entries with the entry at position `end - k`, and the mean of the modified
# copy is returned. No validation of `x` or `k` is performed here.
function winsorized_mean(x, k)
    sorted = sort(x)
    for offset in 1:k
        sorted[offset] = sorted[k + 1]
        sorted[end - offset + 1] = sorted[end - k]
    end
    # left-to-right accumulation starting from the integer 0
    total = foldl(+, sorted; init=0)
    return total / length(sorted)
end
|
||||
winsorized_mean([8, 3, 1, 5, 7], 1)
|
||||
|
||||
# Code from section 2.5
|
||||
|
||||
# Scope demo: `x` is assigned inside the function body, so it is local to
# `fun1`; the function returns `x + 1`.
function fun1()
    local x = 1
    return x + 1
end
|
||||
fun1()
|
||||
x
|
||||
|
||||
# Scope demo: a conditional does not introduce a new scope, so `x` assigned
# under the (always true) condition is still visible at the `return`.
function fun2()
    true && (x = 10)
    return x
end
|
||||
fun2()
|
||||
|
||||
# Scope demo: `x` is defined in the function body before the loop, so the
# assignment inside the loop updates that same variable.
function fun3()
    x = 0
    for i in (1, 2, 3)
        i == 2 && (x = 2)
    end
    return x
end
|
||||
fun3()
|
||||
|
||||
# Scope demo: `x` is assigned only inside the loop body, so it is local to the
# loop; the `x` in the `return` statement is therefore NOT that variable.
function fun4()
    for i in (1, 2, 3)
        i == 2 && (x = 2)
    end
    return x
end
|
||||
fun4()
|
||||
|
||||
# Scope demo: `x` is local to the loop body and is fresh in every iteration,
# so reading it in the second iteration (the `x + 1` branch) fails because it
# has not been assigned in that iteration.
function fun5()
    for i in (1, 2, 3)
        x = i == 1 ? 1 : x + 1
        println(x)
    end
    return nothing
end
|
||||
fun5()
|
||||
|
||||
# Scope demo: `x` is defined in the function body before the loop, so its
# value is kept between iterations; prints 1, 2 and 3 and returns `nothing`.
function fun6()
    x = 0
    for i in (1, 2, 3)
        x = i == 1 ? 1 : x + 1
        println(x)
    end
    return nothing
end
|
||||
fun6()
|
||||
|
||||
# Code from section 2.6
|
||||
|
||||
methods(cd)
|
||||
|
||||
sum isa Function
|
||||
|
||||
typeof(sum)
|
||||
typeof(sum) == Function
|
||||
|
||||
supertype(typeof(sum))
|
||||
|
||||
# Print `T` and then each of its successive supertypes, one per line,
# stopping after `Any` has been printed. Returns `nothing`.
function traverse(T)
    current = T
    while true
        println(current)
        current == Any && break
        current = supertype(current)
    end
    return nothing
end
|
||||
traverse(Int64)
|
||||
|
||||
# Print `T` preceded by `indent_level` copies of the padding string, then
# recursively print every subtype of `T` with the indent increased by 2.
# Returns `nothing`.
function print_subtypes(T, indent_level=0)
    println(repeat(" ", indent_level), T)
    foreach(S -> print_subtypes(S, indent_level + 2), subtypes(T))
    return nothing
end
|
||||
print_subtypes(Integer)
|
||||
|
||||
traverse(typeof([1.0, 2.0, 3.0]))
|
||||
traverse(typeof(1:3))
|
||||
|
||||
AbstractVector
|
||||
|
||||
typejoin(typeof([1.0, 2.0, 3.0]), typeof(1:3))
|
||||
|
||||
# Code from section 2.7
|
||||
|
||||
# Fallback method: selected when no more specific method matches.
function fun(x)
    return println("unsupported type")
end

# Method for any `Number` that has no more specific method.
function fun(x::Number)
    return println("a number was passed")
end

# Most specific method: selected for `Float64` arguments.
function fun(x::Float64)
    return println("a Float64 value")
end
|
||||
methods(fun)
|
||||
|
||||
fun("hello!")
|
||||
fun(1)
|
||||
fun(1.0)
|
||||
|
||||
# Three methods of `bar`. Note that none of them is the most specific one when
# BOTH arguments are numbers, so such a call is ambiguous with these methods.
function bar(x, y)
    return "no numbers passed"
end

function bar(x::Number, y)
    return "first argument is a number"
end

function bar(x, y::Number)
    return "second argument is a number"
end
|
||||
bar("hello", "world")
|
||||
bar(1, "world")
|
||||
bar("hello", 2)
|
||||
bar(1, 2)
|
||||
|
||||
# Method selected when both arguments are numbers.
function bar(x::Number, y::Number)
    return "both arguments are numbers"
end
|
||||
bar(1, 2)
|
||||
methods(bar)
|
||||
|
||||
# Compute the k-times winsorized mean of the vector `x`.
# Throws `ArgumentError` if `k` is negative or if `x` does not have more than
# `2 * k` elements. `x` is not modified: the work is done on a sorted copy.
function winsorized_mean(x::AbstractVector, k::Integer)
    if k < 0
        throw(ArgumentError("k must be non-negative"))
    end
    if length(x) <= 2 * k
        throw(ArgumentError("k is too large"))
    end
    sorted = sort!(collect(x))
    # The checks above guarantee that positions k + 1 and end - k exist and lie
    # outside the ranges overwritten below, so they can be read once up front.
    lower, upper = sorted[k + 1], sorted[end - k]
    for i in 1:k
        sorted[i] = lower
        sorted[end - i + 1] = upper
    end
    return sum(sorted) / length(sorted)
end
|
||||
|
||||
winsorized_mean([8, 3, 1, 5, 7], 1)
|
||||
winsorized_mean(1:10, 2)
|
||||
winsorized_mean(1:10, "a")
|
||||
winsorized_mean(10, 1)
|
||||
|
||||
winsorized_mean(1:10, -1)
|
||||
winsorized_mean(1:10, 5)
|
||||
|
||||
# Code from section 2.8
|
||||
|
||||
import Statistics
|
||||
x = [1, 2, 3]
|
||||
mean(x)
|
||||
Statistics.mean(x)
|
||||
|
||||
using Statistics
|
||||
mean(x)
|
||||
|
||||
# start a fresh Julia session before running this code
|
||||
mean = 1
|
||||
using Statistics
|
||||
mean
|
||||
|
||||
# start a fresh Julia session before running this code
|
||||
using Statistics
|
||||
mean([1, 2, 3])
|
||||
mean = 1
|
||||
|
||||
# start a fresh Julia session before running this code
|
||||
using Statistics
|
||||
mean = 1
|
||||
mean([1, 2, 3])
|
||||
|
||||
# start a fresh Julia session before running this code
|
||||
using Statistics
|
||||
using StatsBase
|
||||
?winsor
|
||||
mean(winsor([8, 3, 1, 5, 7], count=1))
|
||||
|
||||
# Code from section 2.9
|
||||
|
||||
@time 1 + 2
|
||||
|
||||
@time(1 + 2)
|
||||
|
||||
@assert 1 == 2 "1 is not equal 2"
|
||||
@assert(1 == 2, "1 is not equal 2")
|
||||
|
||||
@macroexpand @assert(1 == 2, "1 is not equal 2")
|
||||
|
||||
@macroexpand @time 1 + 2
|
||||
|
||||
# before running these codes
|
||||
# define the winsorized_mean function using the code from section 2.7
|
||||
|
||||
using BenchmarkTools
|
||||
x = rand(10^6);
|
||||
@benchmark winsorized_mean($x, 10^5)
|
||||
using Statistics, StatsBase
|
||||
@benchmark mean(winsor($x; count=10^5))
|
||||
|
||||
@edit winsor(x, count=10^5)
|
359
ch03.jl
Normal file
359
ch03.jl
Normal file
@ -0,0 +1,359 @@
|
||||
# Bogumił Kamiński, 2021
|
||||
|
||||
# Codes for chapter 3
|
||||
|
||||
# Code for listing 3.1
|
||||
|
||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||
|
||||
# Code for checking size of a matrix
|
||||
|
||||
size(aq)
|
||||
size(aq, 1)
|
||||
size(aq, 2)
|
||||
|
||||
# Code comparing tuple to a vector
|
||||
|
||||
v = [1, 2, 3]
|
||||
t = (1, 2, 3)
|
||||
v[1]
|
||||
t[1]
|
||||
v[1] = 10
|
||||
v
|
||||
t[1] = 10
|
||||
|
||||
# Code for figure 3.2
|
||||
|
||||
using BenchmarkTools
|
||||
@benchmark (1, 2, 3)
|
||||
@benchmark [1, 2, 3]
|
||||
|
||||
# Code for section 3.1.2
|
||||
|
||||
using Statistics
|
||||
mean(aq; dims=1)
|
||||
std(aq; dims=1)
|
||||
|
||||
map(mean, eachcol(aq))
|
||||
map(std, eachcol(aq))
|
||||
|
||||
map(eachcol(aq)) do col
|
||||
mean(col)
|
||||
end
|
||||
|
||||
[mean(col) for col in eachcol(aq)]
|
||||
[std(col) for col in eachcol(aq)]
|
||||
|
||||
# Code for section 3.1.3
|
||||
|
||||
[mean(aq[:, j]) for j in axes(aq, 2)]
|
||||
[std(aq[:, j]) for j in axes(aq, 2)]
|
||||
|
||||
axes(aq, 2)
|
||||
?Base.OneTo
|
||||
|
||||
[mean(view(aq, :, j)) for j in axes(aq, 2)]
|
||||
[std(@view aq[:, j]) for j in axes(aq, 2)]
|
||||
|
||||
# Code for section 3.1.4
|
||||
|
||||
using BenchmarkTools
|
||||
x = ones(10^7, 10)
|
||||
@benchmark [mean(@view $x[:, j]) for j in axes($x, 2)]
|
||||
@benchmark [mean($x[:, j]) for j in axes($x, 2)]
|
||||
@benchmark mean($x, dims=1)
|
||||
|
||||
# Code for section 3.1.5
|
||||
|
||||
[cor(aq[:, i], aq[:, i+1]) for i in 1:2:7]
|
||||
collect(1:2:7)
|
||||
|
||||
# Code for section 3.1.6
|
||||
|
||||
y = aq[:, 2]
|
||||
X = [ones(11) aq[:, 1]]
|
||||
X \ y
|
||||
[[ones(11) aq[:, i]] \ aq[:, i+1] for i in 1:2:7]
|
||||
|
||||
# Compute the coefficient of determination (R²) of the least-squares linear
# model `y ~ 1 + x`.
# `x` holds the feature values and `y` the target values; both must have the
# same number of observations. The intercept column is sized from `x`, so the
# function works for any number of observations (not only the 11 rows of `aq`).
function R²(x, y)
    X = [ones(length(x)) x]          # design matrix: intercept column, then x
    model = X \ y                    # least-squares coefficients
    prediction = X * model
    residuals = y - prediction       # named so as not to shadow Base.error
    SS_res = sum(v -> v ^ 2, residuals)
    mean_y = mean(y)
    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
    return 1 - SS_res / SS_tot
end
|
||||
[R²(aq[:, i], aq[:, i+1]) for i in 1:2:7]
|
||||
|
||||
?²
|
||||
|
||||
# Code for section 3.1.7
|
||||
|
||||
using Plots
|
||||
scatter(aq[:, 1], aq[:, 2]; legend=false)
|
||||
|
||||
plot(scatter(aq[:, 1], aq[:, 2]; legend=false),
|
||||
scatter(aq[:, 3], aq[:, 4]; legend=false),
|
||||
scatter(aq[:, 5], aq[:, 6]; legend=false),
|
||||
scatter(aq[:, 7], aq[:, 8]; legend=false))
|
||||
|
||||
plot([scatter(aq[:, i], aq[:, i+1]; legend=false)
|
||||
for i in 1:2:7]...)
|
||||
|
||||
# Code for section 3.2
|
||||
|
||||
# Tabulate the distribution of the sum of two standard six-sided dice:
# keys are the possible sums, values are the number of (i, j) pairs giving it.
two_standard = Dict{Int, Int}()
for i in 1:6, j in 1:6
    total = i + j
    two_standard[total] = get(two_standard, total, 0) + 1
end
|
||||
two_standard
|
||||
|
||||
keys(two_standard)
|
||||
values(two_standard)
|
||||
|
||||
using Plots
|
||||
scatter(collect(keys(two_standard)), collect(values(two_standard));
|
||||
legend=false, xaxis=2:12)
|
||||
|
||||
# Enumerate every candidate die: six faces, the first fixed to 1 and the
# remaining five non-decreasing values taken from 2:11.
all_dice = Vector{Int}[]
for x2 in 2:11, x3 in x2:11, x4 in x3:11, x5 in x4:11, x6 in x5:11
    push!(all_dice, [1, x2, x3, x4, x5, x6])
end
|
||||
|
||||
# For every ordered pair of candidate dice, tabulate the distribution of the
# sum of their faces and print the pair when that distribution is equal to
# the one stored in `two_standard`.
for d1 in all_dice, d2 in all_dice
    tally = Dict{Int, Int}()
    for face1 in d1, face2 in d2
        total = face1 + face2
        tally[total] = get(tally, total, 0) + 1
    end
    tally == two_standard && println(d1, " ", d2)
end
|
||||
|
||||
# Code for section 3.3
|
||||
|
||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||
|
||||
dataset1 = (x=aq[:, 1], y=aq[:, 2])
|
||||
|
||||
dataset1[1]
|
||||
dataset1.x
|
||||
|
||||
# Code for listing 3.2
|
||||
|
||||
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
||||
set2=(x=aq[:, 3], y=aq[:, 4]),
|
||||
set3=(x=aq[:, 5], y=aq[:, 6]),
|
||||
set4=(x=aq[:, 7], y=aq[:, 8]))
|
||||
|
||||
# Code for section 3.3.2
|
||||
|
||||
using Statistics
|
||||
map(s -> mean(s.x), data)
|
||||
|
||||
map(s -> cor(s.x, s.y), data)
|
||||
|
||||
using GLM
|
||||
model = lm(@formula(y ~ x), data.set1)
|
||||
|
||||
r2(model)
|
||||
|
||||
# Code for section 3.3.3
|
||||
|
||||
model.mm
|
||||
|
||||
x = [3, 1, 2]
|
||||
sort(x)
|
||||
x
|
||||
sort!(x)
|
||||
x
|
||||
|
||||
# Empty, in place, the container stored at position `i` of `nt` and return it.
# `nt` itself is not reassigned; only the object it holds is mutated.
function empty_field!(nt, i)
    field = nt[i]
    return empty!(field)
end
|
||||
nt = (dict = Dict("a" => 1, "b" => 2), int=10)
|
||||
empty_field!(nt, 1)
|
||||
nt
|
||||
|
||||
# Code for section 3.4.1
|
||||
|
||||
x = [1 2 3]
|
||||
y = [1, 2, 3]
|
||||
x * y
|
||||
|
||||
a = [1, 2, 3]
|
||||
b = [4, 5, 6]
|
||||
a * b
|
||||
|
||||
a .* b
|
||||
|
||||
map(*, a, b)
|
||||
[a[i] * b[i] for i in eachindex(a, b)]
|
||||
|
||||
eachindex(a, b)
|
||||
|
||||
eachindex([1, 2, 3], [4, 5])
|
||||
|
||||
map(*, [1, 2, 3], [4, 5])
|
||||
|
||||
[1, 2, 3] .* [4, 5]
|
||||
|
||||
# Code for section 3.4.2
|
||||
|
||||
[1, 2, 3] .* [4]
|
||||
|
||||
[1, 2, 3] .^ 2
|
||||
|
||||
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] .* [1 2 3 4 5 6 7 8 9 10]
|
||||
|
||||
["x", "y", "z"] .=> [sum minimum maximum]
|
||||
|
||||
abs.([1, -2, 3, -4])
|
||||
|
||||
abs([1, 2, 3])
|
||||
|
||||
string(1, 2, 3)
|
||||
|
||||
string.("x", 1:10)
|
||||
|
||||
# Two methods of `f` used to show that broadcasting dispatches separately on
# each element: one for `Int` arguments and one for `String` arguments.
function f(i::Int)
    return "got integer $i"
end

function f(s::String)
    return "got string $s"
end
|
||||
f.([1, "1"])
|
||||
|
||||
# Code for section 3.4.3
|
||||
|
||||
in(1, [1, 2, 3])
|
||||
in(4, [1, 2, 3])
|
||||
|
||||
in([1, 3, 5, 7, 9], [1, 2, 3, 4])
|
||||
|
||||
in.([1, 3, 5, 7, 9], [1, 2, 3, 4])
|
||||
|
||||
in.([1, 3, 5, 7, 9], Ref([1, 2, 3, 4]))
|
||||
|
||||
# Code for section 3.4.4
|
||||
|
||||
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||
using Statistics
|
||||
|
||||
mean.(eachcol(aq))
|
||||
|
||||
mean(eachcol(aq))
|
||||
|
||||
# Compute the coefficient of determination (R²) of the least-squares linear
# model `y ~ 1 + x`, using higher-order functions for the sums of squares.
# `x` and `y` must have the same number of observations; the intercept column
# is sized from `x` rather than being fixed at 11 rows.
function R²(x, y)
    X = [ones(length(x)) x]          # design matrix: intercept column, then x
    model = X \ y                    # least-squares coefficients
    prediction = X * model
    residuals = y - prediction       # named so as not to shadow Base.error
    SS_res = sum(v -> v ^ 2, residuals)
    mean_y = mean(y)
    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
    return 1 - SS_res / SS_tot
end
|
||||
|
||||
# Compute the coefficient of determination (R²) of the least-squares linear
# model `y ~ 1 + x`, using broadcasting for the sums of squares.
# `x` and `y` must have the same number of observations; the intercept column
# is sized from `x` rather than being fixed at 11 rows.
function R²(x, y)
    X = [ones(length(x)) x]          # design matrix: intercept column, then x
    model = X \ y                    # least-squares coefficients
    prediction = X * model
    SS_res = sum((y .- prediction) .^ 2)
    SS_tot = sum((y .- mean(y)) .^ 2)
    return 1 - SS_res / SS_tot
end
|
||||
|
||||
# Code for section 3.5
|
||||
|
||||
[]
|
||||
Dict()
|
||||
|
||||
Float64[1, 2, 3]
|
||||
|
||||
Dict{UInt8, Float64}(0 => 0, 1 => 1)
|
||||
|
||||
UInt32(200)
|
||||
|
||||
Real[1, 1.0, 0x3]
|
||||
|
||||
# Element types of vectors: explicitly requested (`Any`, `Float64`) versus
# chosen automatically from the literal's values.
v1 = Any[1, 2, 3]
eltype(v1)
v2 = Float64[1, 2, 3]
eltype(v2)
v3 = [1, 2, 3]
eltype(v3)  # query v3 here (the script previously repeated `eltype(v2)`)
# Element types of dictionaries are `Pair` types.
d1 = Dict()
eltype(d1)
d2 = Dict(1 => 2, 3 => 4)
eltype(d2)
|
||||
|
||||
p = 1 => 2
|
||||
typeof(p)
|
||||
|
||||
# Code for section 3.5.1
|
||||
|
||||
[1, 2, 3] isa AbstractVector{Int}
|
||||
[1, 2, 3] isa AbstractVector{Real}
|
||||
|
||||
AbstractVector{<:Real}
|
||||
|
||||
# Code for section 3.5.2
|
||||
|
||||
using Statistics
|
||||
# Compute the covariance of `x` and `y` using the `len - 1` denominator.
# Both arguments must be vectors whose element type is a subtype of `Real`;
# an assertion checks that they have the same, non-zero, length.
function ourcov(x::AbstractVector{<:Real},
                y::AbstractVector{<:Real})
    len = length(x)
    @assert len == length(y) > 0
    x_dev = x .- mean(x)    # deviations of x from its mean
    y_dev = y .- mean(y)    # deviations of y from its mean
    return sum(x_dev .* y_dev) / (len - 1)
end
|
||||
|
||||
ourcov(1:4, [1.0, 3.0, 2.0, 4.0])
|
||||
cov(1:4, [1.0, 3.0, 2.0, 4.0])
|
||||
|
||||
ourcov(1:4, Any[1.0, 3.0, 2.0, 4.0])
|
||||
|
||||
x = Any[1, 2, 3]
|
||||
identity.(x)
|
||||
y = Any[1, 2.0]
|
||||
identity.(y)
|
Loading…
Reference in New Issue
Block a user