Codes for chapters 1, 2, and 3
This commit is contained in:
parent
f4c3f0f754
commit
b82be9e882
1030
Manifest.toml
Normal file
1030
Manifest.toml
Normal file
File diff suppressed because it is too large
Load Diff
8
Project.toml
Normal file
8
Project.toml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
[deps]
|
||||||
|
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
|
||||||
|
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||||
|
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
|
||||||
|
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
|
||||||
|
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
|
||||||
|
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
|
||||||
|
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
|
37
README.md
37
README.md
@ -1,2 +1,35 @@
|
|||||||
# JuliaForDataAnalysis
|
This repository contains source codes for the "Julia for Data Analysis" book
|
||||||
Codes for the book "Julia for Data Analysis"
|
that is written by Bogumił Kamiński and is planned to be published in 2022 by
|
||||||
|
[Manning Publications Co.](https://www.manning.com/).
|
||||||
|
|
||||||
|
In order to prepare the Julia environment before working with the materials
|
||||||
|
presented in the book please perform the following setup steps:
|
||||||
|
* [download](https://julialang.org/downloads/) and
|
||||||
|
[install](https://julialang.org/downloads/platform/)
|
||||||
|
[Julia](https://julialang.org/);
|
||||||
|
all the codes were tested under Julia 1.7;
|
||||||
|
* make sure you can start Julia by running `julia` command in your system shell
|
||||||
|
(alternative ways to use Julia are described in Appendix A to the book);
|
||||||
|
* download [this repository](https://github.com/bkamins/JuliaForDataAnalysis)
|
||||||
|
to a local folder on your computer;
|
||||||
|
* start Julia in a folder containing the downloaded material using the command
|
||||||
|
`julia --project`; the folder must
|
||||||
|
contain the Project.toml and Manifest.toml files prepared for this book
|
||||||
|
(an explanation of what these files do and why they are required is given in
|
||||||
|
Appendix A to the book);
|
||||||
|
* press *]*, write `instantiate` and press *Enter* (this process will ensure
|
||||||
|
that Julia properly configures the working environment for working with
|
||||||
|
the codes from the book);
|
||||||
|
* press *Backspace*, write `exit()` and press *Enter*; now you should exit Julia
|
||||||
|
and everything is set up to work with the materials presented in the book.
|
||||||
|
|
||||||
|
The codes for each chapter are stored in files named *chXX.jl*, where *XX* is
|
||||||
|
the chapter number.
|
||||||
|
|
||||||
|
To work with the codes from a given chapter:
|
||||||
|
* start a fresh Julia session using the `julia --project` command in a folder
|
||||||
|
containing the downloaded material;
|
||||||
|
* execute the commands sequentially as they appear in the file;
|
||||||
|
the codes were prepared in a way that you do not need to restart Julia
|
||||||
|
when working with material from a single chapter, unless it is explicitly
|
||||||
|
written in the instructions to restart Julia (some of the codes require this).
|
||||||
|
25
ch01.jl
Normal file
25
ch01.jl
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# Bogumił Kamiński, 2021
|
||||||
|
|
||||||
|
# Codes for chapter 1
|
||||||
|
|
||||||
|
# Code from section 1.2.1
|
||||||
|
|
||||||
|
"""
    f(n)

Return the sum of all values in the range `1:n`, accumulated from left to
right starting from the integer `0`.
"""
function f(n)
    # Left fold with an explicit integer seed: 0 + 1 + 2 + ... + n.
    return foldl(+, 1:n; init=0)
end
|
||||||
|
|
||||||
|
@time f(1_000_000_000)
|
||||||
|
|
||||||
|
# Code for reproducing the data frame presented in section 1.3
|
||||||
|
|
||||||
|
using DataFrames
|
||||||
|
|
||||||
|
DataFrame(a=1:3, name=["Alice", "Bob", "Clyde"],
|
||||||
|
age=[19, 24, 21], friends=[[2], [1, 3], [2]],
|
||||||
|
location=[(city="Atlanta", state="GA"),
|
||||||
|
(city="Boston", state="MA"),
|
||||||
|
(city="Austin", state="TX")])
|
442
ch02.jl
Normal file
442
ch02.jl
Normal file
@ -0,0 +1,442 @@
|
|||||||
|
# Bogumił Kamiński, 2021
|
||||||
|
|
||||||
|
# Codes for chapter 2
|
||||||
|
|
||||||
|
# Code for listing 2.1
|
||||||
|
|
||||||
|
1
|
||||||
|
true
|
||||||
|
"Hello world!"
|
||||||
|
0.1
|
||||||
|
[1, 2, 3]
|
||||||
|
|
||||||
|
# Code for listing 2.2
|
||||||
|
|
||||||
|
typeof(1)
|
||||||
|
typeof(true)
|
||||||
|
typeof("Hello world!")
|
||||||
|
typeof(0.1)
|
||||||
|
typeof([1, 2, 3])
|
||||||
|
|
||||||
|
# Code for showing bit representation of numbers
|
||||||
|
|
||||||
|
bitstring(1)
|
||||||
|
bitstring(1.0)
|
||||||
|
bitstring(Int8(1))
|
||||||
|
|
||||||
|
# Code showing what the Int alias expands to
|
||||||
|
|
||||||
|
Int
|
||||||
|
|
||||||
|
# Code for checking if value is of some type
|
||||||
|
|
||||||
|
[1, 2, 3] isa Vector{Int}
|
||||||
|
[1, 2, 3] isa Array{Int64, 1}
|
||||||
|
|
||||||
|
# Code for section 2.2
|
||||||
|
|
||||||
|
x = 1
|
||||||
|
y = [1, 2, 3]
|
||||||
|
|
||||||
|
x = 1
|
||||||
|
x
|
||||||
|
typeof(x)
|
||||||
|
x = 0.1
|
||||||
|
x
|
||||||
|
typeof(x)
|
||||||
|
|
||||||
|
Kamiński = 1
|
||||||
|
x₁ = 0.5
|
||||||
|
ε = 0.0001
|
||||||
|
|
||||||
|
?₁
|
||||||
|
?ε
|
||||||
|
|
||||||
|
# Code for listing 2.3
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
if x > 0
|
||||||
|
println("positive")
|
||||||
|
elseif x < 0
|
||||||
|
println("negative")
|
||||||
|
elseif x == 0
|
||||||
|
println("zero")
|
||||||
|
else
|
||||||
|
println("unexpected condition")
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code showing that logical condition must be Bool
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
if x
|
||||||
|
println("condition was true")
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code showing comparisons against NaN
|
||||||
|
|
||||||
|
NaN > 0
|
||||||
|
NaN >= 0
|
||||||
|
NaN < 0
|
||||||
|
NaN <= 0
|
||||||
|
NaN == 0
|
||||||
|
|
||||||
|
NaN != 0
|
||||||
|
NaN != NaN
|
||||||
|
|
||||||
|
# Code showing that floating point arithmetic is only approximate
|
||||||
|
|
||||||
|
0.1 + 0.2 == 0.3
|
||||||
|
|
||||||
|
0.1 + 0.2
|
||||||
|
|
||||||
|
isapprox(0.1 + 0.2, 0.3)
|
||||||
|
|
||||||
|
0.1 + 0.2 ≈ 0.3
|
||||||
|
|
||||||
|
# Code showing combining conditions
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
x > 0 && x < 10
|
||||||
|
x < 0 || log(x) > 10
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
log(x)
|
||||||
|
|
||||||
|
# Code showing typical one-line conditional execution expressions
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
x < 0 && println(x^2)
|
||||||
|
iseven(x) || println("x is odd")
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
if x < 0
|
||||||
|
println(x^2)
|
||||||
|
end
|
||||||
|
if !iseven(x)
|
||||||
|
println("x is odd")
|
||||||
|
end
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
if x < 0 && x^2
|
||||||
|
println("inside if")
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code showing ternary operator
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
x > 0 ? println("x is positive") : println("x is not positive")
|
||||||
|
|
||||||
|
# Code from listing 2.4
|
||||||
|
|
||||||
|
for i in [1, 2, 3]
|
||||||
|
println(i, " is ", isodd(i) ? "odd" : "even")
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code from listing 2.5
|
||||||
|
|
||||||
|
i = 1
|
||||||
|
while i < 4
|
||||||
|
println(i, " is ", isodd(i) ? "odd" : "even")
|
||||||
|
global i += 1
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code showing break and continue keywords
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
while true
|
||||||
|
global i += 1
|
||||||
|
i > 6 && break
|
||||||
|
isodd(i) && continue
|
||||||
|
println(i, " is even")
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code from listing 2.6
|
||||||
|
|
||||||
|
x = -7
|
||||||
|
x < 0 && begin
|
||||||
|
println(x)
|
||||||
|
x += 1
|
||||||
|
println(x)
|
||||||
|
2 * x
|
||||||
|
end
|
||||||
|
x > 0 ? (println(x); x) : (x += 1; println(x); x)
|
||||||
|
|
||||||
|
# Code from section 2.3.4
|
||||||
|
|
||||||
|
x = [8, 3, 1, 5, 7]
|
||||||
|
k = 1
|
||||||
|
|
||||||
|
y = sort(x)
|
||||||
|
|
||||||
|
for i in 1:k
|
||||||
|
y[i] = y[k + 1]
|
||||||
|
y[end - i + 1] = y[end - k]
|
||||||
|
end
|
||||||
|
y
|
||||||
|
|
||||||
|
s = 0
|
||||||
|
for v in y
|
||||||
|
s += v
|
||||||
|
end
|
||||||
|
s
|
||||||
|
s / length(y)
|
||||||
|
|
||||||
|
# Code from listing 2.7
|
||||||
|
|
||||||
|
function times_two(x)
|
||||||
|
return 2 * x
|
||||||
|
end
|
||||||
|
times_two(10)
|
||||||
|
|
||||||
|
# Code from listing 2.8
|
||||||
|
|
||||||
|
function compose(x, y=10; a, b=10)
|
||||||
|
return x, y, a, b
|
||||||
|
end
|
||||||
|
compose(1, 2; a=3, b=4)
|
||||||
|
compose(1, 2; a=3)
|
||||||
|
compose(1; a=3)
|
||||||
|
compose(1)
|
||||||
|
compose(; a=3)
|
||||||
|
|
||||||
|
# Code from listing 2.9
|
||||||
|
|
||||||
|
times_two(x) = 2 * x
|
||||||
|
compose(x, y=10; a, b=10) = x, y, a, b
|
||||||
|
|
||||||
|
# Code showing the use of map function
|
||||||
|
|
||||||
|
map(times_two, [1, 2, 3])
|
||||||
|
|
||||||
|
# Code from listing 2.10
|
||||||
|
|
||||||
|
map(x -> 2 * x, [1, 2, 3])
|
||||||
|
|
||||||
|
# Code showing sum taking a function as a first argument
|
||||||
|
|
||||||
|
sum(x -> x ^ 2, [1, 2, 3])
|
||||||
|
|
||||||
|
# Code showing do-end syntax
|
||||||
|
|
||||||
|
sum([1, 2, 3]) do x
|
||||||
|
println("processing ", x)
|
||||||
|
return x ^ 2
|
||||||
|
end
|
||||||
|
|
||||||
|
# Code showing the difference between sort and sort!
|
||||||
|
|
||||||
|
x = [5, 1, 3, 2]
|
||||||
|
sort(x)
|
||||||
|
x
|
||||||
|
sort!(x)
|
||||||
|
x
|
||||||
|
|
||||||
|
# Code showing a simple implementation of winsorized_mean function
|
||||||
|
|
||||||
|
"""
    winsorized_mean(x, k)

Sort a copy of `x`, overwrite its `k` smallest entries with the `(k + 1)`-th
smallest one and its `k` largest entries with the `(k + 1)`-th largest one,
and return the arithmetic mean of the result. `x` itself is not modified.
"""
function winsorized_mean(x, k)
    sorted = sort(x)
    last_pos = lastindex(sorted)
    for offset in 1:k
        # The replacement values are re-read on every pass on purpose.
        sorted[offset] = sorted[k + 1]
        sorted[last_pos - offset + 1] = sorted[last_pos - k]
    end
    # Left-to-right accumulation seeded with the integer 0.
    total = foldl(+, sorted; init=0)
    return total / length(sorted)
end
|
||||||
|
winsorized_mean([8, 3, 1, 5, 7], 1)
|
||||||
|
|
||||||
|
# Code from section 2.5
|
||||||
|
|
||||||
|
function fun1()
|
||||||
|
x = 1
|
||||||
|
return x + 1
|
||||||
|
end
|
||||||
|
fun1()
|
||||||
|
x
|
||||||
|
|
||||||
|
function fun2()
|
||||||
|
if true
|
||||||
|
x = 10
|
||||||
|
end
|
||||||
|
return x
|
||||||
|
end
|
||||||
|
fun2()
|
||||||
|
|
||||||
|
function fun3()
|
||||||
|
x = 0
|
||||||
|
for i in [1, 2, 3]
|
||||||
|
if i == 2
|
||||||
|
x = 2
|
||||||
|
end
|
||||||
|
end
|
||||||
|
return x
|
||||||
|
end
|
||||||
|
fun3()
|
||||||
|
|
||||||
|
function fun4()
|
||||||
|
for i in [1, 2, 3]
|
||||||
|
if i == 2
|
||||||
|
x = 2
|
||||||
|
end
|
||||||
|
end
|
||||||
|
return x
|
||||||
|
end
|
||||||
|
fun4()
|
||||||
|
|
||||||
|
function fun5()
|
||||||
|
for i in [1, 2, 3]
|
||||||
|
if i == 1
|
||||||
|
x = 1
|
||||||
|
else
|
||||||
|
x += 1
|
||||||
|
end
|
||||||
|
println(x)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
fun5()
|
||||||
|
|
||||||
|
function fun6()
|
||||||
|
x = 0
|
||||||
|
for i in [1, 2, 3]
|
||||||
|
if i == 1
|
||||||
|
x = 1
|
||||||
|
else
|
||||||
|
x += 1
|
||||||
|
end
|
||||||
|
println(x)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
fun6()
|
||||||
|
|
||||||
|
# Code from section 2.6
|
||||||
|
|
||||||
|
methods(cd)
|
||||||
|
|
||||||
|
sum isa Function
|
||||||
|
|
||||||
|
typeof(sum)
|
||||||
|
typeof(sum) == Function
|
||||||
|
|
||||||
|
supertype(typeof(sum))
|
||||||
|
|
||||||
|
"""
    traverse(T)

Print `T` and then each successive supertype of `T`, one per line, stopping
after `Any` has been printed. Returns `nothing`.
"""
function traverse(T)
    current = T
    while true
        println(current)
        # `Any` is the root of the type tree, so there is nothing above it.
        current == Any && break
        current = supertype(current)
    end
    return nothing
end
|
||||||
|
traverse(Int64)
|
||||||
|
|
||||||
|
"""
    print_subtypes(T, indent_level=0)

Print `T` prefixed by `indent_level` spaces, then recursively print every
subtype of `T` with the indentation increased by two. Returns `nothing`.
"""
function print_subtypes(T, indent_level=0)
    padding = " " ^ indent_level
    println(padding, T)
    # Each child is printed two columns deeper than its parent.
    foreach(S -> print_subtypes(S, indent_level + 2), subtypes(T))
    return nothing
end
|
||||||
|
print_subtypes(Integer)
|
||||||
|
|
||||||
|
traverse(typeof([1.0, 2.0, 3.0]))
|
||||||
|
traverse(typeof(1:3))
|
||||||
|
|
||||||
|
AbstractVector
|
||||||
|
|
||||||
|
typejoin(typeof([1.0, 2.0, 3.0]), typeof(1:3))
|
||||||
|
|
||||||
|
# Code from section 2.7
|
||||||
|
|
||||||
|
fun(x) = println("unsupported type")
|
||||||
|
fun(x::Number) = println("a number was passed")
|
||||||
|
fun(x::Float64) = println("a Float64 value")
|
||||||
|
methods(fun)
|
||||||
|
|
||||||
|
fun("hello!")
|
||||||
|
fun(1)
|
||||||
|
fun(1.0)
|
||||||
|
|
||||||
|
bar(x, y) = "no numbers passed"
|
||||||
|
bar(x::Number, y) = "first argument is a number"
|
||||||
|
bar(x, y::Number) = "second argument is a number"
|
||||||
|
bar("hello", "world")
|
||||||
|
bar(1, "world")
|
||||||
|
bar("hello", 2)
|
||||||
|
bar(1, 2)
|
||||||
|
|
||||||
|
bar(x::Number, y::Number) = "both arguments are numbers"
|
||||||
|
bar(1, 2)
|
||||||
|
methods(bar)
|
||||||
|
|
||||||
|
"""
    winsorized_mean(x::AbstractVector, k::Integer)

Return the mean of `x` after replacing its `k` smallest values with the
`(k + 1)`-th smallest one and its `k` largest values with the `(k + 1)`-th
largest one. The input `x` is left untouched; a sorted copy is modified.

Throws `ArgumentError` when `k` is negative or when `length(x) <= 2 * k`.
"""
function winsorized_mean(x::AbstractVector, k::Integer)
    if k < 0
        throw(ArgumentError("k must be non-negative"))
    end
    if length(x) <= 2 * k
        throw(ArgumentError("k is too large"))
    end
    y = sort!(collect(x))
    # The checks above guarantee both positions exist and are never
    # overwritten by the loop below, so they can be read once up front.
    lower, upper = y[k + 1], y[end - k]
    for i in 1:k
        y[i] = lower
        y[end - i + 1] = upper
    end
    total = sum(y)
    return total / length(y)
end
|
||||||
|
|
||||||
|
winsorized_mean([8, 3, 1, 5, 7], 1)
|
||||||
|
winsorized_mean(1:10, 2)
|
||||||
|
winsorized_mean(1:10, "a")
|
||||||
|
winsorized_mean(10, 1)
|
||||||
|
|
||||||
|
winsorized_mean(1:10, -1)
|
||||||
|
winsorized_mean(1:10, 5)
|
||||||
|
|
||||||
|
# Code from section 2.8
|
||||||
|
|
||||||
|
import Statistics
|
||||||
|
x = [1, 2, 3]
|
||||||
|
mean(x)
|
||||||
|
Statistics.mean(x)
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
mean(x)
|
||||||
|
|
||||||
|
# start a fresh Julia session before running this code
|
||||||
|
mean = 1
|
||||||
|
using Statistics
|
||||||
|
mean
|
||||||
|
|
||||||
|
# start a fresh Julia session before running this code
|
||||||
|
using Statistics
|
||||||
|
mean([1, 2, 3])
|
||||||
|
mean = 1
|
||||||
|
|
||||||
|
# start a fresh Julia session before running this code
|
||||||
|
using Statistics
|
||||||
|
mean = 1
|
||||||
|
mean([1, 2, 3])
|
||||||
|
|
||||||
|
# start a fresh Julia session before running this code
|
||||||
|
using Statistics
|
||||||
|
using StatsBase
|
||||||
|
?winsor
|
||||||
|
mean(winsor([8, 3, 1, 5, 7], count=1))
|
||||||
|
|
||||||
|
# Code from section 2.9
|
||||||
|
|
||||||
|
@time 1 + 2
|
||||||
|
|
||||||
|
@time(1 + 2)
|
||||||
|
|
||||||
|
@assert 1 == 2 "1 is not equal 2"
|
||||||
|
@assert(1 == 2, "1 is not equal 2")
|
||||||
|
|
||||||
|
@macroexpand @assert(1 == 2, "1 is not equal 2")
|
||||||
|
|
||||||
|
@macroexpand @time 1 + 2
|
||||||
|
|
||||||
|
# before running these codes
|
||||||
|
# define the winsorized_mean function using the code from section 2.7
|
||||||
|
|
||||||
|
using BenchmarkTools
|
||||||
|
x = rand(10^6);
|
||||||
|
@benchmark winsorized_mean($x, 10^5)
|
||||||
|
using Statistics, StatsBase
|
||||||
|
@benchmark mean(winsor($x; count=10^5))
|
||||||
|
|
||||||
|
@edit winsor(x, count=10^5)
|
359
ch03.jl
Normal file
359
ch03.jl
Normal file
@ -0,0 +1,359 @@
|
|||||||
|
# Bogumił Kamiński, 2021
|
||||||
|
|
||||||
|
# Codes for chapter 3
|
||||||
|
|
||||||
|
# Code for listing 3.1
|
||||||
|
|
||||||
|
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||||
|
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||||
|
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||||
|
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||||
|
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||||
|
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||||
|
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||||
|
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||||
|
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||||
|
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||||
|
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||||
|
|
||||||
|
# Code for checking size of a matrix
|
||||||
|
|
||||||
|
size(aq)
|
||||||
|
size(aq, 1)
|
||||||
|
size(aq, 2)
|
||||||
|
|
||||||
|
# Code comparing tuple to a vector
|
||||||
|
|
||||||
|
v = [1, 2, 3]
|
||||||
|
t = (1, 2, 3)
|
||||||
|
v[1]
|
||||||
|
t[1]
|
||||||
|
v[1] = 10
|
||||||
|
v
|
||||||
|
t[1] = 10
|
||||||
|
|
||||||
|
# Code for figure 3.2
|
||||||
|
|
||||||
|
using BenchmarkTools
|
||||||
|
@benchmark (1, 2, 3)
|
||||||
|
@benchmark [1, 2, 3]
|
||||||
|
|
||||||
|
# Code for section 3.1.2
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
mean(aq; dims=1)
|
||||||
|
std(aq; dims=1)
|
||||||
|
|
||||||
|
map(mean, eachcol(aq))
|
||||||
|
map(std, eachcol(aq))
|
||||||
|
|
||||||
|
map(eachcol(aq)) do col
|
||||||
|
mean(col)
|
||||||
|
end
|
||||||
|
|
||||||
|
[mean(col) for col in eachcol(aq)]
|
||||||
|
[std(col) for col in eachcol(aq)]
|
||||||
|
|
||||||
|
# Code for section 3.1.3
|
||||||
|
|
||||||
|
[mean(aq[:, j]) for j in axes(aq, 2)]
|
||||||
|
[std(aq[:, j]) for j in axes(aq, 2)]
|
||||||
|
|
||||||
|
axes(aq, 2)
|
||||||
|
?Base.OneTo
|
||||||
|
|
||||||
|
[mean(view(aq, :, j)) for j in axes(aq, 2)]
|
||||||
|
[std(@view aq[:, j]) for j in axes(aq, 2)]
|
||||||
|
|
||||||
|
# Code for section 3.1.4
|
||||||
|
|
||||||
|
using BenchmarkTools
|
||||||
|
x = ones(10^7, 10)
|
||||||
|
@benchmark [mean(@view $x[:, j]) for j in axes($x, 2)]
|
||||||
|
@benchmark [mean($x[:, j]) for j in axes($x, 2)]
|
||||||
|
@benchmark mean($x, dims=1)
|
||||||
|
|
||||||
|
# Code for section 3.1.5
|
||||||
|
|
||||||
|
[cor(aq[:, i], aq[:, i+1]) for i in 1:2:7]
|
||||||
|
collect(1:2:7)
|
||||||
|
|
||||||
|
# Code for section 3.1.6
|
||||||
|
|
||||||
|
y = aq[:, 2]
|
||||||
|
X = [ones(11) aq[:, 1]]
|
||||||
|
X \ y
|
||||||
|
[[ones(11) aq[:, i]] \ aq[:, i+1] for i in 1:2:7]
|
||||||
|
|
||||||
|
"""
    R²(x, y)

Return the coefficient of determination of the least-squares regression of
`y` on `x` with an intercept term.

The intercept column is sized from `x`, so the function works for any number
of observations (not only the 11-row data set used in this chapter).
"""
function R²(x, y)
    # Design matrix: a column of ones (intercept) next to the feature(s).
    X = [ones(size(x, 1)) x]
    model = X \ y
    prediction = X * model
    # Named `residuals` so the local does not shadow `Base.error`.
    residuals = y - prediction
    SS_res = sum(v -> v ^ 2, residuals)
    mean_y = mean(y)
    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
    return 1 - SS_res / SS_tot
end
|
||||||
|
[R²(aq[:, i], aq[:, i+1]) for i in 1:2:7]
|
||||||
|
|
||||||
|
?²
|
||||||
|
|
||||||
|
# Code for section 3.1.7
|
||||||
|
|
||||||
|
using Plots
|
||||||
|
scatter(aq[:, 1], aq[:, 2]; legend=false)
|
||||||
|
|
||||||
|
plot(scatter(aq[:, 1], aq[:, 2]; legend=false),
|
||||||
|
scatter(aq[:, 3], aq[:, 4]; legend=false),
|
||||||
|
scatter(aq[:, 5], aq[:, 6]; legend=false),
|
||||||
|
scatter(aq[:, 7], aq[:, 8]; legend=false))
|
||||||
|
|
||||||
|
plot([scatter(aq[:, i], aq[:, i+1]; legend=false)
|
||||||
|
for i in 1:2:7]...)
|
||||||
|
|
||||||
|
# Code for section 3.2
|
||||||
|
|
||||||
|
# Count how many of the 36 ordered rolls of two six-sided dice give each sum.
two_standard = Dict{Int, Int}()
for i in 1:6, j in 1:6
    total = i + j
    # A sum not seen before starts from zero.
    two_standard[total] = get(two_standard, total, 0) + 1
end
|
||||||
|
two_standard
|
||||||
|
|
||||||
|
keys(two_standard)
|
||||||
|
values(two_standard)
|
||||||
|
|
||||||
|
using Plots
|
||||||
|
scatter(collect(keys(two_standard)), collect(values(two_standard));
|
||||||
|
legend=false, xaxis=2:12)
|
||||||
|
|
||||||
|
all_dice = [[1, x2, x3, x4, x5, x6]
|
||||||
|
for x2 in 2:11
|
||||||
|
for x3 in x2:11
|
||||||
|
for x4 in x3:11
|
||||||
|
for x5 in x4:11
|
||||||
|
for x6 in x5:11]
|
||||||
|
|
||||||
|
# Print every ordered pair of candidate dice whose distribution of sums
# equals the one stored in `two_standard`.
for d1 in all_dice, d2 in all_dice
    counts = Dict{Int, Int}()
    for a in d1, b in d2
        total = a + b
        counts[total] = get(counts, total, 0) + 1
    end
    counts == two_standard && println(d1, " ", d2)
end
|
||||||
|
|
||||||
|
# Code for section 3.3
|
||||||
|
|
||||||
|
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||||
|
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||||
|
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||||
|
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||||
|
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||||
|
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||||
|
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||||
|
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||||
|
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||||
|
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||||
|
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||||
|
|
||||||
|
dataset1 = (x=aq[:, 1], y=aq[:, 2])
|
||||||
|
|
||||||
|
dataset1[1]
|
||||||
|
dataset1.x
|
||||||
|
|
||||||
|
# Code for listing 3.2
|
||||||
|
|
||||||
|
data = (set1=(x=aq[:, 1], y=aq[:, 2]),
|
||||||
|
set2=(x=aq[:, 3], y=aq[:, 4]),
|
||||||
|
set3=(x=aq[:, 5], y=aq[:, 6]),
|
||||||
|
set4=(x=aq[:, 7], y=aq[:, 8]))
|
||||||
|
|
||||||
|
# Code for section 3.3.2
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
map(s -> mean(s.x), data)
|
||||||
|
|
||||||
|
map(s -> cor(s.x, s.y), data)
|
||||||
|
|
||||||
|
using GLM
|
||||||
|
model = lm(@formula(y ~ x), data.set1)
|
||||||
|
|
||||||
|
r2(model)
|
||||||
|
|
||||||
|
# Code for section 3.3.3
|
||||||
|
|
||||||
|
model.mm
|
||||||
|
|
||||||
|
x = [3, 1, 2]
|
||||||
|
sort(x)
|
||||||
|
x
|
||||||
|
sort!(x)
|
||||||
|
x
|
||||||
|
|
||||||
|
empty_field!(nt, i) = empty!(nt[i])
|
||||||
|
nt = (dict = Dict("a" => 1, "b" => 2), int=10)
|
||||||
|
empty_field!(nt, 1)
|
||||||
|
nt
|
||||||
|
|
||||||
|
# Code for section 3.4.1
|
||||||
|
|
||||||
|
x = [1 2 3]
|
||||||
|
y = [1, 2, 3]
|
||||||
|
x * y
|
||||||
|
|
||||||
|
a = [1, 2, 3]
|
||||||
|
b = [4, 5, 6]
|
||||||
|
a * b
|
||||||
|
|
||||||
|
a .* b
|
||||||
|
|
||||||
|
map(*, a, b)
|
||||||
|
[a[i] * b[i] for i in eachindex(a, b)]
|
||||||
|
|
||||||
|
eachindex(a, b)
|
||||||
|
|
||||||
|
eachindex([1, 2, 3], [4, 5])
|
||||||
|
|
||||||
|
map(*, [1, 2, 3], [4, 5])
|
||||||
|
|
||||||
|
[1, 2, 3] .* [4, 5]
|
||||||
|
|
||||||
|
# Code for section 3.4.2
|
||||||
|
|
||||||
|
[1, 2, 3] .* [4]
|
||||||
|
|
||||||
|
[1, 2, 3] .^ 2
|
||||||
|
|
||||||
|
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] .* [1 2 3 4 5 6 7 8 9 10]
|
||||||
|
|
||||||
|
["x", "y", "z"] .=> [sum minimum maximum]
|
||||||
|
|
||||||
|
abs.([1, -2, 3, -4])
|
||||||
|
|
||||||
|
abs([1, 2, 3])
|
||||||
|
|
||||||
|
string(1, 2, 3)
|
||||||
|
|
||||||
|
string.("x", 1:10)
|
||||||
|
|
||||||
|
f(i::Int) = string("got integer ", i)
|
||||||
|
f(s::String) = string("got string ", s)
|
||||||
|
f.([1, "1"])
|
||||||
|
|
||||||
|
# Code for section 3.4.3
|
||||||
|
|
||||||
|
in(1, [1, 2, 3])
|
||||||
|
in(4, [1, 2, 3])
|
||||||
|
|
||||||
|
in([1, 3, 5, 7, 9], [1, 2, 3, 4])
|
||||||
|
|
||||||
|
in.([1, 3, 5, 7, 9], [1, 2, 3, 4])
|
||||||
|
|
||||||
|
in.([1, 3, 5, 7, 9], Ref([1, 2, 3, 4]))
|
||||||
|
|
||||||
|
# Code for section 3.4.4
|
||||||
|
|
||||||
|
aq = [10.0 8.04 10.0 9.14 10.0 7.46 8.0 6.58
|
||||||
|
8.0 6.95 8.0 8.14 8.0 6.77 8.0 5.76
|
||||||
|
13.0 7.58 13.0 8.74 13.0 12.74 8.0 7.71
|
||||||
|
9.0 8.81 9.0 8.77 9.0 7.11 8.0 8.84
|
||||||
|
11.0 8.33 11.0 9.26 11.0 7.81 8.0 8.47
|
||||||
|
14.0 9.96 14.0 8.1 14.0 8.84 8.0 7.04
|
||||||
|
6.0 7.24 6.0 6.13 6.0 6.08 8.0 5.25
|
||||||
|
4.0 4.26 4.0 3.1 4.0 5.39 19.0 12.50
|
||||||
|
12.0 10.84 12.0 9.13 12.0 8.15 8.0 5.56
|
||||||
|
7.0 4.82 7.0 7.26 7.0 6.42 8.0 7.91
|
||||||
|
5.0 5.68 5.0 4.74 5.0 5.73 8.0 6.89]
|
||||||
|
using Statistics
|
||||||
|
|
||||||
|
mean.(eachcol(aq))
|
||||||
|
|
||||||
|
mean(eachcol(aq))
|
||||||
|
|
||||||
|
"""
    R²(x, y)

Return the coefficient of determination of the least-squares regression of
`y` on `x` with an intercept term.

The intercept column is sized from `x`, so the function works for any number
of observations (not only the 11-row data set used in this chapter).
"""
function R²(x, y)
    # Design matrix: a column of ones (intercept) next to the feature(s).
    X = [ones(size(x, 1)) x]
    model = X \ y
    prediction = X * model
    # Named `residuals` so the local does not shadow `Base.error`.
    residuals = y - prediction
    SS_res = sum(v -> v ^ 2, residuals)
    mean_y = mean(y)
    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
    return 1 - SS_res / SS_tot
end
|
||||||
|
|
||||||
|
"""
    R²(x, y)

Broadcasting-based variant: return the coefficient of determination of the
least-squares regression of `y` on `x` with an intercept term.

The intercept column is sized from `x`, so the function works for any number
of observations (not only the 11-row data set used in this chapter).
"""
function R²(x, y)
    # Design matrix: a column of ones (intercept) next to the feature(s).
    X = [ones(size(x, 1)) x]
    model = X \ y
    prediction = X * model
    SS_res = sum((y .- prediction) .^ 2)
    SS_tot = sum((y .- mean(y)) .^ 2)
    return 1 - SS_res / SS_tot
end
|
||||||
|
|
||||||
|
# Code for section 3.5
|
||||||
|
|
||||||
|
[]
|
||||||
|
Dict()
|
||||||
|
|
||||||
|
Float64[1, 2, 3]
|
||||||
|
|
||||||
|
Dict{UInt8, Float64}(0 => 0, 1 => 1)
|
||||||
|
|
||||||
|
UInt32(200)
|
||||||
|
|
||||||
|
Real[1, 1.0, 0x3]
|
||||||
|
|
||||||
|
v1 = Any[1, 2, 3]
|
||||||
|
eltype(v1)
|
||||||
|
v2 = Float64[1, 2, 3]
|
||||||
|
eltype(v2)
|
||||||
|
# No element type is given, so it is inferred from the literals.
v3 = [1, 2, 3]
eltype(v3)
|
||||||
|
d1 = Dict()
|
||||||
|
eltype(d1)
|
||||||
|
d2 = Dict(1 => 2, 3 => 4)
|
||||||
|
eltype(d2)
|
||||||
|
|
||||||
|
p = 1 => 2
|
||||||
|
typeof(p)
|
||||||
|
|
||||||
|
# Code for section 3.5.1
|
||||||
|
|
||||||
|
[1, 2, 3] isa AbstractVector{Int}
|
||||||
|
[1, 2, 3] isa AbstractVector{Real}
|
||||||
|
|
||||||
|
AbstractVector{<:Real}
|
||||||
|
|
||||||
|
# Code for section 3.5.2
|
||||||
|
|
||||||
|
using Statistics
|
||||||
|
"""
    ourcov(x::AbstractVector{<:Real}, y::AbstractVector{<:Real})

Return the sum of the element-wise products of the deviations of `x` and `y`
from their respective means, divided by `length(x) - 1`.

An assertion checks that both vectors have the same, non-zero length.
"""
function ourcov(x::AbstractVector{<:Real},
                y::AbstractVector{<:Real})
    len = length(x)
    @assert len == length(y) > 0
    # Deviations of every observation from its own vector's mean.
    x_dev = x .- mean(x)
    y_dev = y .- mean(y)
    return sum(x_dev .* y_dev) / (len - 1)
end
|
||||||
|
|
||||||
|
ourcov(1:4, [1.0, 3.0, 2.0, 4.0])
|
||||||
|
cov(1:4, [1.0, 3.0, 2.0, 4.0])
|
||||||
|
|
||||||
|
ourcov(1:4, Any[1.0, 3.0, 2.0, 4.0])
|
||||||
|
|
||||||
|
x = Any[1, 2, 3]
|
||||||
|
identity.(x)
|
||||||
|
y = Any[1, 2.0]
|
||||||
|
identity.(y)
|
Loading…
Reference in New Issue
Block a user