reorganize chapters

2022-02-08 20:58:33 +01:00
parent c44c4f1609
commit 55045fc5dd
12 changed files with 1662 additions and 1596 deletions
--- a/ch03.jl
+++ b/ch03.jl
@@ -2,358 +2,141 @@

 # Codes for chapter 3

-# Code for listing 3.1
+# Code from section 3.1

-aq = [10.0   8.04  10.0  9.14  10.0   7.46   8.0   6.58
-       8.0   6.95   8.0  8.14   8.0   6.77   8.0   5.76
-      13.0   7.58  13.0  8.74  13.0  12.74   8.0   7.71
-       9.0   8.81   9.0  8.77   9.0   7.11   8.0   8.84
-      11.0   8.33  11.0  9.26  11.0   7.81   8.0   8.47
-      14.0   9.96  14.0  8.1   14.0   8.84   8.0   7.04
-       6.0   7.24   6.0  6.13   6.0   6.08   8.0   5.25
-       4.0   4.26   4.0  3.1    4.0   5.39  19.0  12.50
-      12.0  10.84  12.0  9.13  12.0   8.15   8.0   5.56
-       7.0   4.82   7.0  7.26   7.0   6.42   8.0   7.91
-       5.0   5.68   5.0  4.74   5.0   5.73   8.0   6.89]
+methods(cd)

-# Code for checking size of a matrix
+sum isa Function

-size(aq)
-size(aq, 1)
-size(aq, 2)
+typeof(sum)
+typeof(sum) == Function

-# Code comparing tuple to a vector
+supertype(typeof(sum))

-v = [1, 2, 3]
-t = (1, 2, 3)
-v[1]
-t[1]
-v[1] = 10
-v
-t[1] = 10
+function traverse(T)  # print T, then each successive supertype, ending with Any
+    println(T)
+    T == Any && return nothing  # Any is the top of the type hierarchy: stop here
+    return traverse(supertype(T))  # the recursive call itself returns nothing
+end
+traverse(Int64)

-# Code for figure 3.2
+function print_subtypes(T, indent_level=0)
+    # Print T indented by indent_level spaces, then recurse into its direct subtypes.
+    println(" " ^ indent_level, T)
+    child_indent = indent_level + 2  # each nesting level is indented two more spaces
+    foreach(S -> print_subtypes(S, child_indent), subtypes(T))
+    return nothing
+end
+print_subtypes(Integer)
+
+traverse(typeof([1.0, 2.0, 3.0]))
+traverse(typeof(1:3))
+
+AbstractVector
+
+typejoin(typeof([1.0, 2.0, 3.0]), typeof(1:3))
+
+# Code from section 3.2
+
+fun(x) = println("unsupported type")  # fallback: x is unannotated, so it accepts any argument
+fun(x::Number) = println("a number was passed")  # used for Number arguments lacking a more specific method
+fun(x::Float64) = println("a Float64 value")  # most specific method for Float64 arguments
+methods(fun)
+
+fun("hello!")  # a String is not a Number: dispatches to fun(x)
+fun(1)  # an Int is a Number but not a Float64: dispatches to fun(x::Number)
+fun(1.0)  # dispatches to fun(x::Float64)
+
+bar(x, y) = "no numbers passed"  # fallback for any pair of arguments
+bar(x::Number, y) = "first argument is a number"
+bar(x, y::Number) = "second argument is a number"
+bar("hello", "world")  # only bar(x, y) applies
+bar(1, "world")  # bar(x::Number, y) is the most specific applicable method
+bar("hello", 2)  # bar(x, y::Number) is the most specific applicable method
+bar(1, 2)  # ambiguous at this point: both Number methods apply and neither is more specific
+
+bar(x::Number, y::Number) = "both arguments are numbers"  # more specific than both, resolves the ambiguity
+bar(1, 2)  # now dispatches to bar(x::Number, y::Number)
+methods(bar)
+
+function winsorized_mean(x::AbstractVector, k::Integer)  # mean with k values clamped per tail
+    k < 0 && throw(ArgumentError("k must be non-negative"))
+    2 * k < length(x) || throw(ArgumentError("k is too large"))
+    sorted = sort!(collect(x))  # collect builds a new array, so x is not reordered
+    lo, hi = sorted[k + 1], sorted[end - k]  # length > 2k, so the loop never overwrites these
+    for i in 1:k
+        sorted[i], sorted[end - i + 1] = lo, hi
+    end
+    return sum(sorted) / length(sorted)
+end
+
+winsorized_mean([8, 3, 1, 5, 7], 1)  # sorted and clamped: [3, 3, 5, 7, 7], mean 5.0
+winsorized_mean(1:10, 2)  # clamped: [3, 3, 3, 4, 5, 6, 7, 8, 8, 8], mean 5.5
+winsorized_mean(1:10, "a")  # MethodError: k must be an Integer
+winsorized_mean(10, 1)  # MethodError: x must be an AbstractVector
+
+winsorized_mean(1:10, -1)  # ArgumentError: k must be non-negative
+winsorized_mean(1:10, 5)  # ArgumentError: k is too large (length 10 is not > 2 * 5)
+
+# Code from section 3.3
+
+module ExampleModule
+
+# Minimal module: it defines (but does not export) a single function.
+# `example` prints a fixed greeting.
+example() = println("Hello")
+
+end # ExampleModule
+
+import Statistics
+x = [1, 2, 3]
+mean(x)
+Statistics.mean(x)
+
+using Statistics
+mean(x)
+
+# start a fresh Julia session before running this code
+mean = 1
+using Statistics
+mean
+
+# start a fresh Julia session before running this code
+using Statistics
+mean([1, 2, 3])
+mean = 1
+
+# start a fresh Julia session before running this code
+using Statistics
+mean = 1
+mean([1, 2, 3])
+
+# start a fresh Julia session before running this code
+using Statistics
+using StatsBase
+?winsor
+mean(winsor([8, 3, 1, 5, 7], count=1))
+
+# Code from section 3.4
+
+@time 1 + 2
+
+@time(1 + 2)
+
+@assert 1 == 2 "1 is not equal 2"
+@assert(1 == 2, "1 is not equal 2")
+
+@macroexpand @assert(1 == 2, "1 is not equal 2")
+
+@macroexpand @time 1 + 2
+
+# before running this code
+# define the winsorized_mean function using the code from section 3.2

 using BenchmarkTools
-@benchmark (1, 2, 3)
-@benchmark [1, 2, 3]
+x = rand(10^6);
+@benchmark winsorized_mean($x, 10^5)
+using Statistics, StatsBase
+@benchmark mean(winsor($x; count=10^5))

-# Code for section 3.1.2
-
-using Statistics
-mean(aq; dims=1)
-std(aq; dims=1)
-
-map(mean, eachcol(aq))
-map(std, eachcol(aq))
-
-map(eachcol(aq)) do col
-    mean(col)
-end
-
-[mean(col) for col in eachcol(aq)]
-[std(col) for col in eachcol(aq)]
-
-# Code for section 3.1.3
-
-[mean(aq[:, j]) for j in axes(aq, 2)]
-[std(aq[:, j]) for j in axes(aq, 2)]
-
-axes(aq, 2)
-?Base.OneTo
-
-[mean(view(aq, :, j)) for j in axes(aq, 2)]
-[std(@view aq[:, j]) for j in axes(aq, 2)]
-
-# Code for section 3.1.4
-
-using BenchmarkTools
-x = ones(10^7, 10)
-@benchmark [mean(@view $x[:, j]) for j in axes($x, 2)]
-@benchmark [mean($x[:, j]) for j in axes($x, 2)]
-@benchmark mean($x, dims=1)
-
-# Code for section 3.1.5
-
-[cor(aq[:, i], aq[:, i+1]) for i in 1:2:7]
-collect(1:2:7)
-
-# Code for section 3.1.6
-
-y = aq[:, 2]
-X = [ones(11) aq[:, 1]]
-X \ y
-[[ones(11) aq[:, i]] \ aq[:, i+1] for i in 1:2:7]
-
-function R²(x, y)
-    X = [ones(11) x]
-    model = X \ y
-    prediction = X * model
-    error = y - prediction
-    SS_res = sum(v -> v ^ 2, error)
-    mean_y = mean(y)
-    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
-    return 1 - SS_res / SS_tot
-end
-[R²(aq[:, i], aq[:, i+1]) for i in 1:2:7]
-
-?²
-
-# Code for section 3.1.7
-
-using Plots
-scatter(aq[:, 1], aq[:, 2]; legend=false)
-
-plot(scatter(aq[:, 1], aq[:, 2]; legend=false),
-     scatter(aq[:, 3], aq[:, 4]; legend=false),
-     scatter(aq[:, 5], aq[:, 6]; legend=false),
-     scatter(aq[:, 7], aq[:, 8]; legend=false))
-
-plot([scatter(aq[:, i], aq[:, i+1]; legend=false)
-      for i in 1:2:7]...)
-
-# Code for section 3.2
-
-two_standard = Dict{Int, Int}()
-for i in [1, 2, 3, 4, 5, 6]
-    for j in [1, 2, 3, 4, 5, 6]
-        s = i + j
-        if haskey(two_standard, s)
-            two_standard[s] += 1
-        else
-            two_standard[s] = 1
-        end
-    end
-end
-two_standard
-
-keys(two_standard)
-values(two_standard)
-
-using Plots
-scatter(collect(keys(two_standard)), collect(values(two_standard));
-        legend=false, xaxis=2:12)
-
-all_dice = [[1, x2, x3, x4, x5, x6]
-            for x2 in 2:11
-            for x3 in x2:11
-            for x4 in x3:11
-            for x5 in x4:11
-            for x6 in x5:11]
-
-for d1 in all_dice, d2 in all_dice
-    test = Dict{Int, Int}()
-    for i in d1, j in d2
-        s = i + j
-        if haskey(test, s)
-            test[s] += 1
-        else
-            test[s] = 1
-        end
-    end
-    if test == two_standard
-        println(d1, " ", d2)
-    end
-end
-
-# Code for section 3.3
-
-aq = [10.0   8.04  10.0  9.14  10.0   7.46   8.0   6.58
-       8.0   6.95   8.0  8.14   8.0   6.77   8.0   5.76
-      13.0   7.58  13.0  8.74  13.0  12.74   8.0   7.71
-       9.0   8.81   9.0  8.77   9.0   7.11   8.0   8.84
-      11.0   8.33  11.0  9.26  11.0   7.81   8.0   8.47
-      14.0   9.96  14.0  8.1   14.0   8.84   8.0   7.04
-       6.0   7.24   6.0  6.13   6.0   6.08   8.0   5.25
-       4.0   4.26   4.0  3.1    4.0   5.39  19.0  12.50
-      12.0  10.84  12.0  9.13  12.0   8.15   8.0   5.56
-       7.0   4.82   7.0  7.26   7.0   6.42   8.0   7.91
-       5.0   5.68   5.0  4.74   5.0   5.73   8.0   6.89]
-
-dataset1 = (x=aq[:, 1], y=aq[:, 2])
-
-dataset1[1]
-dataset1.x
-
-# Code for listing 3.2
-
-data = (set1=(x=aq[:, 1], y=aq[:, 2]),
-        set2=(x=aq[:, 3], y=aq[:, 4]),
-        set3=(x=aq[:, 5], y=aq[:, 6]),
-        set4=(x=aq[:, 7], y=aq[:, 8]))
-
-# Code for section 3.3.2
-
-using Statistics
-map(s -> mean(s.x), data)
-
-map(s -> cor(s.x, s.y), data)
-
-using GLM
-model = lm(@formula(y ~ x), data.set1)
-
-r2(model)
-
-# Code for section 3.3.3
-
-model.mm
-
-x = [3, 1, 2]
-sort(x)
-x
-sort!(x)
-x
-
-empty_field!(nt, i) = empty!(nt[i])
-nt = (dict = Dict("a" => 1, "b" => 2), int=10)
-empty_field!(nt, 1)
-nt
-
-# Code for section 3.4.1
-
-x = [1 2 3]
-y = [1, 2, 3]
-x * y
-
-a = [1, 2, 3]
-b = [4, 5, 6]
-a * b
-
-a .* b
-
-map(*, a, b)
-[a[i] * b[i] for i in eachindex(a, b)]
-
-eachindex(a, b)
-
-eachindex([1, 2, 3], [4, 5])
-
-map(*, [1, 2, 3], [4, 5])
-
-[1, 2, 3] .* [4, 5]
-
-# Code for section 3.4.2
-
-[1, 2, 3] .* [4]
-
-[1, 2, 3] .^ 2
-
-[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] .* [1 2 3 4 5 6 7 8 9 10]
-
-["x", "y", "z"] .=> [sum minimum maximum]
-
-abs.([1, -2, 3, -4])
-
-abs([1, 2, 3])
-
-string(1, 2, 3)
-
-string.("x", 1:10)
-
-f(i::Int) = string("got integer ", i)
-f(s::String) = string("got string ", s)
-f.([1, "1"])
-
-# Code for section 3.4.3
-
-in(1, [1, 2, 3])
-in(4, [1, 2, 3])
-
-in([1, 3, 5, 7, 9], [1, 2, 3, 4])
-
-in.([1, 3, 5, 7, 9], [1, 2, 3, 4])
-
-in.([1, 3, 5, 7, 9], Ref([1, 2, 3, 4]))
-
-# Code for section 3.4.4
-
-aq = [10.0   8.04  10.0  9.14  10.0   7.46   8.0   6.58
-       8.0   6.95   8.0  8.14   8.0   6.77   8.0   5.76
-      13.0   7.58  13.0  8.74  13.0  12.74   8.0   7.71
-       9.0   8.81   9.0  8.77   9.0   7.11   8.0   8.84
-      11.0   8.33  11.0  9.26  11.0   7.81   8.0   8.47
-      14.0   9.96  14.0  8.1   14.0   8.84   8.0   7.04
-       6.0   7.24   6.0  6.13   6.0   6.08   8.0   5.25
-       4.0   4.26   4.0  3.1    4.0   5.39  19.0  12.50
-      12.0  10.84  12.0  9.13  12.0   8.15   8.0   5.56
-       7.0   4.82   7.0  7.26   7.0   6.42   8.0   7.91
-       5.0   5.68   5.0  4.74   5.0   5.73   8.0   6.89]
-using Statistics
-
-mean.(eachcol(aq))
-
-mean(eachcol(aq))
-
-function R²(x, y)
-    X = [ones(11) x]
-    model = X \ y
-    prediction = X * model
-    error = y - prediction
-    SS_res = sum(v -> v ^ 2, error)
-    mean_y = mean(y)
-    SS_tot = sum(v -> (v - mean_y) ^ 2, y)
-    return 1 - SS_res / SS_tot
-end
-
-function R²(x, y)
-    X = [ones(11) x]
-    model = X \ y
-    prediction = X * model
-    SS_res = sum((y .- prediction) .^ 2)
-    SS_tot = sum((y .- mean(y)) .^ 2)
-    return 1 - SS_res / SS_tot
-end
-
-# Code for section 3.5
-
-[]
-Dict()
-
-Float64[1, 2, 3]
-
-Dict{UInt8, Float64}(0 => 0, 1 => 1)
-
-UInt32(200)
-
-Real[1, 1.0, 0x3]
-
-v1 = Any[1, 2, 3]
-eltype(v1)
-v2 = Float64[1, 2, 3]
-eltype(v2)
-v3 = [1, 2, 3]
-eltype(v2)
-d1 = Dict()
-eltype(d1)
-d2 = Dict(1 => 2, 3 => 4)
-eltype(d2)
-
-p = 1 => 2
-typeof(p)
-
-# Code for section 3.5.1
-
-[1, 2, 3] isa AbstractVector{Int}
-[1, 2, 3] isa AbstractVector{Real}
-
-AbstractVector{<:Real}
-
-# Code for section 3.5.2
-
-using Statistics
-function ourcov(x::AbstractVector{<:Real},
-                y::AbstractVector{<:Real})
-    len = length(x)
-    @assert len == length(y) > 0
-    return sum((x .- mean(x)) .* (y .- mean(y))) / (len - 1)
-end
-
-ourcov(1:4, [1.0, 3.0, 2.0, 4.0])
-cov(1:4, [1.0, 3.0, 2.0, 4.0])
-
-ourcov(1:4, Any[1.0, 3.0, 2.0, 4.0])
-
-x = Any[1, 2, 3]
-identity.(x)
-y = Any[1, 2.0]
-identity.(y)
+@edit winsor(x, count=10^5)