JuliaForDataAnalysis/ch03.jl

147 lines
2.7 KiB
Julia
Raw Normal View History

2021-12-29 14:18:24 +01:00
# Bogumił Kamiński, 2021
# Code for chapter 3
2022-04-19 13:51:42 +02:00
# Code for section 3.1
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# List the methods defined for the `cd` function.
methods(cd)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Check whether `sum` is an instance of `Function`.
sum isa Function
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Inspect the type of `sum` itself and compare that type with `Function`.
typeof(sum)
typeof(sum) == Function
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Look up the direct supertype of the type of `sum`.
supertype(typeof(sum))
2021-12-29 14:18:24 +01:00
2022-04-07 11:15:46 +02:00
"""
    print_supertypes([io::IO=stdout,] T)

Print `T` followed by each of its successive supertypes, one per line, ending with `Any`.

# Arguments
- `io`: stream the type names are written to; defaults to `stdout`.
- `T`: the type whose chain of supertypes is printed.

# Returns
`nothing`.
"""
function print_supertypes(io::IO, T)
    println(io, T)
    # `Any` is its own supertype, so it is the point where the recursion has to stop.
    T == Any || print_supertypes(io, supertype(T))
    return nothing
end

print_supertypes(T) = print_supertypes(stdout, T)
2022-04-07 11:15:46 +02:00
# Print Int64 and every type above it in the type hierarchy.
print_supertypes(Int64)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# `subtypes` is provided by InteractiveUtils, which only the REPL loads automatically.
# Importing it explicitly keeps this function usable when the file is included or run
# as a script.
using InteractiveUtils: subtypes

"""
    print_subtypes([io::IO=stdout,] T, indent_level=0)

Print `T` and, recursively, all of its subtypes as an indented tree.

# Arguments
- `io`: stream the tree is written to; defaults to `stdout`.
- `T`: the type at the root of the printed tree.
- `indent_level`: number of spaces printed before `T`; every nesting level adds 2 more.

# Returns
`nothing`.
"""
function print_subtypes(io::IO, T, indent_level=0)
    println(io, " "^indent_level, T)
    for S in subtypes(T)
        print_subtypes(io, S, indent_level + 2)
    end
    return nothing
end

print_subtypes(T, indent_level=0) = print_subtypes(stdout, T, indent_level)
# Print the tree of types below Integer.
print_subtypes(Integer)
2021-12-29 14:18:24 +01:00
2022-04-07 11:15:46 +02:00
# Print the supertype chains of the type of a vector literal and of the type of a range.
print_supertypes(typeof([1.0, 2.0, 3.0]))
print_supertypes(typeof(1:3))
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Display the AbstractVector type.
AbstractVector
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Ask for the closest common ancestor of the two types inspected above.
typejoin(typeof([1.0, 2.0, 3.0]), typeof(1:3))
2021-12-29 14:18:24 +01:00
2022-04-19 13:51:42 +02:00
# Code for section 3.2
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Catch-all method: selected when no more specific method matches the argument.
function fun(x)
    println("unsupported type")
end

# Method for numeric arguments that are not covered by a more specific method.
function fun(x::Number)
    println("a number was passed")
end

# Most specific method: selected for Float64 arguments.
function fun(x::Float64)
    println("a Float64 value")
end
# List the methods defined for `fun`.
methods(fun)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# A String matches only the untyped method, so "unsupported type" is printed.
fun("hello!")
# An Int is a Number but not a Float64, so "a number was passed" is printed.
fun(1)
# A Float64 matches the most specific method, so "a Float64 value" is printed.
fun(1.0)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Fallback used when neither argument is a number.
function bar(x, y)
    return "no numbers passed"
end

# The two methods below each constrain a different argument. A call with two numeric
# arguments matches both equally well, so with only these three methods defined it is
# ambiguous.
function bar(x::Number, y)
    return "first argument is a number"
end

function bar(x, y::Number)
    return "second argument is a number"
end
# Neither argument is a number: the untyped method is used.
bar("hello", "world")
# Only the first argument is a number.
bar(1, "world")
# Only the second argument is a number.
bar("hello", 2)
# Both arguments are numbers: the two Number methods defined so far match equally well,
# so this call raises a MethodError reporting the ambiguity.
bar(1, 2)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Method for two numeric arguments. It is more specific than methods that constrain
# only one of the arguments, so it is the one selected for bar(number, number).
function bar(x::Number, y::Number)
    return "both arguments are numbers"
end
# With the (Number, Number) method defined, this call is no longer ambiguous.
bar(1, 2)
# List all methods now defined for `bar`.
methods(bar)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
"""
    winsorized_mean(x::AbstractVector, k::Integer)

Compute the `k`-times winsorized mean of `x`.

The values of `x` are sorted, the `k` smallest values are replaced by the `(k + 1)`-th
smallest one, the `k` largest values by the `(k + 1)`-th largest one, and the mean of the
resulting values is returned. The sorting is done on a collected copy of `x`.

# Arguments
- `x`: vector of values to average.
- `k`: number of values to replace at each end of the sorted data.

# Throws
- `ArgumentError`: if `k` is negative, or if `length(x) <= 2 * k`.
"""
function winsorized_mean(x::AbstractVector, k::Integer)
    k >= 0 || throw(ArgumentError("k must be non-negative"))
    length(x) > 2 * k || throw(ArgumentError("k is too large"))
    # Work on a sorted copy so the replacement below never touches the caller's data.
    y = sort!(collect(x))
    for i in 1:k
        y[i] = y[k + 1]
        y[end - i + 1] = y[end - k]
    end
    return sum(y) / length(y)
end
2022-02-08 20:58:33 +01:00
# Valid calls: a vector with k = 1 and a range with k = 2.
winsorized_mean([8, 3, 1, 5, 7], 1)
winsorized_mean(1:10, 2)
# These two calls match no method (k is not an Integer; x is not an AbstractVector),
# so each raises a MethodError.
winsorized_mean(1:10, "a")
winsorized_mean(10, 1)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# These two calls dispatch correctly but fail the argument checks inside the function:
# k is negative in the first one, and 2 * k is not smaller than length(x) in the second.
winsorized_mean(1:10, -1)
winsorized_mean(1:10, 5)
2021-12-29 14:18:24 +01:00
2022-04-19 13:51:42 +02:00
# Code for section 3.3
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
"""
Example module that contains a single function, `example`.
"""
module ExampleModule

"""
    example()

Print `Hello` to standard output.
"""
function example()
    println("Hello")
end

end # ExampleModule
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# `import` makes the module name `Statistics` available, not the names it exports.
import Statistics
x = [1, 2, 3]
# Unqualified call: expected to fail with an UndefVarError unless `mean` is already
# defined in the current session.
mean(x)
# Qualified call through the module name.
Statistics.mean(x)
2021-12-29 14:18:24 +01:00
# `using` additionally makes the names exported by Statistics available unqualified.
using Statistics
2022-02-08 20:58:33 +01:00
mean(x)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# start a fresh Julia session before running this code
# Scenario: bind `mean` first, load Statistics afterwards, then inspect `mean`.
mean = 1
2021-12-29 14:18:24 +01:00
using Statistics
2022-02-08 20:58:33 +01:00
mean
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# start a fresh Julia session before running this code
# Scenario: load Statistics, use its `mean`, and only then try to assign to `mean`.
using Statistics
mean([1, 2, 3])
mean = 1
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# start a fresh Julia session before running this code
# Scenario: load Statistics, assign to `mean` before ever using it, then call `mean`.
using Statistics
mean = 1
mean([1, 2, 3])
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# start a fresh Julia session before running this code
using Statistics
using StatsBase
# - change to help mode by pressing `?` key
# - type "winsor" and press Enter
2022-02-08 20:58:33 +01:00
# Winsorize the data with `winsor` (looked up in help mode above) and take the mean.
# NOTE(review): presumably equivalent to winsorized_mean([8, 3, 1, 5, 7], 1) - verify.
mean(winsor([8, 3, 1, 5, 7], count=1))
2021-12-29 14:18:24 +01:00
2022-04-19 13:51:42 +02:00
# Code for section 3.4
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Macro call written with a space between the macro name and its argument.
@time 1 + 2
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# The same macro call written with parentheses.
@time(1 + 2)
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Both call forms pass a condition and a message to @assert. The condition 1 == 2 is
# false, so each of these calls throws an AssertionError carrying the message.
@assert 1 == 2 "1 is not equal 2"
@assert(1 == 2, "1 is not equal 2")
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Show the code the macro call expands to instead of running it.
@macroexpand @assert(1 == 2, "1 is not equal 2")
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
@macroexpand @time 1 + 2
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# before running this code
# define the winsorized_mean function using the code from section 3.2
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
using BenchmarkTools
# Benchmark input: a vector of 10^6 random numbers (the `;` suppresses REPL output).
x = rand(10^6);
# `$x` interpolates the global `x` into the benchmarked expression.
@benchmark winsorized_mean($x, 10^5)
2022-06-25 09:51:47 +02:00
using Statistics
using StatsBase
2022-02-08 20:58:33 +01:00
# Benchmark the `mean` of `winsor` output on the same data for comparison.
@benchmark mean(winsor($x; count=10^5))
2021-12-29 14:18:24 +01:00
2022-02-08 20:58:33 +01:00
# Open the source of the `winsor` method that this call would invoke in an editor.
@edit winsor(x, count=10^5)