diff --git a/ch04.jl b/ch04.jl new file mode 100644 index 0000000..16855a5 --- /dev/null +++ b/ch04.jl @@ -0,0 +1,224 @@ +# Bogumił Kamiński, 2022 + +# Codes for chapter 4 + +# Code for listing 4.1 + +import Downloads +Downloads.download("https://raw.githubusercontent.com/" * + "sidooms/MovieTweetings/" * + "44c525d0c766944910686c60697203cda39305d6/" * + "snapshots/10K/movies.dat", + "movies.dat") + +# Code for string interpolation examples + +x = 10 +"I have $x apples" + +"I have \$100." +"I have $100." + +# Code for multiline strings + +Downloads.download("https://raw.githubusercontent.com/\ + sidooms/MovieTweetings/\ + 44c525d0c766944910686c60697203cda39305d6/\ + snapshots/10K/movies.dat", + "movies.dat") + +"a\ + b\ + c" + +# Code for raw strings + +"C:\my_folder\my_file.txt" + +raw"C:\my_folder\my_file.txt" + +# Code for listing 4.2 + +movies = readlines("movies.dat") + +# Code for section 4.2 + +movie1 = first(movies) + +movie1_parts = split(movie1, "::") + +supertype(String) +supertype(SubString{String}) + +# Code for section 4.3 + +movie1_parts[2] + +rx = r"(.*) \((\d{4})\)$" + +m = match(rx, movie1_parts[2]) + +m[1] +m[2] + +parse(Int, m[2]) + +# Code for listing 4.3 + +function parseline(line::String) + parts = split(line, "::") + m = match(r"(.*) \((\d{4})\)", parts[2]) + return (id=parts[1], + name=m[1], + year=parse(Int, m[2]), + genres=split(parts[3], "|")) +end + +# Code for parsing one line of movies data + +record1 = parseline(movie1) + +# Code for listing 4.4 + +codeunits("a") +codeunits("ε") +codeunits("∀") + +# Codes for different patterns of string subsetting + +word = first(record1.name, 8) + +record1.name[1:8] + +for i in eachindex(word) + println(i, ": ", word[i]) +end + +codeunits("ô") + +codeunits("Fantômas") + +isascii("Hello world!") +isascii("∀ x: x≥0") + +word[1] +word[5] + +# Code for section 4.5 + +records = parseline.(movies) + +genres = String[] +for record in records + append!(genres, record.genres) +end +genres + +using FreqTables +table = freqtable(genres) +sort!(table) + +years = [record.year for record in records] +has_drama = ["Drama" in record.genres for record in records] +drama_prop = proptable(years, has_drama, margins=1) + +# Code for listing 4.5 + +using Plots + +plot(names(drama_prop, 1), drama_prop[:, 2], legend=false, + xlabel="year", ylabel="Drama probability") + +# Code for section 4.6.1 + +s1 = Symbol("x") +s2 = Symbol("hello world!") +s3 = Symbol("x", 1) + +typeof(s1) +typeof(s2) +typeof(s3) + +Symbol("1") + +:x +:x1 + +:hello world +:1 + +# Code for section 4.6.2 + +supertype(Symbol) + +:x == :x +:x == :y + +# Code for listing 4.6 + +using BenchmarkTools +str = string.("x", 1:10^6) +symb = Symbol.(str) +@benchmark "x" in $str +@benchmark :x in $symb + +# Code for section 4.7 + +using InlineStrings +s1 = InlineString("x") +typeof(s1) +s2 = InlineString("∀") +typeof(s2) +sv = inlinestrings(["The", "quick", "brown", "fox", "jumps", + "over", "the", "lazy", "dog"]) + +# Code for listing 4.7 + +using Random +using BenchmarkTools +Random.seed!(1234); +s1 = [randstring(3) for i in 1:10^6] +s2 = inlinestrings(s1) + +# Code for analyzing properties of InlineStrings.jl + +Base.summarysize(s1) +Base.summarysize(s2) + +@benchmark sort($s1) +@benchmark sort($s2) + +# Code for listing 4.8 + +open("iris.txt", "w") do io + for i in 1:10^6 + println(io, "Iris setosa") + println(io, "Iris virginica") + println(io, "Iris versicolor") + end +end + +# Code for section 4.8.2 + +uncompressed = readlines("iris.txt") + +using PooledArrays +compressed = PooledArray(uncompressed) + +Base.summarysize(uncompressed) +Base.summarysize(compressed) + +# Code for section 4.8.3 + +compressed.invpool +compressed.pool + +compressed[10] +compressed.pool[compressed.refs[10]] + +Base.summarysize.(compressed.pool) + +v1 = string.("x", 1:10^6) +v2 = PooledArray(v1) +Base.summarysize(v1) +Base.summarysize(v2)