# Julia script, 228 lines, 3.8 KiB
# Bogumił Kamiński, 2022
|
|
|
|
# Codes for chapter 6
|
|
|
|
# Code for listing 6.1

import Downloads

# Fetch the 10K MovieTweetings snapshot and save it under the relative path
# "movies.dat". The URL is assembled from four pieces to keep source lines short.
Downloads.download(string("https://raw.githubusercontent.com/",
                          "sidooms/MovieTweetings/",
                          "44c525d0c766944910686c60697203cda39305d6/",
                          "snapshots/10K/movies.dat"),
                   "movies.dat")
|
|
|
|
# Code for string interpolation examples

x = 10

# `$name` splices the value of a variable into the string.
"I have $x apples"

# `$(expr)` splices the value of an arbitrary expression.
"I have $(2 * x) apples"

# A literal dollar sign has to be escaped with a backslash.
"I have \$100."
|
|
# Intentional error demo: an unescaped `$` starts interpolation, and `$100` is not
# valid interpolation syntax, so Julia rejects this literal with a syntax error.
"I have $100."
|
|
|
|
# Code for multiline strings

# Same download as in listing 6.1, but the URL is written as ONE string literal
# spread over several source lines: a trailing `\` joins the next line to the
# literal and drops that line's leading whitespace, so the indentation below does
# not end up in the URL.
Downloads.download("https://raw.githubusercontent.com/\
                    sidooms/MovieTweetings/\
                    44c525d0c766944910686c60697203cda39305d6/\
                    snapshots/10K/movies.dat",
                   "movies.dat")
# Minimal illustration of the same rule: the backslash-newline pairs (and the
# indentation that follows them) vanish, so this literal evaluates to "abc".
"a\
 b\
 c"
|
|
|
|
# Code for raw strings
|
|
|
|
# Intentional error demo: in an ordinary string literal `\m` is not a valid escape
# sequence, so Julia rejects this Windows-style path with a syntax error.
"C:\my_folder\my_file.txt"
# The `raw` prefix switches off escape processing, so the backslashes are kept
# verbatim and the path is accepted.
raw"C:\my_folder\my_file.txt"

# Printing the raw string writes the path to standard output.
print(raw"C:\my_folder\my_file.txt")
|
|
|
|
# Code for listing 6.2

# Read the downloaded file into a vector holding one string per line.
movies = open(readlines, "movies.dat")

# Code for section 6.2

# Work with the first line only.
movie1 = first(movies)

# Cut that line at every "::" separator.
movie1_parts = split(movie1, "::")

# Look up the direct supertypes of `String` and `SubString{String}`.
supertype(String)
supertype(SubString{String})
|
|
|
|
# Code for section 6.3

# The second "::"-separated field of the first record.
movie1_parts[2]

# Group 1 captures the leading text; group 2 captures four digits enclosed in
# parentheses. The trailing `$` anchors the parenthesised digits at the end.
rx = r"(.+) \((\d{4})\)$"

m = match(rx, movie1_parts[2])

# Captured groups are retrieved by position.
m[1]
m[2]

# The second capture is text, so it is converted to an integer explicitly.
parse(Int, m[2])
|
|
|
|
# Code for listing 6.3
|
|
|
|
"""
    parseline(line::AbstractString)

Parse one record of the movies file into a `NamedTuple`.

The line is split on `::`. The second field must contain a title followed by a
four-digit year in parentheses, and the third field is a `|`-separated genre list.

# Returns
A `NamedTuple` with fields
- `id`: the first field, left as text;
- `name`: the text captured before the parenthesised year;
- `year`: the four captured digits parsed as an `Int`;
- `genres`: the third field split on `|`.

# Throws
- `ArgumentError` if `line` has fewer than three `::`-separated fields, or if its
  second field does not contain a parenthesised four-digit year.
"""
function parseline(line::AbstractString)
    parts = split(line, "::")
    length(parts) >= 3 ||
        throw(ArgumentError("expected at least 3 fields separated by \"::\", got " *
                            "$(length(parts)) in line: $(repr(line))"))
    m = match(r"(.+) \((\d{4})\)", parts[2])
    # `match` returns `nothing` when the pattern is absent; report the offending
    # line here instead of letting `m[1]` fail with an opaque `MethodError`.
    isnothing(m) &&
        throw(ArgumentError("no \"title (YYYY)\" pattern in line: $(repr(line))"))
    return (id=parts[1],
            name=m[1],
            year=parse(Int, m[2]),
            genres=split(parts[3], "|"))
end
|
|
|
|
# Code for parsing one line of movies data

record1 = parseline(movie1)

# Code for listing 6.4

# Code units of three one-character strings.
codeunits("a")
codeunits("ε")
codeunits("∀")

# Codes for different patterns of string subsetting

# `first(s, 8)` takes the first 8 characters ...
word = first(record1.name, 8)

# ... whereas a range index addresses code-unit positions 1 through 8; the two
# can differ when the text contains multi-code-unit characters.
record1.name[1:8]

# Print every valid index of `word` together with the character stored there.
for (i, ch) in pairs(word)
    println(i, ": ", ch)
end

codeunits("ô")

codeunits("Fantômas")

# `isascii` tells whether a string consists of ASCII characters only.
isascii("Hello world!")
isascii("∀ x: x≥0")

# Single-position indexing returns the character at that index.
word[1]
word[5]
|
|
|
|
# Code for section 6.5

# Parse every line of the file into a record.
records = map(parseline, movies)

# Gather the genres of all records into one flat `Vector{String}`.
genres = String[genre for record in records for genre in record.genres]
genres
|
|
|
|
using FreqTables

# Build a frequency table of the genres, then sort it in place.
table = freqtable(genres)
sort!(table)

# One entry per record: its year, and whether "Drama" is among its genres.
years = map(record -> record.year, records)
has_drama = map(record -> "Drama" in record.genres, records)

# Cross-tabulate year against the Drama flag as proportions.
# NOTE(review): `margins=1` presumably normalises within each year — confirm
# against the FreqTables documentation.
drama_prop = proptable(years, has_drama; margins=1)

# Code for listing 6.5

using Plots

# Plot the second column of the table against the labels of its first dimension.
# NOTE(review): the second column is presumably the `true` (has Drama) column.
plot(names(drama_prop, 1), drama_prop[:, 2];
     legend=false,
     xlabel="year",
     ylabel="Drama probability")
|
|
|
|
# Code for section 6.6.1

# `Symbol(...)` builds a symbol from its arguments; several arguments are
# concatenated into one name.
s1 = Symbol("x")
s2 = Symbol("hello world!")
s3 = Symbol("x", 1)

typeof(s1)
typeof(s2)
typeof(s3)

# The constructor also accepts text that looks like a number.
Symbol("1")

# Literal syntax for symbols whose names are valid identifiers.
:x
:x1
|
|
|
|
# Intentional error demo: the `:name` literal form cannot contain a space, so this
# line is rejected by the parser; such names need `Symbol("hello world")`.
:hello world
|
|
# A colon in front of a numeric literal does not create a Symbol; the expression
# evaluates to the number itself.
:1

# Code for section 6.6.2

# Direct supertype of `Symbol`.
supertype(Symbol)

# Symbols can be compared for equality.
:x == :x
:x == :y
|
|
|
|
# Code for listing 6.6

using BenchmarkTools

# One million labels "x1", "x2", ..., first as strings, then as symbols.
str = ["x$i" for i in 1:10^6]
symb = map(Symbol, str)

# Neither "x" nor :x occurs in its collection, so both lookups run to the end.
# The `$` interpolates the global into the benchmarked expression.
@btime "x" in $str;
@btime :x in $symb;
|
|
|
|
# Code for section 6.7

using InlineStrings

# Convert single strings and inspect which concrete type was chosen for each.
s1 = InlineString("x")
typeof(s1)

s2 = InlineString("∀")
typeof(s2)

# Convert a whole vector of strings in one call.
sv = inlinestrings(["The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"])
|
|
|
|
# Code for listing 6.7

using Random
using BenchmarkTools

# Seed the generator so the random strings are reproducible.
Random.seed!(1234);

# 10^6 random strings of length 3, as ordinary strings and as inline strings.
s1 = [randstring(3) for _ in 1:10^6]
s2 = inlinestrings(s1)

# Code for analyzing properties of InlineStrings.jl

# Memory footprint of both representations.
Base.summarysize(s1)
Base.summarysize(s2)

# Sorting time of both representations (`sort` leaves its argument untouched).
@btime sort($s1);
@btime sort($s2);
|
|
|
|
# Code for listing 6.8

# Write "iris.txt": the same three species names, one per line and in this order,
# repeated 10^6 times. The `do` block form closes the file when the block ends.
open("iris.txt", "w") do io
    for _ in 1:10^6, species in ("Iris setosa", "Iris virginica", "Iris versicolor")
        println(io, species)
    end
end
|
|
|
|
# Code for section 6.8.2

# Load the generated file as a plain vector with one string per line.
uncompressed = open(readlines, "iris.txt")

using PooledArrays

# Wrap the same data in a PooledArray.
compressed = PooledArray(uncompressed)

# Compare the memory footprint of the two containers.
Base.summarysize(uncompressed)
Base.summarysize(compressed)
|
|
|
|
# Code for section 6.8.3

# Inspect the two lookup structures stored in the pooled array.
compressed.invpool
compressed.pool

# Element access, and the same lookup spelled out by hand: the reference stored
# at position 10 is used as an index into the pool.
compressed[10]
compressed.pool[compressed.refs[10]]

# Size of each individual pool entry.
map(Base.summarysize, compressed.pool)

# Same comparison for a vector of labels "x1", "x2", ..., up to 10^6.
v1 = ["x$i" for i in 1:10^6]
v2 = PooledArray(v1)
Base.summarysize(v1)
Base.summarysize(v2)
|