Compare commits

...

13 Commits

Author SHA1 Message Date
Bogumił Kamiński
08c75a6a76 Update errata.md 2025-03-31 20:34:00 +02:00
Bogumił Kamiński
8120d5287a Update errata.md 2024-01-16 23:29:33 +01:00
Olivier Benz
89c9667883 Dev Container refinements (#9) 2023-08-21 10:27:03 +02:00
Olivier Benz
01d1c623e5 Fix postStartCommand (#8) 2023-08-02 13:04:54 +02:00
Olivier Benz
931b138bb1 Add Dev Container Configuration Files (#7) 2023-07-23 17:35:21 +02:00
Bogumił Kamiński
4f7ab0ac1b Update exercises12.md 2023-05-06 21:45:45 +02:00
Bogumił Kamiński
ae83ee9d03 Update exercises12.md 2023-05-06 21:36:24 +02:00
Bogumił Kamiński
b710e84f7c move errata to a separate file 2023-03-04 17:25:42 +01:00
Bogumił Kamiński
6fcf4433e0 add errata.md 2023-03-04 17:22:54 +01:00
Bogumił Kamiński
93ace0f083 Update README.md 2023-03-04 10:10:47 +01:00
Bogumił Kamiński
3bc2ed1cb7 Update README.md 2023-03-03 19:42:48 +01:00
Bogumił Kamiński
4beea9b23b Update README.md 2023-03-03 19:41:37 +01:00
Bogumił Kamiński
be3e4de540 Update README.md 2023-03-03 19:35:41 +01:00
23 changed files with 558 additions and 112 deletions

View File

@@ -0,0 +1,64 @@
## Base image and tag; declared before the first FROM so that both
## FROM instructions below can reference them
ARG BUILD_ON_IMAGE=glcr.b-data.ch/julia/base
ARG JULIA_VERSION=latest

## Stage "files": collects everything that is later copied to / of the
## final image in a single COPY --from=files
FROM ${BUILD_ON_IMAGE}:${JULIA_VERSION} AS files

ARG DEBIAN_FRONTEND=noninteractive

RUN mkdir /files

COPY julia-base/conf/user /files
COPY julia-base/scripts /files

## Ensure file modes are correct when using CI
## Otherwise set to 777 in the target image
## (directories 755, regular files 644, files in usr/local/bin 755)
RUN find /files -type d -exec chmod 755 {} \; \
  && find /files -type f -exec chmod 644 {} \; \
  && find /files/usr/local/bin -type f -exec chmod 755 {} \;
## Final stage: the image that is actually used
FROM ${BUILD_ON_IMAGE}:${JULIA_VERSION}

ARG DEBIAN_FRONTEND=noninteractive

## Update environment
ARG USE_ZSH_FOR_ROOT
ARG SET_LANG
ARG SET_TZ

## SET_LANG/SET_TZ take precedence; when they are unset or empty the
## current values of LANG/TZ are kept
ENV LANG=${SET_LANG:-$LANG} \
    TZ=${SET_TZ:-$TZ} \
    PARENT_IMAGE_BUILD_DATE=${BUILD_DATE}

  ## Change root's shell to ZSH
  ## (done for any non-empty value of USE_ZSH_FOR_ROOT)
RUN if [ -n "$USE_ZSH_FOR_ROOT" ]; then \
    chsh -s /bin/zsh; \
  fi \
  ## Update timezone if needed
  ## ($TZ is quoted so that it is never word-split or glob-expanded)
  && if [ "$TZ" != "Etc/UTC" ]; then \
    echo "Setting TZ to $TZ"; \
    ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime \
      && echo "$TZ" > /etc/timezone; \
  fi \
  ## Add/Update locale if needed
  ## (uncomments the matching entry in /etc/locale.gen before generating)
  && if [ "$LANG" != "en_US.UTF-8" ]; then \
    sed -i "s/# $LANG/$LANG/g" /etc/locale.gen; \
    locale-gen; \
    echo "Setting LANG to $LANG"; \
    update-locale --reset LANG="$LANG"; \
  fi \
  ## Allow updating pre-installed Julia packages
  ## Make sure $JULIA_PATH/local/share/julia/registries/* is deleted
  && rm -rf "${JULIA_PATH}/local/share/julia/registries"/*

## Pip: Install to the Python user install directory (1) or not (0)
ARG PIP_USER=1

ENV PIP_USER=${PIP_USER}

## Copy files as late as possible to avoid cache busting
COPY --from=files /files /

## Reset environment variable BUILD_DATE
## (the previous value was saved as PARENT_IMAGE_BUILD_DATE above)
ARG BUILD_START

ENV BUILD_DATE=${BUILD_START}

View File

@@ -0,0 +1,72 @@
{
// Dev Container definition "Julia base": the image is built from
// ./Julia.Dockerfile with the build arguments listed under "args".
"name": "Julia base",
"build": {
"dockerfile": "./Julia.Dockerfile",
"context": ".",
"args": {
// Image name and tag used in the Dockerfile's FROM instructions.
"BUILD_ON_IMAGE": "glcr.b-data.ch/julia/base",
"JULIA_VERSION": "1.7.3",
// NOTE(review): assuming Julia.Dockerfile tests this with `[ -n ... ]`,
// any non-empty value switches root's shell to zsh; remove the entry
// (or set it to "") to keep bash.
"USE_ZSH_FOR_ROOT": "unset-to-use-bash",
// Locale and timezone to set in the image.
"SET_LANG": "en_US.UTF-8",
"SET_TZ": "Etc/UTC"
}
},
// Command executed each time the container starts.
// NOTE(review): presumably resolved via PATH inside the container - confirm
// that postStartCommand.sh is installed in the image.
"postStartCommand": "postStartCommand.sh",
"features": {
"ghcr.io/devcontainers/features/common-utils:2": {
"configureZshAsDefaultShell": true,
"upgradePackages": false,
"username": "vscode",
"userUid": "automatic",
"userGid": "automatic"
},
"ghcr.io/devcontainers/features/docker-outside-of-docker:1": {
"moby": false
},
// A comma separated list of packages to install
"ghcr.io/rocker-org/devcontainer-features/apt-packages:1": {
"packages": ""
}
},
"customizations": {
"vscode": {
// VS Code extensions installed in the container; gitlens is pinned to
// version 11.7.0, all others are unpinned.
"extensions": [
"eamodio.gitlens@11.7.0",
"editorconfig.editorconfig",
"mhutchie.git-graph",
"ms-python.python",
"mutantdino.resourcemonitor",
"julialang.language-julia",
"dbaeumer.vscode-eslint",
"esbenp.prettier-vscode",
"ms-python.black-formatter",
"ms-azuretools.vscode-docker"
],
// Disables welcome/what's-new pop-ups, Julia crash reporting and
// telemetry, and two resource monitor indicators.
"settings": {
"gitlens.showWelcomeOnInstall": false,
"gitlens.showWhatsNewAfterUpgrades": false,
"julia.enableCrashReporter": false,
"julia.enableTelemetry": false,
"resmon.show.battery": false,
"resmon.show.cpufreq": false
}
}
},
// Must match "username" of the common-utils feature above.
"remoteUser": "vscode",
// Named volume mounted at /home/vscode, the home directory of the remote user.
"mounts": [
"source=jfda-home-vscode,target=/home/vscode,type=volume"
],
// By default,
// - Julia starts up with a single thread of execution.
// - BLAS/OpenMP will use as many threads as possible.
// Set the following environment variables to control the exact number to use.
"remoteEnv": {
// "JULIA_NUM_THREADS": "1",
// "OMP_NUM_THREADS": "1"
}
}

View File

@@ -0,0 +1,45 @@
# User-specific Julia startup file.
#
# Loads Revise and activates a project environment:
#   * the project requested on the command line or via JULIA_PROJECT, if any;
#   * otherwise the current working directory, if it contains both a
#     Project.toml and a Manifest.toml;
#   * otherwise the default environment
#     $HOME/.julia/environments/v<major>.<minor>.

# `Pkg` is used throughout this file; import it explicitly instead of relying
# on the name already being bound in `Main` when the startup file is executed.
import Pkg

println("Executing user-specific startup file (", @__FILE__, ")...")

# https://github.com/julia-vscode/julia-vscode/issues/3304
# Requested project: the `project` Julia option if it is set, else the
# JULIA_PROJECT environment variable, else `nothing`.
project = (Base.JLOptions().project != C_NULL ?
    unsafe_string(Base.JLOptions().project) :
    get(ENV, "JULIA_PROJECT", nothing))

# A project was requested: activate the default environment (silently) before
# Revise is loaded.
if !isnothing(project)
    Pkg.activate(; io=devnull)
end

# Loading Revise is best effort: a failure is reported but does not abort startup.
try
    using Revise
    println("Revise started")
catch e
    @warn "Error initializing Revise" exception=(e, catch_backtrace())
end

if !isnothing(project) &&
    # https://github.com/julia-vscode/julia-vscode/issues/3304
    !startswith(Base.load_path_expand(Base.LOAD_PATH[end]), project)
    if startswith(project, "@")
        if startswith(project, "@.")
            # "@.": use the project found by Base.current_project(), falling
            # back to the default environment when there is none.
            if isnothing(Base.current_project())
                Pkg.activate(joinpath("$(ENV["HOME"])", ".julia",
                    "environments", "v$(VERSION.major).$(VERSION.minor)"))
            else
                Pkg.activate(Base.current_project(); io=devnull)
            end
        else
            # Any other "@..." entry is expanded like a load path entry.
            Pkg.activate(Base.load_path_expand(project); io=devnull)
        end
    else
        # Plain path: expand a leading "~" and make it absolute.
        Pkg.activate(abspath(expanduser(project)); io=devnull)
    end
else
    # No project requested (or the last LOAD_PATH entry already points to it).
    if isfile(joinpath(pwd(), "Project.toml")) &&
        isfile(joinpath(pwd(), "Manifest.toml"))
        Pkg.activate(pwd())
    else
        Pkg.activate(joinpath("$(ENV["HOME"])", ".julia", "environments",
            "v$(VERSION.major).$(VERSION.minor)"))
    end
end

View File

@@ -0,0 +1,3 @@
# Register the PDF MIME type with IJulia unless it is already in IJulia's list
# of MIME types.
let pdf = MIME("application/pdf")
    if pdf ∉ IJulia.ijulia_mime_types
        IJulia.register_mime(pdf)
    end
end

View File

@@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Copyright (c) 2023 b-data GmbH.
# Distributed under the terms of the MIT License.
# Abort as soon as any command fails
set -e

skel_config=/etc/skel/.julia/config
user_config="$HOME/.julia/config"

mkdir -p "$user_config"

# Copy user-specific startup files if home directory is bind mounted
# (a file that already exists in the user's config directory is left alone)
for startup_file in startup_ijulia.jl startup.jl; do
  if [ ! -f "$user_config/$startup_file" ]; then
    cp -a "$skel_config/$startup_file" "$user_config"
  fi
done

View File

@@ -0,0 +1,74 @@
{
// Dev Container definition "Julia pubtools": same setup as a plain Julia
// container, but built on the pubtools image and with the LaTeX Workshop and
// Quarto extensions added. Dockerfile and build context are taken from the
// parent directory.
"name": "Julia pubtools",
"build": {
"dockerfile": "../Julia.Dockerfile",
"context": "..",
"args": {
// Image name and tag used in the Dockerfile's FROM instructions.
"BUILD_ON_IMAGE": "glcr.b-data.ch/julia/pubtools",
"JULIA_VERSION": "1.7.3",
// NOTE(review): assuming Julia.Dockerfile tests this with `[ -n ... ]`,
// any non-empty value switches root's shell to zsh; remove the entry
// (or set it to "") to keep bash.
"USE_ZSH_FOR_ROOT": "unset-to-use-bash",
// Locale and timezone to set in the image.
"SET_LANG": "en_US.UTF-8",
"SET_TZ": "Etc/UTC"
}
},
// Command executed each time the container starts.
// NOTE(review): presumably resolved via PATH inside the container - confirm
// that postStartCommand.sh is installed in the image.
"postStartCommand": "postStartCommand.sh",
"features": {
"ghcr.io/devcontainers/features/common-utils:2": {
"configureZshAsDefaultShell": true,
"upgradePackages": false,
"username": "vscode",
"userUid": "automatic",
"userGid": "automatic"
},
"ghcr.io/devcontainers/features/docker-outside-of-docker:1": {
"moby": false
},
// A comma separated list of packages to install
"ghcr.io/rocker-org/devcontainer-features/apt-packages:1": {
"packages": ""
}
},
"customizations": {
"vscode": {
// VS Code extensions installed in the container; gitlens is pinned to
// version 11.7.0, all others are unpinned.
"extensions": [
"eamodio.gitlens@11.7.0",
"editorconfig.editorconfig",
"mhutchie.git-graph",
"ms-python.python",
"mutantdino.resourcemonitor",
"julialang.language-julia",
"James-Yu.latex-workshop",
"quarto.quarto",
"dbaeumer.vscode-eslint",
"esbenp.prettier-vscode",
"ms-python.black-formatter",
"ms-azuretools.vscode-docker"
],
// Disables welcome/what's-new pop-ups, Julia crash reporting and
// telemetry, and two resource monitor indicators.
"settings": {
"gitlens.showWelcomeOnInstall": false,
"gitlens.showWhatsNewAfterUpgrades": false,
"julia.enableCrashReporter": false,
"julia.enableTelemetry": false,
"resmon.show.battery": false,
"resmon.show.cpufreq": false
}
}
},
// Must match "username" of the common-utils feature above.
"remoteUser": "vscode",
// Named volume mounted at /home/vscode, the home directory of the remote user.
"mounts": [
"source=jfda-home-vscode,target=/home/vscode,type=volume"
],
// By default,
// - Julia starts up with a single thread of execution.
// - BLAS/OpenMP will use as many threads as possible.
// Set the following environment variables to control the exact number to use.
"remoteEnv": {
// "JULIA_NUM_THREADS": "1",
// "OMP_NUM_THREADS": "1"
}
}

122
README.md
View File

@@ -10,6 +10,7 @@ book that has been written by Bogumił Kamiński and has been published by [Mann
* [Setting up your environment](#setting-up-your-environment)
* [General instructions](#general-instructions)
* [Note for Linux users](#note-for-linux-users)
* [Dev Containers](#dev-containers)
* [Organization of the code](#organization-of-the-code)
* [Running the example codes](#running-the-example-codes)
* [Accompanying materials](#accompanying-materials)
@@ -91,6 +92,17 @@ export PATH="$PATH:/opt/julia-1.7.2/bin"
line (assuming you have downloaded Julia 1.7.2 and extracted it to `/opt` folder).
### Dev Containers
Folder `/.devcontainer` contains configuration files for
[Dev Containers](https://containers.dev).
* For use with GitHub Codespaces, please follow the instructions at
[Creating a codespace for a repository](https://docs.github.com/en/codespaces/developing-in-codespaces/creating-a-codespace-for-a-repository#creating-a-codespace-for-a-repository).
* For local/'remote host' usage with Visual Studio Code, please follow the
instructions at
[Developing inside a Container](https://code.visualstudio.com/docs/devcontainers/containers).
## Organization of the code
The codes for each chapter are stored in files named *chXX.jl*, where *XX* is
@@ -175,112 +187,4 @@ Kamiński, Bogumił. 2023. *Julia for Data Analysis*. Manning.
## Errata
### Chapter 1, section 1.2.1, page 7
I show the following example of code execution:
```
julia> function sum_n(n)
s = 0
for i in 1:n
s += i
end
return s
end
sum_n (generic function with 1 method)
julia> @time sum_n(1_000_000_000)
0.000001 seconds
500000000500000000
```
This timing is very fast (and the reason is explained in the book).
The issue is that this is the situation under Julia 1.7.
Under Julia 1.8 and Julia 1.9 running the same code takes longer (tested under Julia 1.9-beta4):
```
julia> @time sum_n(1_000_000_000)
2.265569 seconds
500000000500000000
```
The reason for this inconsistency is a bug in `@time` macro introduced in Julia 1.8 release.
The `sum_n(1_000_000_000)` call (without `@time`) is executed fast.
Here is a simplified benchmark (run under Julia 1.9-beta4):
```
julia> let
start = time_ns()
v = sum_n(1_000_000_000)
stop=time_ns()
v, Int(stop - start)
end
(500000000500000000, 1000)
```
Unfortunately there is an issue with the `@time`
macro used in global scope, that needs to be resolved in Base Julia.
See [this issue](https://github.com/JuliaLang/julia/issues/47561).
### Chapter 2, section 2.3.1, page 30
I compare the following expressions:
```
x > 0 && println(x)
```
and
```
if x > 0
println(x)
end
```
where `x = -7`.
I write there that Julia interprets them both in the same way.
It is true in terms of the fact that in both cases the `println` function is not called (and this is the focus point of the example).
However, there is a difference in the value of these expressions.
The first expression evaluates to `false`, while the second evaluates to `nothing`.
Here is how you can check it:
```
julia> x = -7
-7
julia> show(x > 0 && println(x))
false
julia> show(if x > 0
println(x)
end)
nothing
```
### Chapter 3, section 3.2.3, pages 58
* middle of page 58: `y[end - the + 1] = y[end -- k]` should be `y[end - i + 1] = y[end - k]`
### Chapter 3, section 3.2.3, pages 59
* top of page 59: `sort(v::AbstractVector; kwthe.)` should be `sort(v::AbstractVector; kws...)`
### Chapter 6, section 6.4.1, page 132
* middle of Listing 6.4: `codeunits("?")` should be `codeunits("ε")`
### Chapter 8, section 8.1.2, page 189
* middle of page 189: `zsdf format` should be `zstd format`
### Chapter 8, section 8.2.1, page 191
* bottom of page 191: `misssingstring` should be `missingstring`
### Chapter 10, section 10.2.2, page 255
* bottom of page 255: `? Error: Error adding value to column :b.` should be `┌ Error: Error adding value to column :b.`
You can find errata for the book in [this file](errata.md).

113
errata.md Normal file
View File

@@ -0,0 +1,113 @@
## Errata
This file contains errata for the
["Julia for Data Analysis"](https://www.manning.com/books/julia-for-data-analysis?utm_source=bkamins&utm_medium=affiliate&utm_campaign=book_kaminski2_julia_3_17_22)
book that has been written by Bogumił Kamiński and has been published by [Manning Publications Co.](https://www.manning.com/)
### Chapter 2, introduction, page 20
* middle of page 20: the provided link http://mng.bz/5mWD explaining *k-times winsorized mean* definition no longer works.
Use https://web.archive.org/web/20210804184830/https://v8doc.sas.com/sashtml/insight/chap38/sect17.htm provided by
[The Wayback Machine](https://web.archive.org/) instead.
### Chapter 2, section 2.3.1, page 30
I compare the following expressions:
```
x > 0 && println(x)
```
and
```
if x > 0
println(x)
end
```
where `x = -7`.
I write there that Julia interprets them both in the same way.
It is true in terms of the fact that in both cases the `println` function is not called (and this is the focus point of the example).
However, there is a difference in the value of these expressions.
The first expression evaluates to `false`, while the second evaluates to `nothing`.
Here is how you can check it:
```
julia> x = -7
-7
julia> show(x > 0 && println(x))
false
julia> show(if x > 0
println(x)
end)
nothing
```
### Chapter 2, section 2.5, page 45
* top of page 45: *use in this book):* should be *use in this book:*
### Chapter 3, section 3.2.3, page 58
* middle of page 58: `y[end - the + 1] = y[end -- k]` should be `y[end - i + 1] = y[end - k]`
### Chapter 3, section 3.2.3, page 59
* top of page 59: `sort(v::AbstractVector; kwthe.)` should be `sort(v::AbstractVector; kws...)`
### Chapter 6, section 6.4.1, page 132
* middle of Listing 6.4: `codeunits("?")` should be `codeunits("ε")`
### Chapter 8, section 8.1.2, page 189
* middle of page 189: `zsdf format` should be `zstd format`
### Chapter 8, section 8.2.1, page 191
* bottom of page 191: `misssingstring` should be `missingstring`
### Chapter 9, section 9.2.2, page 231
* top of page 231: `both ratings` should be `ratings`
### Chapter 10, section 10.2.2, page 255
* bottom of page 255: `? Error: Error adding value to column :b.` should be `┌ Error: Error adding value to column :b.`
### Chapter 12, section 12.1.4, page 302
* bottom of page 302:
```
julia> df = DataFrame(a=1:3, b=1:3, c=1:3)
3×3 DataFrame
Row │ a b c
│ Int64 Int64 Int64
???????????????????????????
1 │ 1 1 1
2 │ 2 2 2
3 │ 3 3 3
```
should be
```
julia> df = DataFrame(a=1:3, b=1:3, c=1:3)
3×3 DataFrame
Row │ a b c
│ Int64 Int64 Int64
─────┼─────────────────────
1 │ 1 1 1
2 │ 2 2 2
3 │ 3 3 3
```
### Chapter 12, section 12.3.2, page 318
* top of page 318: in the annotation to Figure 12.6 there is text *Applies a log1p* which looks like *Applies a loglp*
(this is a display issue due to the fact that in the font used letter `l` and digit `1` look identical)

View File

@@ -22,7 +22,7 @@ using Downloads
using SHA
Downloads.download("https://go.dev/dl/go1.19.2.src.tar.gz", "go.tar.gz")
shavec = open(sha256, "go.tar.gz")
shastr = join(string.(s; base=16, pad=2))
shastr = join(string.(shavec; base=16, pad=2))
sha == shastr
```
@@ -90,7 +90,7 @@ in `target_df`.
This is short, but you need to have a good understanding of Julia types
and standard functions to properly write it:
```
Symbol.(target_df.id) == keys(edges_json)
Symbol.(target_df.id) == collect(keys(edges_json))
```
</details>
@@ -115,7 +115,7 @@ function edgelist2graph(edgelist)
end
return g
end
target_df.egonet = edgelist2graph(values(edges_json))
target_df.egonet = edgelist2graph.(values(edges_json))
```
</details>

View File

@@ -28,6 +28,17 @@
"### Comparison of execution speed of Julia, Java, Python, and C"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99c45eb6",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -35,6 +35,17 @@
"## Representing values"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2338b85b",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -34,6 +34,17 @@
"### A single function can have multiple methods"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1f7359c",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -63,6 +63,17 @@
"### Getting the data into a matrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5e16f874",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -34,6 +34,17 @@
"### Syntax and meaning of broadcasting in Julia"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d9574f4",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -34,6 +34,17 @@
"### Downloading data from the web"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10d11474",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -34,6 +34,17 @@
"### Standard process of parsing JSON response"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a179627e",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -26,6 +26,17 @@
"## Fetching, unpacking, and inspecting the Lichess data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2ade007",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -42,6 +42,17 @@
"## Advanced data frame indexing"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "65b9f2cc",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -34,6 +34,17 @@
"## Review of most important ways of creating a data frame"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b94d80ed",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -26,6 +26,17 @@
"## Converting a data frame to other value types"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6408a31f",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -34,6 +34,17 @@
"### Fetching GitHub developer data from the web"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3a42231",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -34,6 +34,17 @@
"### Loading all required packages"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8c7f637",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,

View File

@@ -34,6 +34,17 @@
"### Calculation of asian option payoff"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da189a22",
"metadata": {},
"outputs": [],
"source": [
"using Pkg\n",
"Pkg.activate(Base.current_project())"
]
},
{
"cell_type": "code",
"execution_count": 1,