Files
ISLR.jl/Chapter_06.ipynb
2020-12-10 09:52:57 -05:00

42 KiB
Raw Blame History

In [1]:
using CSV
using DataFrames
using Plots
using StatsPlots
using GLM
using Statistics
using Distributions
using Random
using MultivariateStats
In [2]:
# Parameters of a two-dimensional normal distribution centred at the origin.
μ = zeros(Int, 2)
# NOTE: despite its name, σ is used as the covariance matrix (Σ), not as
# standard deviations.
σ = [
    1.0 1.0
    1.0 2.0
]
dist = MvNormal(μ, σ)
Out[2]:
FullNormal(
dim: 2
μ: [0.0, 0.0]
Σ: [1.0 1.0; 1.0 2.0]
)
In [3]:
# Draw 100 points from `dist` (intended to resemble the data of Figure 6.14).
# An explicitly seeded RNG makes the draw, and every result derived from it,
# reproducible from run to run without touching the global RNG state.
sample = rand(MersenneTwister(614), dist, 100) # 2×100 matrix, one column per draw
Out[3]:
2×100 Array{Float64,2}:
 1.90728   0.417492   0.514817   0.453653  …  0.991181  1.40246  0.892847
 0.710246  0.31368   -0.209396  -0.669405     1.54026   1.47478  1.03204
In [4]:
# Fit a PCA model to the 2×100 sample matrix. The reported `indim = 2` shows that
# the two rows are treated as variables and the 100 columns as observations.
p = fit(PCA, sample)
Out[4]:
PCA(indim = 2, outdim = 2, principalratio = 1.0)
In [34]:
# Scatter the samples and overlay the two principal-component axes.
proj = projection(p) # columns are the principal components, largest variance first
center = mean(p)     # PCA centres the data, so the component axes pass through this point
x = sample[1, :]
y = sample[2, :]

first_pca = proj[:, 1]  # direction of largest variance
second_pca = proj[:, 2] # orthogonal direction carrying the remaining variance

# Each axis is drawn as the segment center ± 3 * direction. The parametric form
# follows the component direction itself (slope v[2] / v[1]) and never divides
# by a coordinate, so it also works for a vertical or horizontal component.
ts = [-3.0, 3.0]

# An equal aspect ratio keeps the two orthogonal axes visually perpendicular.
scatter(x, y; label = "samples", aspect_ratio = :equal)
plot!(
    center[1] .+ ts .* first_pca[1],
    center[2] .+ ts .* first_pca[2];
    label = "first principal component",
)
plot!(
    center[1] .+ ts .* second_pca[1],
    center[2] .+ ts .* second_pca[2];
    label = "second principal component",
)
Out[34]:
No description has been provided for this image
In [ ]: