From f7d6f6126499ca6a56a0e7fca81520a85b7c418a Mon Sep 17 00:00:00 2001 From: tndoan Date: Wed, 9 Dec 2020 18:42:51 -0500 Subject: [PATCH] Chapter 10 --- Chapter_10.ipynb | 562 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 562 insertions(+) create mode 100644 Chapter_10.ipynb diff --git a/Chapter_10.ipynb b/Chapter_10.ipynb new file mode 100644 index 0000000..93aadd7 --- /dev/null +++ b/Chapter_10.ipynb @@ -0,0 +1,562 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "using CSV\n", + "using DataFrames\n", + "using Plots\n", + "using StatsPlots\n", + "using GLM\n", + "using Statistics\n", + "using Distributions\n", + "using Random\n", + "using MultivariateStats\n", + "using Clustering" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X = randn(2, 50)\n", + "\n", + "X[1, 1:25] = X[1, 1:25] .+ 3\n", + "X[2, 1:25] = X[2, 1:25] .- 4\n", + "scatter(X[1, :], X[2, :], legend=false)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "KmeansResult{Array{Float64,2},Float64,Int64}([0.11571504242176926 3.2282230448951474; 0.07818013976821715 -3.943287015604491], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [0.16006696751299643, 0.6952238933557737, 5.669535007076988, 1.223402034145309, 0.20838973073509948, 1.7585053507876864, 1.852990031298667, 0.9500953942469366, 2.644713516983856, 0.5686020455334386 … 10.04885271794735, 2.2964692093458687, 0.6863426778361831, 0.2597567234715719, 0.3032486718175304, 3.8931318026126087, 3.9402046638591504, 2.5515240630473897, 16.965344480984694, 3.3167364079110113], [25, 25], [25, 25], 92.46223926742567, 2, true)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = kmeans(X, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "centers = result.centers\n", + "\n", + "scatter(centers[1, :], centers[2, :], marker = (:hexagon, :red, stroke(3, 0.2, :black, :dot)), label=\"Centroid\")\n", + "scatter!(X[1, result.assignments .== 1], X[2, result.assignments .== 1], label=\"Cluster 1\", marker=(:yellow))\n", + "scatter!(X[1, result.assignments .== 2], X[2, result.assignments .== 2], label=\"Cluster 2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "KmeansResult{Array{Float64,2},Float64,Int64}([0.5110651841630944 3.2282230448951474 -1.4656855245435316; 0.2230092561915514 -3.943287015604491 -0.5011363259251198], [2, 2, 2, 2, 2, 2, 2, 2, 2, 2 … 3, 3, 1, 1, 1, 1, 1, 3, 1, 1], [0.16006696751299643, 0.6952238933557737, 5.669535007076988, 1.223402034145309, 0.20838973073509948, 1.7585053507876864, 1.852990031298667, 0.9500953942469366, 2.644713516983856, 0.5686020455334386 … 2.2721356434702695, 0.235137819577667, 0.556206536626782, 0.5890275720220514, 0.1203860200820549, 3.0047005422254394, 4.357682810107818, 1.0772573038248558, 14.024911424667273, 3.9374550907310217], [20, 25, 5], [20, 25, 5], 74.7345185135407, 4, true)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result = kmeans(X, 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "centers = result.centers\n", + "\n", + "scatter(centers[1, :], centers[2, :], marker = (:hexagon, :red, stroke(3, 0.2, :black, :dot)), label=\"Centroid\")\n", + "scatter!(X[1, result.assignments .== 1], X[2, result.assignments .== 1], label=\"Cluster 1\", marker=(:yellow))\n", + "scatter!(X[1, result.assignments .== 2], X[2, result.assignments .== 2], label=\"Cluster 2\", marker=(:green))\n", + "scatter!(X[1, result.assignments .== 3], X[2, result.assignments .== 3], label=\"Cluster 3\", marker=(:blue))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.5.1", + "language": "julia", + "name": "julia-1.5" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.5.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}