Created
December 11, 2021 17:27
-
-
Save audhiaprilliant/d547376ad866ffe01b436f777b74e474 to your computer and use it in GitHub Desktop.
Genetic Algorithm for clustering analysis Using kmeans
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "suburban-replacement", | |
| "metadata": {}, | |
| "source": [ | |
| "# Genetic Algorithm for Clustering Analysis" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "adult-hormone", | |
| "metadata": {}, | |
| "source": [ | |
| "---" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "funky-birmingham", | |
| "metadata": {}, | |
| "source": [ | |
| "## Import packages" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "operational-characteristic", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Random number generation\n", | |
| "import random\n", | |
| "\n", | |
| "# Data manipulation\n", | |
| "import pandas as pd\n", | |
| "\n", | |
| "# Matrix calculation\n", | |
| "import numpy as np\n", | |
| "\n", | |
| "# Euclidian distance\n", | |
| "from scipy.spatial.distance import cdist\n", | |
| "\n", | |
| "# Genetic algorithm\n", | |
| "import pygad\n", | |
| "\n", | |
| "# Iris data set\n", | |
| "from sklearn import datasets\n", | |
| "\n", | |
| "# kmeans clustering\n", | |
| "from sklearn.cluster import KMeans" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "altered-impact", | |
| "metadata": {}, | |
| "source": [ | |
| "## Import data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "political-processing", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Iris data set (dict-like object; its 'data' and 'target' entries are read in the next cell)\n", | |
| "df = datasets.load_iris()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "falling-cradle", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Features and target\n", | |
| "x, y = df['data'], df['target']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "cognitive-valve", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[5.1, 3.5, 1.4, 0.2],\n", | |
| " [4.9, 3. , 1.4, 0.2],\n", | |
| " [4.7, 3.2, 1.3, 0.2],\n", | |
| " [4.6, 3.1, 1.5, 0.2],\n", | |
| " [5. , 3.6, 1.4, 0.2],\n", | |
| " [5.4, 3.9, 1.7, 0.4],\n", | |
| " [4.6, 3.4, 1.4, 0.3],\n", | |
| " [5. , 3.4, 1.5, 0.2],\n", | |
| " [4.4, 2.9, 1.4, 0.2],\n", | |
| " [4.9, 3.1, 1.5, 0.1],\n", | |
| " [5.4, 3.7, 1.5, 0.2],\n", | |
| " [4.8, 3.4, 1.6, 0.2],\n", | |
| " [4.8, 3. , 1.4, 0.1],\n", | |
| " [4.3, 3. , 1.1, 0.1],\n", | |
| " [5.8, 4. , 1.2, 0.2],\n", | |
| " [5.7, 4.4, 1.5, 0.4],\n", | |
| " [5.4, 3.9, 1.3, 0.4],\n", | |
| " [5.1, 3.5, 1.4, 0.3],\n", | |
| " [5.7, 3.8, 1.7, 0.3],\n", | |
| " [5.1, 3.8, 1.5, 0.3],\n", | |
| " [5.4, 3.4, 1.7, 0.2],\n", | |
| " [5.1, 3.7, 1.5, 0.4],\n", | |
| " [4.6, 3.6, 1. , 0.2],\n", | |
| " [5.1, 3.3, 1.7, 0.5],\n", | |
| " [4.8, 3.4, 1.9, 0.2],\n", | |
| " [5. , 3. , 1.6, 0.2],\n", | |
| " [5. , 3.4, 1.6, 0.4],\n", | |
| " [5.2, 3.5, 1.5, 0.2],\n", | |
| " [5.2, 3.4, 1.4, 0.2],\n", | |
| " [4.7, 3.2, 1.6, 0.2],\n", | |
| " [4.8, 3.1, 1.6, 0.2],\n", | |
| " [5.4, 3.4, 1.5, 0.4],\n", | |
| " [5.2, 4.1, 1.5, 0.1],\n", | |
| " [5.5, 4.2, 1.4, 0.2],\n", | |
| " [4.9, 3.1, 1.5, 0.2],\n", | |
| " [5. , 3.2, 1.2, 0.2],\n", | |
| " [5.5, 3.5, 1.3, 0.2],\n", | |
| " [4.9, 3.6, 1.4, 0.1],\n", | |
| " [4.4, 3. , 1.3, 0.2],\n", | |
| " [5.1, 3.4, 1.5, 0.2],\n", | |
| " [5. , 3.5, 1.3, 0.3],\n", | |
| " [4.5, 2.3, 1.3, 0.3],\n", | |
| " [4.4, 3.2, 1.3, 0.2],\n", | |
| " [5. , 3.5, 1.6, 0.6],\n", | |
| " [5.1, 3.8, 1.9, 0.4],\n", | |
| " [4.8, 3. , 1.4, 0.3],\n", | |
| " [5.1, 3.8, 1.6, 0.2],\n", | |
| " [4.6, 3.2, 1.4, 0.2],\n", | |
| " [5.3, 3.7, 1.5, 0.2],\n", | |
| " [5. , 3.3, 1.4, 0.2],\n", | |
| " [7. , 3.2, 4.7, 1.4],\n", | |
| " [6.4, 3.2, 4.5, 1.5],\n", | |
| " [6.9, 3.1, 4.9, 1.5],\n", | |
| " [5.5, 2.3, 4. , 1.3],\n", | |
| " [6.5, 2.8, 4.6, 1.5],\n", | |
| " [5.7, 2.8, 4.5, 1.3],\n", | |
| " [6.3, 3.3, 4.7, 1.6],\n", | |
| " [4.9, 2.4, 3.3, 1. ],\n", | |
| " [6.6, 2.9, 4.6, 1.3],\n", | |
| " [5.2, 2.7, 3.9, 1.4],\n", | |
| " [5. , 2. , 3.5, 1. ],\n", | |
| " [5.9, 3. , 4.2, 1.5],\n", | |
| " [6. , 2.2, 4. , 1. ],\n", | |
| " [6.1, 2.9, 4.7, 1.4],\n", | |
| " [5.6, 2.9, 3.6, 1.3],\n", | |
| " [6.7, 3.1, 4.4, 1.4],\n", | |
| " [5.6, 3. , 4.5, 1.5],\n", | |
| " [5.8, 2.7, 4.1, 1. ],\n", | |
| " [6.2, 2.2, 4.5, 1.5],\n", | |
| " [5.6, 2.5, 3.9, 1.1],\n", | |
| " [5.9, 3.2, 4.8, 1.8],\n", | |
| " [6.1, 2.8, 4. , 1.3],\n", | |
| " [6.3, 2.5, 4.9, 1.5],\n", | |
| " [6.1, 2.8, 4.7, 1.2],\n", | |
| " [6.4, 2.9, 4.3, 1.3],\n", | |
| " [6.6, 3. , 4.4, 1.4],\n", | |
| " [6.8, 2.8, 4.8, 1.4],\n", | |
| " [6.7, 3. , 5. , 1.7],\n", | |
| " [6. , 2.9, 4.5, 1.5],\n", | |
| " [5.7, 2.6, 3.5, 1. ],\n", | |
| " [5.5, 2.4, 3.8, 1.1],\n", | |
| " [5.5, 2.4, 3.7, 1. ],\n", | |
| " [5.8, 2.7, 3.9, 1.2],\n", | |
| " [6. , 2.7, 5.1, 1.6],\n", | |
| " [5.4, 3. , 4.5, 1.5],\n", | |
| " [6. , 3.4, 4.5, 1.6],\n", | |
| " [6.7, 3.1, 4.7, 1.5],\n", | |
| " [6.3, 2.3, 4.4, 1.3],\n", | |
| " [5.6, 3. , 4.1, 1.3],\n", | |
| " [5.5, 2.5, 4. , 1.3],\n", | |
| " [5.5, 2.6, 4.4, 1.2],\n", | |
| " [6.1, 3. , 4.6, 1.4],\n", | |
| " [5.8, 2.6, 4. , 1.2],\n", | |
| " [5. , 2.3, 3.3, 1. ],\n", | |
| " [5.6, 2.7, 4.2, 1.3],\n", | |
| " [5.7, 3. , 4.2, 1.2],\n", | |
| " [5.7, 2.9, 4.2, 1.3],\n", | |
| " [6.2, 2.9, 4.3, 1.3],\n", | |
| " [5.1, 2.5, 3. , 1.1],\n", | |
| " [5.7, 2.8, 4.1, 1.3],\n", | |
| " [6.3, 3.3, 6. , 2.5],\n", | |
| " [5.8, 2.7, 5.1, 1.9],\n", | |
| " [7.1, 3. , 5.9, 2.1],\n", | |
| " [6.3, 2.9, 5.6, 1.8],\n", | |
| " [6.5, 3. , 5.8, 2.2],\n", | |
| " [7.6, 3. , 6.6, 2.1],\n", | |
| " [4.9, 2.5, 4.5, 1.7],\n", | |
| " [7.3, 2.9, 6.3, 1.8],\n", | |
| " [6.7, 2.5, 5.8, 1.8],\n", | |
| " [7.2, 3.6, 6.1, 2.5],\n", | |
| " [6.5, 3.2, 5.1, 2. ],\n", | |
| " [6.4, 2.7, 5.3, 1.9],\n", | |
| " [6.8, 3. , 5.5, 2.1],\n", | |
| " [5.7, 2.5, 5. , 2. ],\n", | |
| " [5.8, 2.8, 5.1, 2.4],\n", | |
| " [6.4, 3.2, 5.3, 2.3],\n", | |
| " [6.5, 3. , 5.5, 1.8],\n", | |
| " [7.7, 3.8, 6.7, 2.2],\n", | |
| " [7.7, 2.6, 6.9, 2.3],\n", | |
| " [6. , 2.2, 5. , 1.5],\n", | |
| " [6.9, 3.2, 5.7, 2.3],\n", | |
| " [5.6, 2.8, 4.9, 2. ],\n", | |
| " [7.7, 2.8, 6.7, 2. ],\n", | |
| " [6.3, 2.7, 4.9, 1.8],\n", | |
| " [6.7, 3.3, 5.7, 2.1],\n", | |
| " [7.2, 3.2, 6. , 1.8],\n", | |
| " [6.2, 2.8, 4.8, 1.8],\n", | |
| " [6.1, 3. , 4.9, 1.8],\n", | |
| " [6.4, 2.8, 5.6, 2.1],\n", | |
| " [7.2, 3. , 5.8, 1.6],\n", | |
| " [7.4, 2.8, 6.1, 1.9],\n", | |
| " [7.9, 3.8, 6.4, 2. ],\n", | |
| " [6.4, 2.8, 5.6, 2.2],\n", | |
| " [6.3, 2.8, 5.1, 1.5],\n", | |
| " [6.1, 2.6, 5.6, 1.4],\n", | |
| " [7.7, 3. , 6.1, 2.3],\n", | |
| " [6.3, 3.4, 5.6, 2.4],\n", | |
| " [6.4, 3.1, 5.5, 1.8],\n", | |
| " [6. , 3. , 4.8, 1.8],\n", | |
| " [6.9, 3.1, 5.4, 2.1],\n", | |
| " [6.7, 3.1, 5.6, 2.4],\n", | |
| " [6.9, 3.1, 5.1, 2.3],\n", | |
| " [5.8, 2.7, 5.1, 1.9],\n", | |
| " [6.8, 3.2, 5.9, 2.3],\n", | |
| " [6.7, 3.3, 5.7, 2.5],\n", | |
| " [6.7, 3. , 5.2, 2.3],\n", | |
| " [6.3, 2.5, 5. , 1.9],\n", | |
| " [6.5, 3. , 5.2, 2. ],\n", | |
| " [6.2, 3.4, 5.4, 2.3],\n", | |
| " [5.9, 3. , 5.1, 1.8]])" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Preview the feature matrix (one row per observation, one column per feature)\n", | |
| "x" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "fleet-dietary", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Minimum value: 0.1\n", | |
| "Maximum value: 7.9\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Minimum and maximum values\n", | |
| "print('Minimum value: {}'.format(x.min()))\n", | |
| "print('Maximum value: {}'.format(x.max()))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "polish-cherry", | |
| "metadata": {}, | |
| "source": [ | |
| "## Create k-means clustering function" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "latest-excellence", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Number of clusters (also the number of centroids encoded in each chromosome)\n", | |
| "k = 3" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "mathematical-closure", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Objective function\n", | |
| "def kmeans(solution, solutionIndex):\n", | |
| " # Global vars\n", | |
| " global x, k\n", | |
| " \n", | |
| " # Centroids\n", | |
| " centroids = np.array(solution).reshape(k, len(solution) // k)\n", | |
| " # Finding the distance between centroids and all data points\n", | |
| " distances = cdist(x, centroids, 'euclidean')\n", | |
| " # Centroid with the minimum Distance\n", | |
| " points = np.array([np.argmin(i) for i in distances])\n", | |
| " \n", | |
| " # Calculate the sum square error\n", | |
| " error = []\n", | |
| " for idx in np.unique(points):\n", | |
| " temp_cent = x[points == idx]\n", | |
| " distances = cdist(temp_cent, centroids[idx].reshape(1, -1), 'euclidean')\n", | |
| " error.append(distances.sum())\n", | |
| " # Return result\n", | |
| " return np.sum(error)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "young-dream", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Fitness function\n", | |
| "def fitnessFunction(solution, solutionIndex):\n", | |
| " outputExpected = kmeans(solution, solutionIndex)\n", | |
| " fitnessValue = 1 / (outputExpected + 0.000001)\n", | |
| " return fitnessValue" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "endless-acrylic", | |
| "metadata": {}, | |
| "source": [ | |
| "## Run the Genetic Algorithm" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "necessary-tyler", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Prepare the parameters\n", | |
| "\n", | |
| "# Generation\n", | |
| "numberGeneration = 1000 # Number of generation\n", | |
| "numberParentsMating = 5\n", | |
| "solutionPerPopulation = 50 # Number of chromosomes in each generation\n", | |
| "parents = -1\n", | |
| "\n", | |
| "# Genes\n", | |
| "numberGenes = k * x.shape[1] # Number of genes in each chromosome\n", | |
| "geneType = float # Data type in each gene\n", | |
| "\n", | |
| "# Range of values\n", | |
| "minValue = 0 # Minimum value of solution\n", | |
| "maxValue = 8 # Maximum value of solution\n", | |
| "\n", | |
| "# Selection\n", | |
| "selectionType = 'sss' # Selection using steady state selection\n", | |
| "\n", | |
| "# Cross over\n", | |
| "crossoverType = 'single_point' # Cross over using single point method\n", | |
| "crossoverRate = 0.25 # Cross over rate (Pc)\n", | |
| "\n", | |
| "# Mutation rate\n", | |
| "# Mutation\n", | |
| "mutationType = 'random' # Mutation using random method\n", | |
| "mutationReplacement = True # Replace gene with random value\n", | |
| "mutationMin = 0\n", | |
| "mutationMax = 8\n", | |
| "mutationRate = 10 # Mutation rate (Pm)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "id": "general-concentration", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Set the parameters into Genetic Algorithm function\n", | |
| "geneticAlgorithm = pygad.GA(\n", | |
| " # Number of generation\n", | |
| " num_generations = numberGeneration,\n", | |
| " \n", | |
| " # Number of parents mating\n", | |
| " num_parents_mating = numberParentsMating,\n", | |
| " \n", | |
| " # Number of gene each chromosome\n", | |
| " num_genes = numberGenes,\n", | |
| " \n", | |
| " # Gene type\n", | |
| " gene_type = geneType,\n", | |
| " \n", | |
| " # Fitness function\n", | |
| " fitness_func = fitnessFunction,\n", | |
| " \n", | |
| " # Number solution per population\n", | |
| " sol_per_pop = solutionPerPopulation,\n", | |
| " \n", | |
| " # Min and max value\n", | |
| " init_range_low = minValue,\n", | |
| " init_range_high = maxValue,\n", | |
| " \n", | |
| " # Selection\n", | |
| " parent_selection_type = selectionType,\n", | |
| " keep_parents = parents,\n", | |
| " \n", | |
| " # Cross over\n", | |
| " crossover_type = crossoverType,\n", | |
| " #crossover_probability = crossoverRate,\n", | |
| " \n", | |
| " # Mutation\n", | |
| " mutation_type = mutationType,\n", | |
| " mutation_by_replacement = mutationReplacement,\n", | |
| " random_mutation_min_val = mutationMin,\n", | |
| " random_mutation_max_val = mutationMax,\n", | |
| " mutation_percent_genes = mutationRate,\n", | |
| " \n", | |
| " # Solutions\n", | |
| " save_solutions = True,\n", | |
| " save_best_solutions = True,\n", | |
| " suppress_warnings = True\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "delayed-continent", | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Run the Genetic Algorithm with the parameters configured on geneticAlgorithm\n", | |
| "geneticAlgorithm.run()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "id": "oriented-reviewer", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Parameters of the best solution : \n", | |
| "[[5.92514492 2.80725404 4.40248519 1.41205877]\n", | |
| " [6.73889534 3.068677 5.61225383 2.11112409]\n", | |
| " [5.01679467 3.414519 1.46429548 0.23415793]]\n", | |
| "Fitness value of the best solution : 0.01\n", | |
| "Index of the best solution : 0\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Solution\n", | |
| "solution, solutionFitness, solutionIndex = geneticAlgorithm.best_solution()\n", | |
| "print('Parameters of the best solution : \\n{solution}'.format(solution = solution.reshape(k, len(solution) // k)))\n", | |
| "print('Fitness value of the best solution : {solutionFitness}'.format(solutionFitness = round(solutionFitness, ndigits = 2)))\n", | |
| "print('Index of the best solution : {solutionIndex}'.format(solutionIndex = solutionIndex))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "bigger-inside", | |
| "metadata": {}, | |
| "source": [ | |
| "## Cluster accuracy" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "sixth-liberty", | |
| "metadata": {}, | |
| "source": [ | |
| "### Real labels" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "id": "early-habitat", | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[5.006, 3.428, 1.462, 0.246],\n", | |
| " [5.936, 2.77 , 4.26 , 1.326],\n", | |
| " [6.588, 2.974, 5.552, 2.026]])" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Array of data set\n", | |
| "array = np.column_stack([x, y])\n", | |
| "\n", | |
| "# Data frame\n", | |
| "df = pd.DataFrame(data = array,\n", | |
| " columns = [\n", | |
| " 'Feature 1',\n", | |
| " 'Feature 2',\n", | |
| " 'Feature 3',\n", | |
| " 'Feature 4',\n", | |
| " 'Cluster'\n", | |
| " ]\n", | |
| ")\n", | |
| "\n", | |
| "# Centroids in data frame\n", | |
| "df_centroids = df.groupby(['Cluster']).mean().reset_index().rename(columns = {\n", | |
| " 'Feature 1': 'Centroid Feature 1',\n", | |
| " 'Feature 2': 'Centroid Feature 2',\n", | |
| " 'Feature 3': 'Centroid Feature 3',\n", | |
| " 'Feature 4': 'Centroid Feature 4'\n", | |
| " }\n", | |
| ").astype({'Cluster': object})\n", | |
| "\n", | |
| "# Centroids in array\n", | |
| "del df_centroids['Cluster']\n", | |
| "np.array(df_centroids)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "id": "damaged-benefit", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
| " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
| " 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", | |
| " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", | |
| " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", | |
| " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", | |
| " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])" | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Real cluster labels (the target array taken from the loaded data set)\n", | |
| "y" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "systematic-groove", | |
| "metadata": {}, | |
| "source": [ | |
| "### Genetic Algorithm" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "id": "located-eclipse", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Final centroids\n", | |
| "centroids = solution.reshape(k, len(solution) // k)\n", | |
| "# Finding the distance between centroids and all data points\n", | |
| "distances = cdist(x, centroids, 'euclidean')\n", | |
| "# Centroid with the minimum Distance\n", | |
| "points = np.array([np.argmin(i) for i in distances])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "id": "norman-hampshire", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[5.92514492, 2.80725404, 4.40248519, 1.41205877],\n", | |
| " [6.73889534, 3.068677 , 5.61225383, 2.11112409],\n", | |
| " [5.01679467, 3.414519 , 1.46429548, 0.23415793]])" | |
| ] | |
| }, | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Centroids in array\n", | |
| "solution.reshape(k, len(solution) // k)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "id": "regulated-connection", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", | |
| " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", | |
| " 2, 2, 2, 2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
| " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
| " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,\n", | |
| " 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,\n", | |
| " 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0], dtype=int64)" | |
| ] | |
| }, | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Cluster labels from the Genetic Algorithm (row index of the nearest centroid; numbering need not match the real labels)\n", | |
| "points" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "adverse-intensity", | |
| "metadata": {}, | |
| "source": [ | |
| "### Sklearn kmeans++" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "id": "mechanical-greene", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "kmeansPlus = KMeans(\n", | |
| " n_clusters = 3\n", | |
| " )\n", | |
| "kmeansPlus.fit(x)\n", | |
| "centroid = kmeansPlus.cluster_centers_" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "id": "crazy-configuration", | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097],\n", | |
| " [5.006 , 3.428 , 1.462 , 0.246 ],\n", | |
| " [6.85 , 3.07368421, 5.74210526, 2.07105263]])" | |
| ] | |
| }, | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Centroids in array (cluster_centers_ of the fitted scikit-learn model)\n", | |
| "centroid" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "id": "major-trustee", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", | |
| " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", | |
| " 1, 1, 1, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
| " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", | |
| " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,\n", | |
| " 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2,\n", | |
| " 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0])" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Cluster labels assigned by the fitted scikit-learn model\n", | |
| "kmeansPlus.labels_" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.8.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment