audhiaprilliant · December 11, 2021 17:27
diff --git a/genetic_algorithm_kmeans.ipynb b/genetic_algorithm_kmeans.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "id": "suburban-replacement",
   "metadata": {},
   "source": [
    "# Genetic Algorithm based on Clustering Analysis"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "adult-hormone",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "funky-birmingham",
   "metadata": {},
   "source": [
    "## Import packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "operational-characteristic",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data manipulation\n",
    "import pandas as pd\n",
    "\n",
    "# Matrix calculation\n",
    "import numpy as np\n",
    "\n",
    "# Euclidean distance\n",
    "from scipy.spatial.distance import cdist\n",
    "\n",
    "# Genetic algorithm\n",
    "import pygad\n",
    "\n",
    "# Iris data set\n",
    "from sklearn import datasets\n",
    "\n",
    "# kmeans clustering\n",
    "from sklearn.cluster import KMeans"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "altered-impact",
   "metadata": {},
   "source": [
    "## Import data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "political-processing",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Iris data set\n",
    "df = datasets.load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "falling-cradle",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Features and target\n",
    "x, y = df['data'], df['target']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cognitive-valve",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[5.1, 3.5, 1.4, 0.2],\n",
       "       [4.9, 3. , 1.4, 0.2],\n",
       "       [4.7, 3.2, 1.3, 0.2],\n",
       "       [4.6, 3.1, 1.5, 0.2],\n",
       "       [5. , 3.6, 1.4, 0.2]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview only the first rows; showing the whole array bloats the saved notebook\n",
    "x[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "fleet-dietary",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Minimum value: 0.1\n",
      "Maximum value: 7.9\n"
     ]
    }
   ],
   "source": [
    "# Smallest and largest feature value over the whole data set\n",
    "print(\n",
    "    'Minimum value: {}'.format(x.min()),\n",
    "    'Maximum value: {}'.format(x.max()),\n",
    "    sep = '\\n'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "polish-cherry",
   "metadata": {},
   "source": [
    "## Create k-means clustering function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "latest-excellence",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of clusters\n",
    "k = 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "mathematical-closure",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Objective function\n",
    "def kmeans(solution, solutionIndex):\n",
    "    \"\"\"Return the clustering cost of the centroids encoded in `solution`.\n",
    "\n",
    "    The chromosome is reshaped into `k` centroid rows and every row of the\n",
    "    module-level data `x` is assigned to its nearest centroid.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    solution\n",
    "        Flat chromosome holding the coordinates of all `k` centroids.\n",
    "    solutionIndex\n",
    "        Accepted because fitnessFunction forwards it; not used here.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Sum of the Euclidean distances between each point and its nearest\n",
    "    centroid. NOTE: the distances are not squared, so this is not the\n",
    "    sum of squared errors that standard k-means minimises.\n",
    "    \"\"\"\n",
    "    # Centroids: one row per cluster\n",
    "    centroids = np.array(solution).reshape(k, len(solution) // k)\n",
    "    # Distance between every data point and every centroid\n",
    "    distances = cdist(x, centroids, 'euclidean')\n",
    "    # A point belongs to its nearest centroid, so its contribution to the\n",
    "    # cost is the row-wise minimum; no second distance pass is needed\n",
    "    return distances.min(axis = 1).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "young-dream",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fitness function\n",
    "def fitnessFunction(solution, solutionIndex):\n",
    "    \"\"\"Turn the clustering cost of `solution` into a fitness value.\n",
    "\n",
    "    The fitness is the reciprocal of the cost returned by `kmeans`, so a\n",
    "    lower cost gives a higher fitness.\n",
    "    \"\"\"\n",
    "    # The small offset keeps the division defined when the cost is exactly 0\n",
    "    return 1 / (kmeans(solution, solutionIndex) + 0.000001)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "endless-acrylic",
   "metadata": {},
   "source": [
    "## Run the Genetic Algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "necessary-tyler",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare the parameters\n",
    "\n",
    "# Generation\n",
    "numberGeneration = 1000        # Number of generations\n",
    "numberParentsMating = 5        # Number of solutions selected as parents\n",
    "solutionPerPopulation = 50     # Number of chromosomes in each generation\n",
    "parents = -1                   # Passed as keep_parents; -1 keeps all parents (see PyGAD docs)\n",
    "\n",
    "# Genes\n",
    "numberGenes = k * x.shape[1]   # One gene per centroid coordinate (clusters * features)\n",
    "geneType = float               # Data type in each gene\n",
    "\n",
    "# Range of values (the data spans 0.1 to 7.9)\n",
    "minValue = 0                   # Minimum value of solution\n",
    "maxValue = 8                   # Maximum value of solution\n",
    "\n",
    "# Selection\n",
    "selectionType = 'sss'          # Selection using steady state selection\n",
    "\n",
    "# Cross over\n",
    "crossoverType = 'single_point' # Cross over using single point method\n",
    "crossoverRate = 0.25           # Cross over rate (Pc); not passed to pygad.GA at the moment\n",
    "\n",
    "# Mutation\n",
    "mutationType = 'random'        # Mutation using random method\n",
    "mutationReplacement = True     # Replace gene with random value\n",
    "mutationMin = minValue         # Mutated genes stay inside the solution range\n",
    "mutationMax = maxValue\n",
    "mutationRate = 10              # Percentage of genes to mutate (mutation_percent_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "general-concentration",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Genetic Algorithm instance from the prepared parameters\n",
    "geneticAlgorithm = pygad.GA(\n",
    "    # Generations, population size and chromosome layout\n",
    "    num_generations = numberGeneration,\n",
    "    sol_per_pop = solutionPerPopulation,\n",
    "    num_parents_mating = numberParentsMating,\n",
    "    num_genes = numberGenes,\n",
    "    gene_type = geneType,\n",
    "\n",
    "    # Fitness function evaluated for every chromosome\n",
    "    fitness_func = fitnessFunction,\n",
    "\n",
    "    # Range of the initial gene values\n",
    "    init_range_low = minValue,\n",
    "    init_range_high = maxValue,\n",
    "\n",
    "    # Parent selection\n",
    "    parent_selection_type = selectionType,\n",
    "    keep_parents = parents,\n",
    "\n",
    "    # Cross over (crossover_probability is deliberately not passed,\n",
    "    # so crossoverRate has no effect)\n",
    "    crossover_type = crossoverType,\n",
    "\n",
    "    # Mutation\n",
    "    mutation_type = mutationType,\n",
    "    mutation_by_replacement = mutationReplacement,\n",
    "    random_mutation_min_val = mutationMin,\n",
    "    random_mutation_max_val = mutationMax,\n",
    "    mutation_percent_genes = mutationRate,\n",
    "\n",
    "    # Bookkeeping of the visited solutions\n",
    "    save_solutions = True,\n",
    "    save_best_solutions = True,\n",
    "    suppress_warnings = True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "delayed-continent",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Run the Genetic Algorithm\n",
    "geneticAlgorithm.run()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "oriented-reviewer",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Parameters of the best solution    : \n",
      "[[5.92514492 2.80725404 4.40248519 1.41205877]\n",
      " [6.73889534 3.068677   5.61225383 2.11112409]\n",
      " [5.01679467 3.414519   1.46429548 0.23415793]]\n",
      "Fitness value of the best solution : 0.01\n",
      "Index of the best solution         : 0\n"
     ]
    }
   ],
   "source": [
    "# Best chromosome found, its fitness and its index\n",
    "solution, solutionFitness, solutionIndex = geneticAlgorithm.best_solution()\n",
    "\n",
    "# Report the centroids (one row per cluster), the rounded fitness and the index\n",
    "print(\n",
    "    'Parameters of the best solution    : \\n{solution}'.format(solution = solution.reshape(k, len(solution) // k)),\n",
    "    'Fitness value of the best solution : {solutionFitness}'.format(solutionFitness = round(solutionFitness, ndigits = 2)),\n",
    "    'Index of the best solution         : {solutionIndex}'.format(solutionIndex = solutionIndex),\n",
    "    sep = '\\n'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bigger-inside",
   "metadata": {},
   "source": [
    "## Cluster accuracy"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sixth-liberty",
   "metadata": {},
   "source": [
    "### Real labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "early-habitat",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[5.006, 3.428, 1.462, 0.246],\n",
       "       [5.936, 2.77 , 4.26 , 1.326],\n",
       "       [6.588, 2.974, 5.552, 2.026]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Data frame of the features together with the real labels.\n",
    "# A separate name is used so that `df` (the loaded data set) is not\n",
    "# overwritten and the earlier cells can still be re-run.\n",
    "df_iris = pd.DataFrame(\n",
    "    data = np.column_stack([x, y]),\n",
    "    columns = [\n",
    "        'Feature 1',\n",
    "        'Feature 2',\n",
    "        'Feature 3',\n",
    "        'Feature 4',\n",
    "        'Cluster'\n",
    "    ]\n",
    ")\n",
    "\n",
    "# Centroids in data frame: mean of every feature per real label.\n",
    "# 'Cluster' becomes the index, so it needs no separate removal.\n",
    "df_centroids = df_iris.groupby('Cluster').mean().add_prefix('Centroid ')\n",
    "\n",
    "# Centroids in array\n",
    "np.array(df_centroids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "damaged-benefit",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Real cluster labels\n",
    "y"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "systematic-groove",
   "metadata": {},
   "source": [
    "### Genetic Algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "located-eclipse",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Centroids of the best solution, one row per cluster\n",
    "centroids = solution.reshape(k, len(solution) // k)\n",
    "# Euclidean distance from every data point to every centroid\n",
    "distances = cdist(x, centroids, 'euclidean')\n",
    "# Label of each point: index of its nearest centroid\n",
    "points = np.argmin(distances, axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "norman-hampshire",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[5.92514492, 2.80725404, 4.40248519, 1.41205877],\n",
       "       [6.73889534, 3.068677  , 5.61225383, 2.11112409],\n",
       "       [5.01679467, 3.414519  , 1.46429548, 0.23415793]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Centroids in array\n",
    "solution.reshape(k, len(solution) // k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "regulated-connection",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,\n",
       "       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,\n",
       "       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0], dtype=int64)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cluster labels\n",
    "points"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "adverse-intensity",
   "metadata": {},
   "source": [
    "### Sklearn kmeans++"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "mechanical-greene",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reference clustering with sklearn, using the same number of clusters\n",
    "# as the Genetic Algorithm instead of a hard-coded value\n",
    "kmeansPlus = KMeans(\n",
    "        n_clusters = k\n",
    "    )\n",
    "kmeansPlus.fit(x)\n",
    "centroid = kmeansPlus.cluster_centers_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "crazy-configuration",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097],\n",
       "       [5.006     , 3.428     , 1.462     , 0.246     ],\n",
       "       [6.85      , 3.07368421, 5.74210526, 2.07105263]])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Centroids in array\n",
    "centroid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "major-trustee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,\n",
       "       2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2,\n",
       "       2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cluster labels\n",
    "kmeansPlus.labels_"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"id": "suburban-replacement",
	"metadata": {},
	"source": [
	"# Genetic Algorithm based on Clustering Analysis"
	]
	},
	{
	"cell_type": "markdown",
	"id": "adult-hormone",
	"metadata": {},
	"source": [
	"---"
	]
	},
	{
	"cell_type": "markdown",
	"id": "funky-birmingham",
	"metadata": {},
	"source": [
	"## Import packages"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"id": "operational-characteristic",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Data manipulation\n",
	"import pandas as pd\n",
	"\n",
	"# Matrix calculation\n",
	"import numpy as np\n",
	"\n",
	"# Euclidean distance\n",
	"from scipy.spatial.distance import cdist\n",
	"\n",
	"# Genetic algorithm\n",
	"import pygad\n",
	"\n",
	"# Iris data set\n",
	"from sklearn import datasets\n",
	"\n",
	"# kmeans clustering\n",
	"from sklearn.cluster import KMeans"
	]
	},
	{
	"cell_type": "markdown",
	"id": "altered-impact",
	"metadata": {},
	"source": [
	"## Import data"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"id": "political-processing",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Iris data set\n",
	"df = datasets.load_iris()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"id": "falling-cradle",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Features and target\n",
	"x, y = df['data'], df['target']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"id": "cognitive-valve",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[5.1, 3.5, 1.4, 0.2],\n",
	" [4.9, 3. , 1.4, 0.2],\n",
	" [4.7, 3.2, 1.3, 0.2],\n",
	" [4.6, 3.1, 1.5, 0.2],\n",
	" [5. , 3.6, 1.4, 0.2],\n",
	" [5.4, 3.9, 1.7, 0.4],\n",
	" [4.6, 3.4, 1.4, 0.3],\n",
	" [5. , 3.4, 1.5, 0.2],\n",
	" [4.4, 2.9, 1.4, 0.2],\n",
	" [4.9, 3.1, 1.5, 0.1],\n",
	" [5.4, 3.7, 1.5, 0.2],\n",
	" [4.8, 3.4, 1.6, 0.2],\n",
	" [4.8, 3. , 1.4, 0.1],\n",
	" [4.3, 3. , 1.1, 0.1],\n",
	" [5.8, 4. , 1.2, 0.2],\n",
	" [5.7, 4.4, 1.5, 0.4],\n",
	" [5.4, 3.9, 1.3, 0.4],\n",
	" [5.1, 3.5, 1.4, 0.3],\n",
	" [5.7, 3.8, 1.7, 0.3],\n",
	" [5.1, 3.8, 1.5, 0.3],\n",
	" [5.4, 3.4, 1.7, 0.2],\n",
	" [5.1, 3.7, 1.5, 0.4],\n",
	" [4.6, 3.6, 1. , 0.2],\n",
	" [5.1, 3.3, 1.7, 0.5],\n",
	" [4.8, 3.4, 1.9, 0.2],\n",
	" [5. , 3. , 1.6, 0.2],\n",
	" [5. , 3.4, 1.6, 0.4],\n",
	" [5.2, 3.5, 1.5, 0.2],\n",
	" [5.2, 3.4, 1.4, 0.2],\n",
	" [4.7, 3.2, 1.6, 0.2],\n",
	" [4.8, 3.1, 1.6, 0.2],\n",
	" [5.4, 3.4, 1.5, 0.4],\n",
	" [5.2, 4.1, 1.5, 0.1],\n",
	" [5.5, 4.2, 1.4, 0.2],\n",
	" [4.9, 3.1, 1.5, 0.2],\n",
	" [5. , 3.2, 1.2, 0.2],\n",
	" [5.5, 3.5, 1.3, 0.2],\n",
	" [4.9, 3.6, 1.4, 0.1],\n",
	" [4.4, 3. , 1.3, 0.2],\n",
	" [5.1, 3.4, 1.5, 0.2],\n",
	" [5. , 3.5, 1.3, 0.3],\n",
	" [4.5, 2.3, 1.3, 0.3],\n",
	" [4.4, 3.2, 1.3, 0.2],\n",
	" [5. , 3.5, 1.6, 0.6],\n",
	" [5.1, 3.8, 1.9, 0.4],\n",
	" [4.8, 3. , 1.4, 0.3],\n",
	" [5.1, 3.8, 1.6, 0.2],\n",
	" [4.6, 3.2, 1.4, 0.2],\n",
	" [5.3, 3.7, 1.5, 0.2],\n",
	" [5. , 3.3, 1.4, 0.2],\n",
	" [7. , 3.2, 4.7, 1.4],\n",
	" [6.4, 3.2, 4.5, 1.5],\n",
	" [6.9, 3.1, 4.9, 1.5],\n",
	" [5.5, 2.3, 4. , 1.3],\n",
	" [6.5, 2.8, 4.6, 1.5],\n",
	" [5.7, 2.8, 4.5, 1.3],\n",
	" [6.3, 3.3, 4.7, 1.6],\n",
	" [4.9, 2.4, 3.3, 1. ],\n",
	" [6.6, 2.9, 4.6, 1.3],\n",
	" [5.2, 2.7, 3.9, 1.4],\n",
	" [5. , 2. , 3.5, 1. ],\n",
	" [5.9, 3. , 4.2, 1.5],\n",
	" [6. , 2.2, 4. , 1. ],\n",
	" [6.1, 2.9, 4.7, 1.4],\n",
	" [5.6, 2.9, 3.6, 1.3],\n",
	" [6.7, 3.1, 4.4, 1.4],\n",
	" [5.6, 3. , 4.5, 1.5],\n",
	" [5.8, 2.7, 4.1, 1. ],\n",
	" [6.2, 2.2, 4.5, 1.5],\n",
	" [5.6, 2.5, 3.9, 1.1],\n",
	" [5.9, 3.2, 4.8, 1.8],\n",
	" [6.1, 2.8, 4. , 1.3],\n",
	" [6.3, 2.5, 4.9, 1.5],\n",
	" [6.1, 2.8, 4.7, 1.2],\n",
	" [6.4, 2.9, 4.3, 1.3],\n",
	" [6.6, 3. , 4.4, 1.4],\n",
	" [6.8, 2.8, 4.8, 1.4],\n",
	" [6.7, 3. , 5. , 1.7],\n",
	" [6. , 2.9, 4.5, 1.5],\n",
	" [5.7, 2.6, 3.5, 1. ],\n",
	" [5.5, 2.4, 3.8, 1.1],\n",
	" [5.5, 2.4, 3.7, 1. ],\n",
	" [5.8, 2.7, 3.9, 1.2],\n",
	" [6. , 2.7, 5.1, 1.6],\n",
	" [5.4, 3. , 4.5, 1.5],\n",
	" [6. , 3.4, 4.5, 1.6],\n",
	" [6.7, 3.1, 4.7, 1.5],\n",
	" [6.3, 2.3, 4.4, 1.3],\n",
	" [5.6, 3. , 4.1, 1.3],\n",
	" [5.5, 2.5, 4. , 1.3],\n",
	" [5.5, 2.6, 4.4, 1.2],\n",
	" [6.1, 3. , 4.6, 1.4],\n",
	" [5.8, 2.6, 4. , 1.2],\n",
	" [5. , 2.3, 3.3, 1. ],\n",
	" [5.6, 2.7, 4.2, 1.3],\n",
	" [5.7, 3. , 4.2, 1.2],\n",
	" [5.7, 2.9, 4.2, 1.3],\n",
	" [6.2, 2.9, 4.3, 1.3],\n",
	" [5.1, 2.5, 3. , 1.1],\n",
	" [5.7, 2.8, 4.1, 1.3],\n",
	" [6.3, 3.3, 6. , 2.5],\n",
	" [5.8, 2.7, 5.1, 1.9],\n",
	" [7.1, 3. , 5.9, 2.1],\n",
	" [6.3, 2.9, 5.6, 1.8],\n",
	" [6.5, 3. , 5.8, 2.2],\n",
	" [7.6, 3. , 6.6, 2.1],\n",
	" [4.9, 2.5, 4.5, 1.7],\n",
	" [7.3, 2.9, 6.3, 1.8],\n",
	" [6.7, 2.5, 5.8, 1.8],\n",
	" [7.2, 3.6, 6.1, 2.5],\n",
	" [6.5, 3.2, 5.1, 2. ],\n",
	" [6.4, 2.7, 5.3, 1.9],\n",
	" [6.8, 3. , 5.5, 2.1],\n",
	" [5.7, 2.5, 5. , 2. ],\n",
	" [5.8, 2.8, 5.1, 2.4],\n",
	" [6.4, 3.2, 5.3, 2.3],\n",
	" [6.5, 3. , 5.5, 1.8],\n",
	" [7.7, 3.8, 6.7, 2.2],\n",
	" [7.7, 2.6, 6.9, 2.3],\n",
	" [6. , 2.2, 5. , 1.5],\n",
	" [6.9, 3.2, 5.7, 2.3],\n",
	" [5.6, 2.8, 4.9, 2. ],\n",
	" [7.7, 2.8, 6.7, 2. ],\n",
	" [6.3, 2.7, 4.9, 1.8],\n",
	" [6.7, 3.3, 5.7, 2.1],\n",
	" [7.2, 3.2, 6. , 1.8],\n",
	" [6.2, 2.8, 4.8, 1.8],\n",
	" [6.1, 3. , 4.9, 1.8],\n",
	" [6.4, 2.8, 5.6, 2.1],\n",
	" [7.2, 3. , 5.8, 1.6],\n",
	" [7.4, 2.8, 6.1, 1.9],\n",
	" [7.9, 3.8, 6.4, 2. ],\n",
	" [6.4, 2.8, 5.6, 2.2],\n",
	" [6.3, 2.8, 5.1, 1.5],\n",
	" [6.1, 2.6, 5.6, 1.4],\n",
	" [7.7, 3. , 6.1, 2.3],\n",
	" [6.3, 3.4, 5.6, 2.4],\n",
	" [6.4, 3.1, 5.5, 1.8],\n",
	" [6. , 3. , 4.8, 1.8],\n",
	" [6.9, 3.1, 5.4, 2.1],\n",
	" [6.7, 3.1, 5.6, 2.4],\n",
	" [6.9, 3.1, 5.1, 2.3],\n",
	" [5.8, 2.7, 5.1, 1.9],\n",
	" [6.8, 3.2, 5.9, 2.3],\n",
	" [6.7, 3.3, 5.7, 2.5],\n",
	" [6.7, 3. , 5.2, 2.3],\n",
	" [6.3, 2.5, 5. , 1.9],\n",
	" [6.5, 3. , 5.2, 2. ],\n",
	" [6.2, 3.4, 5.4, 2.3],\n",
	" [5.9, 3. , 5.1, 1.8]])"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Preview the data\n",
	"x"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"id": "fleet-dietary",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Minimum value: 0.1\n",
	"Maximum value: 7.9\n"
	]
	}
	],
	"source": [
	"# Minimum and maximum values\n",
	"print('Minimum value: {}'.format(x.min()))\n",
	"print('Maximum value: {}'.format(x.max()))"
	]
	},
	{
	"cell_type": "markdown",
	"id": "polish-cherry",
	"metadata": {},
	"source": [
	"## Create k-means clustering function"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"id": "latest-excellence",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Number of clusters\n",
	"k = 3"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"id": "mathematical-closure",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Objective function\n",
	"def kmeans(solution, solutionIndex):\n",
	"    \"\"\"Return the clustering cost of the centroids encoded in `solution`.\n",
	"\n",
	"    The chromosome is reshaped into `k` centroid rows and every row of the\n",
	"    module-level data `x` is assigned to its nearest centroid.\n",
	"\n",
	"    Parameters\n",
	"    ----------\n",
	"    solution\n",
	"        Flat chromosome holding the coordinates of all `k` centroids.\n",
	"    solutionIndex\n",
	"        Accepted because fitnessFunction forwards it; not used here.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    Sum of the Euclidean distances between each point and its nearest\n",
	"    centroid. NOTE: the distances are not squared, so this is not the\n",
	"    sum of squared errors that standard k-means minimises.\n",
	"    \"\"\"\n",
	"    # Centroids: one row per cluster\n",
	"    centroids = np.array(solution).reshape(k, len(solution) // k)\n",
	"    # Distance between every data point and every centroid\n",
	"    distances = cdist(x, centroids, 'euclidean')\n",
	"    # A point belongs to its nearest centroid, so its contribution to the\n",
	"    # cost is the row-wise minimum; no second distance pass is needed\n",
	"    return distances.min(axis = 1).sum()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"id": "young-dream",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Fitness function\n",
	"def fitnessFunction(solution, solutionIndex):\n",
	"    \"\"\"Turn the clustering cost of `solution` into a fitness value.\n",
	"\n",
	"    The fitness is the reciprocal of the cost returned by `kmeans`, so a\n",
	"    lower cost gives a higher fitness.\n",
	"    \"\"\"\n",
	"    # The small offset keeps the division defined when the cost is exactly 0\n",
	"    return 1 / (kmeans(solution, solutionIndex) + 0.000001)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "endless-acrylic",
	"metadata": {},
	"source": [
	"## Run the Genetic Algorithm"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"id": "necessary-tyler",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Prepare the parameters\n",
	"\n",
	"# Generation\n",
	"numberGeneration = 1000 # Number of generation\n",
	"numberParentsMating = 5\n",
	"solutionPerPopulation = 50 # Number of chromosomes in each generation\n",
	"parents = -1\n",
	"\n",
	"# Genes\n",
	"numberGenes = k * x.shape[1] # Number of genes in each chromosome\n",
	"geneType = float # Data type in each gene\n",
	"\n",
	"# Range of values\n",
	"minValue = 0 # Minimum value of solution\n",
	"maxValue = 8 # Maximum value of solution\n",
	"\n",
	"# Selection\n",
	"selectionType = 'sss' # Selection using steady state selection\n",
	"\n",
	"# Cross over\n",
	"crossoverType = 'single_point' # Cross over using single point method\n",
	"crossoverRate = 0.25 # Cross over rate (Pc)\n",
	"\n",
	"# Mutation rate\n",
	"# Mutation\n",
	"mutationType = 'random' # Mutation using random method\n",
	"mutationReplacement = True # Replace gene with random value\n",
	"mutationMin = 0\n",
	"mutationMax = 8\n",
	"mutationRate = 10 # Mutation rate (Pm)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"id": "general-concentration",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Build the Genetic Algorithm instance from the parameters defined above\n",
	"geneticAlgorithm = pygad.GA(\n",
	"    # Chromosome layout and objective\n",
	"    num_genes = numberGenes,\n",
	"    gene_type = geneType,\n",
	"    fitness_func = fitnessFunction,\n",
	"\n",
	"    # Number of generations and population size\n",
	"    num_generations = numberGeneration,\n",
	"    sol_per_pop = solutionPerPopulation,\n",
	"\n",
	"    # Value range of the genes in the initial population\n",
	"    init_range_low = minValue,\n",
	"    init_range_high = maxValue,\n",
	"\n",
	"    # Parent selection\n",
	"    num_parents_mating = numberParentsMating,\n",
	"    parent_selection_type = selectionType,\n",
	"    keep_parents = parents,\n",
	"\n",
	"    # Cross over (crossover_probability is not passed, so crossoverRate is unused here)\n",
	"    crossover_type = crossoverType,\n",
	"\n",
	"    # Mutation: mutationRate is given to PyGAD as a percentage of genes\n",
	"    mutation_type = mutationType,\n",
	"    mutation_by_replacement = mutationReplacement,\n",
	"    random_mutation_min_val = mutationMin,\n",
	"    random_mutation_max_val = mutationMax,\n",
	"    mutation_percent_genes = mutationRate,\n",
	"\n",
	"    # Keep the history of solutions and silence PyGAD warnings\n",
	"    save_solutions = True,\n",
	"    save_best_solutions = True,\n",
	"    suppress_warnings = True\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"id": "delayed-continent",
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"# Run the Genetic Algorithm\n",
	"geneticAlgorithm.run()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"id": "oriented-reviewer",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Parameters of the best solution : \n",
	"[[5.92514492 2.80725404 4.40248519 1.41205877]\n",
	" [6.73889534 3.068677 5.61225383 2.11112409]\n",
	" [5.01679467 3.414519 1.46429548 0.23415793]]\n",
	"Fitness value of the best solution : 0.01\n",
	"Index of the best solution : 0\n"
	]
	}
	],
	"source": [
	"# Best chromosome of the run, together with its fitness and its index\n",
	"solution, solutionFitness, solutionIndex = geneticAlgorithm.best_solution()\n",
	"\n",
	"# Single print call, one report item per line; the genes are shown as a k-row matrix\n",
	"print(\n",
	"    'Parameters of the best solution : \\n{solution}'.format(solution = solution.reshape(k, -1)),\n",
	"    'Fitness value of the best solution : {solutionFitness}'.format(solutionFitness = round(solutionFitness, 2)),\n",
	"    'Index of the best solution : {solutionIndex}'.format(solutionIndex = solutionIndex),\n",
	"    sep = '\\n'\n",
	")"
	]
	},
	{
	"cell_type": "markdown",
	"id": "bigger-inside",
	"metadata": {},
	"source": [
	"## Cluster accuracy"
	]
	},
	{
	"cell_type": "markdown",
	"id": "sixth-liberty",
	"metadata": {},
	"source": [
	"### Real labels"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"id": "early-habitat",
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[5.006, 3.428, 1.462, 0.246],\n",
	" [5.936, 2.77 , 4.26 , 1.326],\n",
	" [6.588, 2.974, 5.552, 2.026]])"
	]
	},
	"execution_count": 13,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Features and real labels side by side. The frame is named df_labeled so that `df`\n",
	"# (the iris data set loaded at the top of the notebook) is not overwritten and the\n",
	"# earlier cells can still be re-run\n",
	"df_labeled = pd.DataFrame(\n",
	"    data = np.column_stack([x, y]),\n",
	"    columns = [\n",
	"        'Feature 1',\n",
	"        'Feature 2',\n",
	"        'Feature 3',\n",
	"        'Feature 4',\n",
	"        'Cluster'\n",
	"    ]\n",
	")\n",
	"\n",
	"# Real centroids: mean of every feature within each labelled cluster.\n",
	"# Grouping moves 'Cluster' into the index, so no column has to be deleted afterwards\n",
	"df_centroids = (\n",
	"    df_labeled\n",
	"    .groupby('Cluster')\n",
	"    .mean()\n",
	"    .add_prefix('Centroid ')\n",
	"    .reset_index(drop = True)\n",
	")\n",
	"\n",
	"# Centroids in array\n",
	"np.array(df_centroids)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"id": "damaged-benefit",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
	" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
	" 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
	" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
	" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
	" 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
	" 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])"
	]
	},
	"execution_count": 14,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Real cluster labels\n",
	"y"
	]
	},
	{
	"cell_type": "markdown",
	"id": "systematic-groove",
	"metadata": {},
	"source": [
	"### Genetic Algorithm"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"id": "located-eclipse",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Final centroids: one row per cluster, one column per feature\n",
	"centroids = solution.reshape(k, -1)\n",
	"# Euclidean distance between every data point (rows) and every centroid (columns)\n",
	"distances = cdist(x, centroids, 'euclidean')\n",
	"# Label of each point is the index of its nearest centroid (vectorised over the rows)\n",
	"points = np.argmin(distances, axis = 1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"id": "norman-hampshire",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[5.92514492, 2.80725404, 4.40248519, 1.41205877],\n",
	" [6.73889534, 3.068677 , 5.61225383, 2.11112409],\n",
	" [5.01679467, 3.414519 , 1.46429548, 0.23415793]])"
	]
	},
	"execution_count": 16,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Centroids in array (the matrix built in the previous cell, not recomputed)\n",
	"centroids"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"id": "regulated-connection",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
	" 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
	" 2, 2, 2, 2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
	" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
	" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,\n",
	" 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1,\n",
	" 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0], dtype=int64)"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Cluster labels\n",
	"points"
	]
	},
	{
	"cell_type": "markdown",
	"id": "adverse-intensity",
	"metadata": {},
	"source": [
	"### Sklearn k-means++"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"id": "mechanical-greene",
	"metadata": {},
	"outputs": [],
	"source": [
	"# k-means++ baseline with the same number of clusters (k) as the Genetic Algorithm.\n",
	"# random_state fixes the random initialisation so that re-runs give the same labels\n",
	"kmeansPlus = KMeans(\n",
	"    n_clusters = k,\n",
	"    init = 'k-means++',\n",
	"    random_state = 0\n",
	")\n",
	"kmeansPlus.fit(x)\n",
	"# Fitted cluster centres, one row per cluster\n",
	"centroid = kmeansPlus.cluster_centers_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"id": "crazy-configuration",
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([[5.9016129 , 2.7483871 , 4.39354839, 1.43387097],\n",
	" [5.006 , 3.428 , 1.462 , 0.246 ],\n",
	" [6.85 , 3.07368421, 5.74210526, 2.07105263]])"
	]
	},
	"execution_count": 19,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Centroids in array\n",
	"centroid"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"id": "major-trustee",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
	" 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
	" 1, 1, 1, 1, 1, 1, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
	" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
	" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,\n",
	" 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2,\n",
	" 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0])"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Cluster labels\n",
	"kmeansPlus.labels_"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}
No results found