Last active
January 31, 2020 12:09
-
-
Save jeanpat/2b95e3ca23a983316dc0d3fa4c064c50 to your computer and use it in GitHub Desktop.
This jupyter notebook can be used to clean the groundtruth images from the following dataset: https://github.com/jeanpat/DeepFISH/blob/master/dataset/LowRes_13434_overlapping_pairs.h5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Cleaning the groundtruth images from their spurious pixels" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import os\n", | |
| "import h5py\n", | |
| "from matplotlib import pyplot as plt" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 179, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "from numpy import newaxis\n", | |
| "from skimage import morphology as mo\n", | |
| "from scipy.ndimage import distance_transform_bf as distance" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 132, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def distanceTransform(bIm):\n", | |
| " #from pythonvision.org\n", | |
| " dist = distance(bIm)\n", | |
| " dist = dist.max() - dist\n", | |
| " dist -= dist.min()\n", | |
| " dist = dist/float(dist.ptp()) * 255\n", | |
| " dist = dist.astype(np.uint8)\n", | |
| " return dist\n", | |
| "\n", | |
| "def clean_ground_truth(gd_lab, size = 2):\n", | |
| " \"\"\"Remove spurious pixels from badly labelled groundtruth image\n", | |
| " returns three binary images (One hot shot) first and a labelled image.\n", | |
| " \"\"\"\n", | |
| " mask = gd_lab > 0\n", | |
| " dmap = distanceTransform(mask)\n", | |
| " \n", | |
| " cleaned_lab1 = mo.binary_opening(gd_lab == 1, selem = mo.disk(size))\n", | |
| " cleaned_lab2 = mo.binary_opening(gd_lab == 2, selem = mo.disk(size))\n", | |
| " cleaned_lab3 = mo.binary_opening(gd_lab == 3, selem = mo.disk(size))\n", | |
| " \n", | |
| " seeds = cleaned_lab1+2*cleaned_lab2+3*cleaned_lab3\n", | |
| " seg = mo.watershed(dmap, markers = seeds, mask = 1*mask)\n", | |
| " chrom_lab1 = seg == 1\n", | |
| " chrom_lab2 = seg == 2\n", | |
| " overlapp = seg == 3\n", | |
| " \n", | |
| " return chrom_lab1, chrom_lab2, overlapp, seg\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Download the dataset from its repository at github\n", | |
| "https://github.com/jeanpat/DeepFISH/tree/master/dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "--2019-12-20 09:04:27-- https://github.com/jeanpat/DeepFISH/blob/master/dataset/LowRes_13434_overlapping_pairs.h5\n", | |
| "Résolution de github.com (github.com)… 140.82.118.4\n", | |
| "Connexion à github.com (github.com)|140.82.118.4|:443… connecté.\n", | |
| "requête HTTP transmise, en attente de la réponse… 200 OK\n", | |
| "Taille : non indiqué [text/html]\n", | |
| "Enregistre : «LowRes_13434_overlapping_pairs.h5»\n", | |
| "\n", | |
| "LowRes_13434_overla [ <=> ] 62,54K --.-KB/s ds 0,1s \n", | |
| "\n", | |
| "2019-12-20 09:04:28 (630 KB/s) - «LowRes_13434_overlapping_pairs.h5» enregistré [64037]\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!wget https://github.com/jeanpat/DeepFISH/blob/master/dataset/LowRes_13434_overlapping_pairs.h5 " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "dataset is a numpy array of shape: (13434, 94, 93, 2)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "filename = './LowRes_13434_overlapping_pairs.h5'\n", | |
| "h5f = h5py.File(filename,'r')\n", | |
| "pairs = h5f['dataset_1'][:]\n", | |
| "h5f.close()\n", | |
| "print('dataset is a numpy array of shape:', pairs.shape)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 146, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "N = 11508\n", | |
| "grey = pairs[N,:,:,0]\n", | |
| "g_truth = pairs[N,:,:,1]\n", | |
| "l1, l2, l3, seg = clean_ground_truth(g_truth, size = 1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 175, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "(94, 93, 2)\n", | |
| "(2, 94, 93, 2)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "test = np.dstack((grey, g_truth))\n", | |
| "print(test.shape)\n", | |
| "t2 = np.stack((test,test))\n", | |
| "print(t2.shape)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Let's compare the groundtruth image befor and after cleaning" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 165, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<matplotlib.image.AxesImage at 0x7fe0814c80f0>" | |
| ] | |
| }, | |
| "execution_count": 165, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAABG0AAAH8CAYAAABrbFKGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAfXUlEQVR4nO3dTY9bZxkG4Ns0JWk+GigNCh8SqCpIZYNA6oY1P4p/hAQ/gA1ICDbsq0qAKJA2Sds0LfluGpqaRXhi+4wnmZl4xs85vi4JTcfjsd8Jfm3pPvd5zmw+nwcAAACAXr627QUAAAAAsJfQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANnTrMnWezmUtNsdPm8/ls22tYx95k13Xcm/Yl5OZ8Pr+07UUM2Ztgb0JTa/fmoUKbJHnppZc2sxwYmcePH297CQAwJle2vQBgLXsTelq7N50eBQAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABo6te0FTMX58+eTJGfOnEmSzOfzJMm9e/eSJF9++WWS5KWXXkqSPHr06KSXCDtqvs/ts33uN7wd2DS7EgDgYDRtAAAAABrStDmCast85zvfeXrbj370oySLRs3169eTJLPZk+ODr7/+epLk4cOHSZLPPvssSXLnzp0TWDHsmv2O4x/kPo7tw3GwKwEADk/TBgAAAKAhTZtDqDk1p0+fTpK8/fbbT3/2i1/8Ikny17/+NUny1VdfJUkuXbqUJPnZz36WJPne976XJHn33XeTJL/+9a+Pe9kAAADACGnaAAAAADQktAEAAABoyOlRh1BDhWvY8EcfffT0Z++8806SxSW+L168mCR58OBBkuS1115LkvzqV79aud/t27eTJH/605+SLAYTnzr15P+ax48fJ1kMP04Wp14BQwcZdTr8jWePNp0ZgQov5PC78vm7za4EgIXnfdb6vBw3TRsAAACAhjRtjuDMmTNJkhs3bjy9rdov586dS5L88Ic/XPn+8uXLSZKrV68mWbR0qoHz5ptvJkk++eSTJMnnn3+eJPn000/3PH8NRK7mD3Bwz2vW7Hv/+eoxDPsPNuewu8nug96GR/3tWdisw7ZYNVTHTdMGAAAAoCFNm0OouTJnz55Nkpw/f/7pz6oxU42aN954I0nyjW98I0nyk5/8JMli1s3169eTJN/97neTJO+///7Kc/33v/9Nkrz66qtJkrt37z79WbVvNG5ge+bzvcc47EUA2GtdK8AnJsDBaNoAAAAANKRpcwB1RP3ll19OkrzyyitJFi2YJLl06VKS5Mc//vHK9zXTplo57733XpLFPJz79+8nSb71rW8lWbR5qmlTrZ567uX11O/WfetKU472w8LTQsyJ7Atn8cNBnOTOsCvh4My9gN6OckVGxk/TBgAAAKAhTZtDqKtGnTr15J9tudHy+uuvJ1nMsKk2TjVsHjx4kCS5du1akuSDDz5IsmjN1P0ePnyYJDl9+vTK4yyrNk5dYer27dtJklu3bq08psYNu+MZxx1OcB/sHXNjL7K7uhwNtPvg4LbZgtvGGmAsunymsh2aNgAAAAANado8w9e+9iTT+uqrr5IsGi7Vgqk5NMlivs3Xv/71JItWTs22qaZNzZ+pNsywtVO/V/Np1hmuY3gU/z//+c+B/0aYopXGS4tDdqYEQLdXv10JT7T7yARghaYNAAAAQEOaNs9QbZdqw1SzZXgVqXW/U1eN+vnPf54k+fjjj5Mk77//fpLkiy++SJJ8+eWXSRZtnmrgVMtn2MBZvk+to+5T66zHqsZNNXNgZzSbIVPNn9nMsX3oonahXcmu6/batyfhCXNsKJo2AAAAAA1p2jxDNVfqyk41r6ZuX26wVMOmrv5U3//rX/9KsrjCU820qTZMNW3q/vV9NW3queprsv9VrIaNmnqOR48eJVm0e1zJhukY1zGIvY2bYk8yHePalXsbN8Pbge3QuAF4QtMGAAAAoCFNm2eo5ko1W2qGTX29d+/e0/tWi6XUz/75z3+u/LwaNTWPZvj7dZWpavUM59cs/3e1ceprrbe+L3fv3k2S3Lp1K8miDaRxw3g951j+8uWjGr7O54Plm3XDFIytYTM03H12Jbui+9WjtODYVcuv9bF/xvJiNG0AAAAAGtK0WaPaLcOGzdmzZ5MsriJVrZkkuXnzZpLk1VdfTZJcuHAhyWLuTDVn6vZq4lRrph67mjY126a+Lqvnr2ZNPfZw5s7wClS13vr7zLhhqpZf0vPFIJntLOYAXF2KXdD9aP6Qq0uxK9Ydze/8eh/DGmHT9pu/xm7QtAEAAABoSNNmjWqw1FWjqh0zbNpUsyVJ7ty5kyT5+OOPV+5bLZZz584lWbRfqsVTt9dVper3ag5N3V5zaJJFk6bWUY85bNrUc9dVpOqx9rtilcYNU1Qv6xc+MvH0gY7vGMcISkGwEWM6Uj6GNcKmbOr1fhKtgDG9j0AX9s04adoAAAAANKRps8ZwJkw1b2r+TDValpsp1Vr58MMPkyxm21Sjph5zOC/ntddeS7JowdQVnoZXl6o1LD9XNWhKrbd+d9jiefTo0coa6mu1eupxNW6Yotn/jy3Mj3ps4RgbNnuf6slz2YtMnSN+ME0nOXfD+wi75EVbbPbJOGnaAAAAADSkabOkjm4Pr7pULZe6ve63fPWo4dyYmnFz8eLFJIt2Tj1WPUY1cOoKVTVHp+bM1P2Xj7jXOoZf67Hqa/1urWl5vctrrtvrilbzpUaBI/30ssWZ+Scw02ZI44YxcCULYGgbV7rRuIHns0/GSdMGAAAAoCFNmyX7NWvq9vq+LH8/nBPzwQcfJFk0aKppU1eiqiZNtV3q92ruTKm1LF+pajhjpwybNqXaO8P113MNn/PBgwcZMu+GqTjybJsTbNisefL/f7X/mKYxvsLHuGY4KRp4cLyO2mbzmTVOmjYAAAAADQltAAAAABpyetSSOkWpTl0qdTrS8OvyqUL7nTZ09erVJMnp06eTLIYClxr+e//+/SR7hx3XqU7Lp0cNBw/XeuoUpuHA4frdOp2qfm94aladurV8etTDhw+TJLdv31657/By43AyNjfa8NCnSW1hEHFZPOXe53bKItu2yYGjYzrlaAxrhG3ZxiDi8qzntG+ZmsPutTF9zrKgaQMAAADQkKbNGsPhwcOBxMOBxcney4UP2zBXrlxJkly+fHnluarVc+vWrSSLZks1b86dO5dktdkyHChczZp6rPmgCTAcWFyDiesxa4313NX+Wb7tzJkzK99/9tlngZO3+WN2Rx5M3MRwv2vecNKO40i6I4HAcRi+X3mPYSq22W7j+GnaAAAAADSkabOkjlBXw2Y4C6a+r2bK8hHtar8M594MLwV+/fr1lfvXY929ezfJomFTX4ePs7yOar9UY2bYwKl11zyd4aybatzU31FrWP676nfrsaqFVM9ZDSEYu6eNm6eH+Md5/K3eKzRumAKNG+ht7Ht07OuHoeFrWfNmGjRtAAAAABrStFkybL8M59MMLc+0Gc6TqaPcw++r7VJzY6qpUldnqmZLtV6q0VK3J4u2S12Jqp6jmje13mrH1Pf13MP5O/X3VvNm+UpVNcOmHuuLL75Y+/fV+mHsFgWVcTdvNG6YEnMooKf9juqPbY+Odd3wPJo306BpAwAAANCQps2S4aya+r6aJ9V6qVbNsrqt2ix13/q+HqsMH6uaNXfu3EmyaOLUz5fbL/VY1b6phk19PXv27Mr9Hj16lGTRklm+EtW6tS03i4b/BrWuuk9dDevvf/97kuTGjRuBKamiyvAqTePh+CHTM/ZX9djXD/txBRvozefOOGnaAAAAADSkabOk5rYMDY+wr5sRMbx61HBeTFmeg7Pu9ps3byZZNFY+//zzJKtNm3Pnzq2stxo21bx59dVXVx67Zt/Uuuu5qnlT3w8bRuvUz956660kyS9/+cskyW9+85skye9///ski3YPbMb2j9kttv3+xxE7lnFqTbOZY/tsVoeX+/BVvW5NHV/xw/V2XCO8iIO8pju8hwzZk0BHmjYAAAAADWnaLBnOqqmGSn2tqzYNmynJ3nk4w8ccXm1peGWq4c/rOWrGTc3GSRZNm2rfXLhwIcmicTO8ktXwqlB1v1pzPdfycwzVfevfoLz33nsrz1XPoWnDZnU8S37vcbhFm+WJTs2bReNmu+tgOsaxK3tfearTWuCkdb6qjcYN0ImmDQAAAEBDmjZLan7Mw4cPkywaK/V1eEWo5TkzZdi0Gc642a+BUk2V/ebJLM/CqfXdv39/5fuaZVPPMVzvxYsXV9a933MtX11qeKWp+re4evVqkuQf//hHkuTatWsrP4fdtLr/q3nTqXEDu2a/o/mOoEMvHdt7AB1o2gAAAAA0pGmzZNg8qYbK8Gu1YpZbKNWkqdvqPsPmTT1H3a/m5dRj1xWdDmJ4Faiaf1OqUVNf62pTtdazZ8+ufD/8G5bXX+usJk21fO7evZtk0e7RtIFlT/ZPh8aNWTbwRKcrN3VYA3TToXFjTwKdaNoAAAAANKRps6QaJjUjptojwzZMfV03E2Z49afh7fW79VzDmTHDZs5B1O9U4+bevXsr66uGzXC91Yo5ffr0vn9Prbf+Lerfpho21e6p5k39Wx3l74Dp6tC4sSdhWYfGjV0J++vQuAHoQNMGAAAAoCFNmzWqTVKtkWqwDOe6nD9//unvnDt3Lkny6aefrjxWNWzqyk7f//73kySffPLJytd67HVXpDqoarfUVbCGV64aXg1r+PdU06buv3xbNWrqsatZ8+DBgySLfzMNG47X2I+7nXzjxpbkuE1jV55s48YsGzi4k3yPsSeBjjRtAAAAABrStFmj5rjUbJhq0VTL5MyZM0mSN9544+nv/PSnP02S/O53v0uSfPTRRyv3vXTpUpLkrbfeSpJ8+9vfTpK88847SZJvfvObSRZNmxs3brzw31FtmGrLDBtDFy5cSLKYV1PP/fbbbz99jDfffDNJ8tvf/jZJcvPmzSSLf4uabVP/ZsBBHP9xw0XDxnFDOAgNG9hd9iTQmaYNAAAAQEOaNs8wvJpUfa35LpcvX35632ra/PnPf06ymBNTM1/+9re/JUmuXLmycvu1a9eSJLdv306yaOgsz5U5rJorU/N0qg1Tf081bqodU82bV155ZeXnyWJGTz1mPdatW7dWHgOmb/PH4fbOgHrxWTcaNuySsbRjNGzgxe23f16ks2pPAmOgaQMAAADQkNAGAAAAoCGnRz1DDSKuQb41qLdOCfrjH//49L51ee1///vfSfaeklSnS9WpR/XYdZpR3f9FTosaqlMvag11ulSdmlXPVbfXmv7whz88fYyXX345SfLhhx8mWZwiVn8PbMdULwC6ib9L2ZvtmPqu3PZjAAC7SdMGAAAAoCFNmwOoNkw1Vi5evJgk+ctf/vL0Pu++++7Kfeoy2tVmqdvrUtnDS2bX/Y9TNW+GlzSv9k+1apYHEdc6S11GHHo4yWP7J2cxoHj175ovHa+fDYYW7x1qDNsxzV25MPy7Zmt+ZjfCyTnMfrNHYfuWP0ftxYPRtAEAAABoSNPmEKqZUk2VM2fO7PlZNWvqPtVeqctqD2fabMNw1k3Nqam11feJ2TWMxSaP7XfK/GeD7/Yem1Cwoavd2JWOGMKY2KOwffbh4WnaAAAAADSkaXMI1UypuS5Tme8ynIWhXcN4TT27n/rfxxRN/VU79b8PANguTRsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGjo1CHvf/Px48dXjmUl0N8Ptr2AZ7iZxN5kV3Xdm/Ylu87ehJ7sTehp7d6czefzk14IAAAAAM/h9CgAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANHRq2wsAXtxsNptvew2wTfP5fLbtNQzZl5Cb8/n80rYXMWRvgr0JTa3dm5o2AAAchyvbXgCwlr0JPa3dm0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABo6NS2FwBwvOb73D7b537D24FNsysBAA5G0wYAAACgIU0bYIL2O45/kPs4tg/Hwa4EADg8TRsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANGQQMTAhBxl1OvyNg402NQAVjubwu9J+A4DDOOhnrc/XcdK0AQAAAGhI0wbYKQdt1uz5vfnqMYzZzLEK2BS7CaZleNTfHofNOkqLlfHStAEAAABoSNMG2AlHbdjs+3jzvcc4tG/gcOwY2A3rWgH2Pxyehs1u0rQBAAAAaEjTBpi0TTdsnvlc5t7AgdgZgLk3cHAaNrtN0wYAAACgIU0bYAJ6Hn+o5o3GDbuo564Euqr3DJ+YAKs0bQAAAAAa0rQBJuck59gchMYNOHoOHIzGDTyhsUrRtAEAAABoSNMGmIxuDZshjRt2kVc7cBQaN+wqDRuGNG0AAAAAGtK0AUZs9VjE7P/fj6VxUzRvmJLhEUJHy4EXMXxP8V7C1NVrXOOGomkDAAAA0JCmDTBC0zr2YNYNUzCtXQl0pb0H7BpNGwAAAICGNG2AyZktHfPvPt9mmcYNU7bcxPEKB16Uxg1Tt/za1mbdbZo2AAAAAA0JbYBJm2W+0rw5+gPNnvwPeGHzOGoIUzaLBgxs0qb2lM/fcRLaAAAAADRkpg2wE6ptc+QZN/OTOy5htg27wkwKmKaTPJLvfYRdUq/zo+4x+2ScNG0AAAAAGtK0AUZki2fhVutF4wZWODceGHrRNsBRaNzA89kn46RpAwAAANCQpg2wU4482+YEGzZ7n1rjhmlz5A+mZZsNPO8n7IKjttnsi3HStAEAAABoSGgDAAAA0JDTo4AR2dxow0OfJrWFQcRD8zXP7ZQptm2TA0ed1gDTsI1BxEPrntt7C1Nz2L3mc3acNG0AAAAAGtK0AUZk88fsjjyYuIlh+0bzhpN2HEfSHQkEjsPw/cp7DFPRod3G8dG0AQAAAGhI0wYgi8ZNGXvzRuOGKXBUHDhOWn1MzfC1rHkzDZo2AAAAAA0JbQDWmGW+p30zJvP5fO3VpmDM5nHUENg87y1M1SyaZFMgtAEAAABoyEwbgGfYO+tmXMy4YYrMugGOgxk3TJXX9Lhp2gAAAAA0pGkDjECffks1VsY2L0bjhk0b1w4AtqE+ccb2fqFxA3SiaQMAAADQkKYNMAL9jtWta6yMrX0DL6LfrgS6WtdY8d4BcDCaNgAAAAANadoAbMiwfaN5AwDrDds3PjEB1tO0AQAAAGhIaANwTGazmas1AcABzOJqTQDrCG0AAAAAGjLTBuCYVdtmmzNuNH4AGIMOV6bziQl0omkDAAAA0JDQBhiRcZ/xbsYNUzTuXQl05b0F4AmhDQAAAEBDZtoAnLCTnHGj2QPAmJ3kjBufmEBHmjYAAAAADWnaABzI5o+/DVswm2zeaNiwC7zKYXcM9/smmzfeS4DONG0AAAAAGhLaAAAAADTk9ChghKY5lnATA4qdFsW2THNXAl1t4j3HewkwBpo2AAAAAA1p2gAjdpLH9k/OURo3GjZ0Mc1dCXR1lPccn5jAmGjaAAAAADSkaQNMwCaP7fc5/vasS4Jr1tDdNHcl0NWzLgnuPQQYM00bAAAAgIY0bYAJmfaxNO0axsirFtgG7z3AVGjaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ6e2vQBgI24mubLtRcCW/GDbC9iHfcmuszehJ3sTelq7N2fz+fykFwIAAADAczg9CgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKCh/wEPXSou7UqvZQAAAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<Figure size 1440x720 with 9 Axes>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "plt.figure(figsize=(20,10))\n", | |
| "\n", | |
| "plt.subplot(251,xticks=[],yticks=[])\n", | |
| "plt.imshow(grey, cmap=plt.cm.gray)\n", | |
| "plt.subplot(252,xticks=[],yticks=[])\n", | |
| "plt.imshow(g_truth, cmap=plt.cm.flag_r)\n", | |
| "plt.subplot(253,xticks=[],yticks=[])\n", | |
| "plt.imshow(g_truth == 1, cmap=plt.cm.flag_r)\n", | |
| "plt.subplot(254,xticks=[],yticks=[])\n", | |
| "plt.imshow(g_truth == 2, cmap=plt.cm.flag_r)\n", | |
| "plt.subplot(255,xticks=[],yticks=[])\n", | |
| "plt.imshow(g_truth == 3, cmap=plt.cm.flag_r)\n", | |
| "\n", | |
| "#plt.subplot(256,xticks=[],yticks=[])\n", | |
| "#plt.imshow(mo.white_tophat(grey, selem = mo.disk(2)) > 0, cmap=plt.cm.jet)\n", | |
| "plt.subplot(257,xticks=[],yticks=[])\n", | |
| "plt.imshow(l1+2*l2+3*l3, cmap=plt.cm.flag_r)\n", | |
| "plt.subplot(258,xticks=[],yticks=[])\n", | |
| "plt.imshow(l1, cmap=plt.cm.flag_r)\n", | |
| "plt.subplot(259,xticks=[],yticks=[])\n", | |
| "plt.imshow(l2, cmap=plt.cm.flag_r)\n", | |
| "plt.subplot(2,5,10,xticks=[],yticks=[])\n", | |
| "plt.imshow(l3, cmap=plt.cm.flag_r)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Clean the whole dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 196, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "new_data = np.zeros((1,94,93,2), dtype = int)\n", | |
| "N = pairs.shape[0]#10\n", | |
| "for idx in range(N):\n", | |
| " g_truth = pairs[idx,:,:,1]\n", | |
| " grey = pairs[idx,:,:,0]\n", | |
| " _, _, _, seg = clean_ground_truth(g_truth, size = 1)\n", | |
| " paired = np.dstack((grey, seg))\n", | |
| " #\n", | |
| " #https://stackoverflow.com/questions/7372316/how-to-make-a-2d-numpy-array-a-3d-array/7372678\n", | |
| " #\n", | |
| " new_data = np.concatenate((new_data, paired[newaxis,:, :, :]))\n", | |
| "new_data = new_data[1:,:,:,:]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "ename": "NameError", | |
| "evalue": "name 'plt' is not defined", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[0;32m<ipython-input-1-9caa5f72b343>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10580\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mgrey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mN\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mg_truth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mN\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
| "\u001b[0;31mNameError\u001b[0m: name 'plt' is not defined" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "plt.figure(figsize=(20,10))\n", | |
| "N=10580\n", | |
| "grey = new_data[N,:,:,0]\n", | |
| "g_truth = new_data[N,:,:,1]\n", | |
| "\n", | |
| "plt.subplot(121,xticks=[],yticks=[])\n", | |
| "plt.imshow(grey, cmap=plt.cm.gray)\n", | |
| "plt.subplot(122,xticks=[],yticks=[])\n", | |
| "plt.imshow(g_truth, cmap=plt.cm.flag_r)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Save the dataset using hdf5 format" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 200, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "filename = './Cleaned_LowRes_13434_overlapping_pairs.h5'\n", | |
| "hf = h5py.File(filename,'w')\n", | |
| "hf.create_dataset('13434_overlapping_chrom_pairs_LowRes', data=new_data, compression='gzip', compression_opts=9)\n", | |
| "hf.close()" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment