Skip to content

Instantly share code, notes, and snippets.

@jeanpat
Last active January 31, 2020 12:09
Show Gist options
  • Select an option

  • Save jeanpat/2b95e3ca23a983316dc0d3fa4c064c50 to your computer and use it in GitHub Desktop.

Select an option

Save jeanpat/2b95e3ca23a983316dc0d3fa4c064c50 to your computer and use it in GitHub Desktop.
This jupyter notebook can be used to clean the groundtruth images from the following dataset: https://github.com/jeanpat/DeepFISH/blob/master/dataset/LowRes_13434_overlapping_pairs.h5
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cleaning the groundtruth images from their spurious pixels"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import h5py\n",
"from matplotlib import pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from numpy import newaxis\n",
"from skimage import morphology as mo\n",
"from scipy.ndimage import distance_transform_bf as distance"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [],
"source": [
"def distanceTransform(bIm):\n",
" #from pythonvision.org\n",
" dist = distance(bIm)\n",
" dist = dist.max() - dist\n",
" dist -= dist.min()\n",
" dist = dist/float(dist.ptp()) * 255\n",
" dist = dist.astype(np.uint8)\n",
" return dist\n",
"\n",
"def clean_ground_truth(gd_lab, size = 2):\n",
" \"\"\"Remove spurious pixels from badly labelled groundtruth image\n",
" returns three binary images (One hot shot) first and a labelled image.\n",
" \"\"\"\n",
" mask = gd_lab > 0\n",
" dmap = distanceTransform(mask)\n",
" \n",
" cleaned_lab1 = mo.binary_opening(gd_lab == 1, selem = mo.disk(size))\n",
" cleaned_lab2 = mo.binary_opening(gd_lab == 2, selem = mo.disk(size))\n",
" cleaned_lab3 = mo.binary_opening(gd_lab == 3, selem = mo.disk(size))\n",
" \n",
" seeds = cleaned_lab1+2*cleaned_lab2+3*cleaned_lab3\n",
" seg = mo.watershed(dmap, markers = seeds, mask = 1*mask)\n",
" chrom_lab1 = seg == 1\n",
" chrom_lab2 = seg == 2\n",
" overlapp = seg == 3\n",
" \n",
" return chrom_lab1, chrom_lab2, overlapp, seg\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download the dataset from its repository at github\n",
"https://github.com/jeanpat/DeepFISH/tree/master/dataset"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2019-12-20 09:04:27-- https://github.com/jeanpat/DeepFISH/blob/master/dataset/LowRes_13434_overlapping_pairs.h5\n",
"Résolution de github.com (github.com)… 140.82.118.4\n",
"Connexion à github.com (github.com)|140.82.118.4|:443… connecté.\n",
"requête HTTP transmise, en attente de la réponse… 200 OK\n",
"Taille : non indiqué [text/html]\n",
"Enregistre : «LowRes_13434_overlapping_pairs.h5»\n",
"\n",
"LowRes_13434_overla [ <=> ] 62,54K --.-KB/s ds 0,1s \n",
"\n",
"2019-12-20 09:04:28 (630 KB/s) - «LowRes_13434_overlapping_pairs.h5» enregistré [64037]\n",
"\n"
]
}
],
"source": [
"!wget https://github.com/jeanpat/DeepFISH/blob/master/dataset/LowRes_13434_overlapping_pairs.h5 "
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dataset is a numpy array of shape: (13434, 94, 93, 2)\n"
]
}
],
"source": [
"filename = './LowRes_13434_overlapping_pairs.h5'\n",
"h5f = h5py.File(filename,'r')\n",
"pairs = h5f['dataset_1'][:]\n",
"h5f.close()\n",
"print('dataset is a numpy array of shape:', pairs.shape)"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
"N = 11508\n",
"grey = pairs[N,:,:,0]\n",
"g_truth = pairs[N,:,:,1]\n",
"l1, l2, l3, seg = clean_ground_truth(g_truth, size = 1)"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(94, 93, 2)\n",
"(2, 94, 93, 2)\n"
]
}
],
"source": [
"test = np.dstack((grey, g_truth))\n",
"print(test.shape)\n",
"t2 = np.stack((test,test))\n",
"print(t2.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's compare the groundtruth image befor and after cleaning"
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x7fe0814c80f0>"
]
},
"execution_count": 165,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABG0AAAH8CAYAAABrbFKGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAfXUlEQVR4nO3dTY9bZxkG4Ns0JWk+GigNCh8SqCpIZYNA6oY1P4p/hAQ/gA1ICDbsq0qAKJA2Sds0LfluGpqaRXhi+4wnmZl4xs85vi4JTcfjsd8Jfm3pPvd5zmw+nwcAAACAXr627QUAAAAAsJfQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANnTrMnWezmUtNsdPm8/ls22tYx95k13Xcm/Yl5OZ8Pr+07UUM2Ztgb0JTa/fmoUKbJHnppZc2sxwYmcePH297CQAwJle2vQBgLXsTelq7N50eBQAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABoS2gAAAAA0JLQBAAAAaEhoAwAAANCQ0AYAAACgIaENAAAAQENCGwAAAICGhDYAAAAADQltAAAAABo6te0FTMX58+eTJGfOnEmSzOfzJMm9e/eSJF9++WWS5KWXXkqSPHr06KSXCDtqvs/ts33uN7wd2DS7EgDgYDRtAAAAABrStDmCast85zvfeXrbj370oySLRs3169eTJLPZk+ODr7/+epLk4cOHSZLPPvssSXLnzp0TWDHsmv2O4x/kPo7tw3GwKwEADk/TBgAAAKAhTZtDqDk1p0+fTpK8/fbbT3/2i1/8Ikny17/+NUny1VdfJUkuXbqUJPnZz36WJPne976XJHn33XeTJL/+9a+Pe9kAAADACGnaAAAAADQktAEAAABoyOlRh1BDhWvY8EcfffT0Z++8806SxSW+L168mCR58OBBkuS1115LkvzqV79aud/t27eTJH/605+SLAYTnzr15P+ax48fJ1kMP04Wp14BQwcZdTr8jWePNp0ZgQov5PC78vm7za4EgIXnfdb6vBw3TRsAAACAhjRtjuDMmTNJkhs3bjy9rdov586dS5L88Ic/XPn+8uXLSZKrV68mWbR0qoHz5ptvJkk++eSTJMnnn3+eJPn000/3PH8NRK7mD3Bwz2vW7Hv/+eoxDPsPNuewu8nug96GR/3tWdisw7ZYNVTHTdMGAAAAoCFNm0OouTJnz55Nkpw/f/7pz6oxU42aN954I0nyjW98I0nyk5/8JMli1s3169eTJN/97neTJO+///7Kc/33v/9Nkrz66qtJkrt37z79WbVvNG5ge+bzvcc47EUA2GtdK8AnJsDBaNoAAAAANKRpcwB1RP3ll19OkrzyyitJFi2YJLl06VKS5Mc//vHK9zXTplo57733XpLFPJz79+8nSb71rW8lWbR5qmlTrZ567uX11O/WfetKU472w8LTQsyJ7Atn8cNBnOTOsCvh4My9gN6OckVGxk/TBgAAAKAhTZtDqKtGnTr15J9tudHy+uuvJ1nMsKk2TjVsHjx4kCS5du1akuSDDz5IsmjN1P0ePnyYJDl9+vTK4yyrNk5dYer27dtJklu3bq08psYNu+MZxx1OcB/sHXNjL7K7uhwNtPvg4LbZgtvGGmAsunymsh2aNgAAAAANado8w9e+9iTT+uqrr5IsGi7Vgqk5NMlivs3Xv/71JItWTs22qaZNzZ+pNsywtVO/V/Np1hmuY3gU/z//+c+B/0aYopXGS4tDdqYEQLdXv10JT7T7yARghaYNAAAAQEOaNs9QbZdqw1SzZXgVqXW/U1eN+vnPf54k+fjjj5Mk77//fpLkiy++SJJ8+eWXSRZtnmrgVMtn2MBZvk+to+5T66zHqsZNNXNgZzSbIVPNn9nMsX3oonahXcmu6/batyfhCXNsKJo2AAAAAA1p2jxDNVfqyk41r6ZuX26wVMOmrv5U3//rX/9KsrjCU820qTZMNW3q/vV9NW3queprsv9VrIaNmnqOR48eJVm0e1zJhukY1zGIvY2bYk8yHePalXsbN8Pbge3QuAF4QtMGAAAAoCFNm2eo5ko1W2qGTX29d+/e0/tWi6XUz/75z3+u/LwaNTWPZvj7dZWpavUM59cs/3e1ceprrbe+L3fv3k2S3Lp1K8miDaRxw3g951j+8uWjGr7O54Plm3XDFIytYTM03H12Jbui+9WjtODYVcuv9bF/xvJiNG0AAAAAGtK0WaPaLcOGzdmzZ5MsriJVrZkkuXnzZpLk1VdfTZJcuHAhyWLuTDVn6vZq4lRrph67mjY126a+Lqvnr2ZNPfZw5s7wClS13vr7zLhhqpZf0vPFIJntLOYAXF2KXdD9aP6Qq0uxK9Ydze/8eh/DGmHT9pu/xm7QtAEAAABoSNNmjWqw1FWjqh0zbNpUsyVJ7ty5kyT5+OOPV+5bLZZz584lWbRfqsVTt9dVper3ag5N3V5zaJJFk6bWUY85bNrUc9dVpOqx9rtilcYNU1Qv6xc+MvH0gY7vGMcISkGwEWM6Uj6GNcKmbOr1fhKtgDG9j0AX9s04adoAAAAANKRps8ZwJkw1b2r+TDValpsp1Vr58MMPkyxm21Sjph5zOC/ntddeS7JowdQVnoZXl6o1LD9XNWhKrbd+d9jiefTo0coa6mu1eupxNW6Yotn/jy3Mj3ps4RgbNnuf6slz2YtMnSN+ME0nOXfD+wi75EVbbPbJOGnaAAAAADSkabOkjm4Pr7pULZe6ve63fPWo4dyYmnFz8eLFJIt2Tj1WPUY1cOoKVTVHp+bM1P2Xj7jXOoZf67Hqa/1urWl5vctrrtvrilbzpUaBI/30ssWZ+Scw02ZI44YxcCULYGgbV7rRuIHns0/GSdMGAAAAoCFNmyX7NWvq9vq+LH8/nBPzwQcfJFk0aKppU1eiqiZNtV3q92ruTKm1LF+pajhjpwybNqXaO8P113MNn/PBgwcZMu+GqTjybJsTbNisefL/f7X/mKYxvsLHuGY4KRp4cLyO2mbzmTVOmjYAAAAADQltAAAAABpyetSSOkWpTl0qdTrS8OvyqUL7nTZ09erVJMnp06eTLIYClxr+e//+/SR7hx3XqU7Lp0cNBw/XeuoUpuHA4frdOp2qfm94aladurV8etTDhw+TJLdv31657/By43AyNjfa8NCnSW1hEHFZPOXe53bKItu2yYGjYzrlaAxrhG3ZxiDi8qzntG+ZmsPutTF9zrKgaQMAAADQkKbNGsPhwcOBxMOBxcney4UP2zBXrlxJkly+fHnluarVc+vWrSSLZks1b86dO5dktdkyHChczZp6rPmgCTAcWFyDiesxa4313NX+Wb7tzJkzK99/9tlngZO3+WN2Rx5M3MRwv2vecNKO40i6I4HAcRi+X3mPYSq22W7j+GnaAAAAADSkabOkjlBXw2Y4C6a+r2bK8hHtar8M594MLwV+/fr1lfvXY929ezfJomFTX4ePs7yOar9UY2bYwKl11zyd4aybatzU31FrWP676nfrsaqFVM9ZDSEYu6eNm6eH+Md5/K3eKzRumAKNG+ht7Ht07OuHoeFrWfNmGjRtAAAAABrStFkybL8M59MMLc+0Gc6TqaPcw++r7VJzY6qpUldnqmZLtV6q0VK3J4u2S12Jqp6jmje13mrH1Pf13MP5O/X3VvNm+UpVNcOmHuuLL75Y+/fV+mHsFgWVcTdvNG6YEnMooKf9juqPbY+Odd3wPJo306BpAwAAANCQps2S4aya+r6aJ9V6qVbNsrqt2ix13/q+HqsMH6uaNXfu3EmyaOLUz5fbL/VY1b6phk19PXv27Mr9Hj16lGTRklm+EtW6tS03i4b/BrWuuk9dDevvf/97kuTGjRuBKamiyvAqTePh+CHTM/ZX9djXD/txBRvozefOOGnaAAAAADSkabOk5rYMDY+wr5sRMbx61HBeTFmeg7Pu9ps3byZZNFY+//zzJKtNm3Pnzq2stxo21bx59dVXVx67Zt/Uuuu5qnlT3w8bRuvUz956660kyS9/+cskyW9+85skye9///ski3YPbMb2j9kttv3+xxE7lnFqTbOZY/tsVoeX+/BVvW5NHV/xw/V2XCO8iIO8pju8hwzZk0BHmjYAAAAADWnaLBnOqqmGSn2tqzYNmynJ3nk4w8ccXm1peGWq4c/rOWrGTc3GSRZNm2rfXLhwIcmicTO8ktXwqlB1v1pzPdfycwzVfevfoLz33nsrz1XPoWnDZnU8S37vcbhFm+WJTs2bReNmu+tgOsaxK3tfearTWuCkdb6qjcYN0ImmDQAAAEBDmjZLan7Mw4cPkywaK/V1eEWo5TkzZdi0Gc642a+BUk2V/ebJLM/CqfXdv39/5fuaZVPPMVzvxYsXV9a933MtX11qeKWp+re4evVqkuQf//hHkuTatWsrP4fdtLr/q3nTqXEDu2a/o/mOoEMvHdt7AB1o2gAAAAA0pGmzZNg8qYbK8Gu1YpZbKNWkqdvqPsPmTT1H3a/m5dRj1xWdDmJ4Faiaf1OqUVNf62pTtdazZ8+ufD/8G5bXX+usJk21fO7evZtk0e7RtIFlT/ZPh8aNWTbwRKcrN3VYA3TToXFjTwKdaNoAAAAANKRps6QaJjUjptojwzZMfV03E2Z49afh7fW79VzDmTHDZs5B1O9U4+bevXsr66uGzXC91Yo5ffr0vn9Prbf+Lerfpho21e6p5k39Wx3l74Dp6tC4sSdhWYfGjV0J++vQuAHoQNMGAAAAoCFNmzWqTVKtkWqwDOe6nD9//unvnDt3Lkny6aefrjxWNWzqyk7f//73kySffPLJytd67HVXpDqoarfUVbCGV64aXg1r+PdU06buv3xbNWrqsatZ8+DBgySLfzMNG47X2I+7nXzjxpbkuE1jV55s48YsGzi4k3yPsSeBjjRtAAAAABrStFmj5rjUbJhq0VTL5MyZM0mSN9544+nv/PSnP02S/O53v0uSfPTRRyv3vXTpUpLkrbfeSpJ8+9vfTpK88847SZJvfvObSRZNmxs3brzw31FtmGrLDBtDFy5cSLKYV1PP/fbbbz99jDfffDNJ8tvf/jZJcvPmzSSLf4uabVP/ZsBBHP9xw0XDxnFDOAgNG9hd9iTQmaYNAAAAQEOaNs8wvJpUfa35LpcvX35632ra/PnPf06ymBNTM1/+9re/JUmuXLmycvu1a9eSJLdv306yaOgsz5U5rJorU/N0qg1Tf081bqodU82bV155ZeXnyWJGTz1mPdatW7dWHgOmb/PH4fbOgHrxWTcaNuySsbRjNGzgxe23f16ks2pPAmOgaQMAAADQkNAGAAAAoCGnRz1DDSKuQb41qLdOCfrjH//49L51ee1///vfSfaeklSnS9WpR/XYdZpR3f9FTosaqlMvag11ulSdmlXPVbfXmv7whz88fYyXX345SfLhhx8mWZwiVn8PbMdULwC6ib9L2ZvtmPqu3PZjAAC7SdMGAAAAoCFNmwOoNkw1Vi5evJgk+ctf/vL0Pu++++7Kfeoy2tVmqdvrUtnDS2bX/Y9TNW+GlzSv9k+1apYHEdc6S11GHHo4yWP7J2cxoHj175ovHa+fDYYW7x1qDNsxzV25MPy7Zmt+ZjfCyTnMfrNHYfuWP0ftxYPRtAEAAABoSNPmEKqZUk2VM2fO7PlZNWvqPtVeqctqD2fabMNw1k3Nqam11feJ2TWMxSaP7XfK/GeD7/Yem1Cwoavd2JWOGMKY2KOwffbh4WnaAAAAADSkaXMI1UypuS5Tme8ynIWhXcN4TT27n/rfxxRN/VU79b8PANguTRsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGjo1CHvf/Px48dXjmUl0N8Ptr2AZ7iZxN5kV3Xdm/Ylu87ehJ7sTehp7d6czefzk14IAAAAAM/h9CgAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANCS0AQAAAGhIaAMAAADQkNAGAAAAoCGhDQAAAEBDQhsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANHRq2wsAXtxsNptvew2wTfP5fLbtNQzZl5Cb8/n80rYXMWRvgr0JTa3dm5o2AAAchyvbXgCwlr0JPa3dm0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABo6NS2FwBwvOb73D7b537D24FNsysBAA5G0wYAAACgIU0bYIL2O45/kPs4tg/Hwa4EADg8TRsAAACAhoQ2AAAAAA0JbQAAAAAaEtoAAAAANGQQMTAhBxl1OvyNg402NQAVjubwu9J+A4DDOOhnrc/XcdK0AQAAAGhI0wbYKQdt1uz5vfnqMYzZzLEK2BS7CaZleNTfHofNOkqLlfHStAEAAABoSNMG2AlHbdjs+3jzvcc4tG/gcOwY2A3rWgH2Pxyehs1u0rQBAAAAaEjTBpi0TTdsnvlc5t7AgdgZgLk3cHAaNrtN0wYAAACgIU0bYAJ6Hn+o5o3GDbuo564Euqr3DJ+YAKs0bQAAAAAa0rQBJuck59gchMYNOHoOHIzGDTyhsUrRtAEAAABoSNMGmIxuDZshjRt2kVc7cBQaN+wqDRuGNG0AAAAAGtK0AUZs9VjE7P/fj6VxUzRvmJLhEUJHy4EXMXxP8V7C1NVrXOOGomkDAAAA0JCmDTBC0zr2YNYNUzCtXQl0pb0H7BpNGwAAAICGNG2AyZktHfPvPt9mmcYNU7bcxPEKB16Uxg1Tt/za1mbdbZo2AAAAAA0JbYBJm2W+0rw5+gPNnvwPeGHzOGoIUzaLBgxs0qb2lM/fcRLaAAAAADRkpg2wE6ptc+QZN/OTOy5htg27wkwKmKaTPJLvfYRdUq/zo+4x+2ScNG0AAAAAGtK0AUZki2fhVutF4wZWODceGHrRNsBRaNzA89kn46RpAwAAANCQpg2wU4482+YEGzZ7n1rjhmlz5A+mZZsNPO8n7IKjttnsi3HStAEAAABoSGgDAAAA0JDTo4AR2dxow0OfJrWFQcRD8zXP7ZQptm2TA0ed1gDTsI1BxEPrntt7C1Nz2L3mc3acNG0AAAAAGtK0AUZk88fsjjyYuIlh+0bzhpN2HEfSHQkEjsPw/cp7DFPRod3G8dG0AQAAAGhI0wYgi8ZNGXvzRuOGKXBUHDhOWn1MzfC1rHkzDZo2AAAAAA0JbQDWmGW+p30zJvP5fO3VpmDM5nHUENg87y1M1SyaZFMgtAEAAABoyEwbgGfYO+tmXMy4YYrMugGOgxk3TJXX9Lhp2gAAAAA0pGkDjECffks1VsY2L0bjhk0b1w4AtqE+ccb2fqFxA3SiaQMAAADQkKYNMAL9jtWta6yMrX0DL6LfrgS6WtdY8d4BcDCaNgAAAAANadoAbMiwfaN5AwDrDds3PjEB1tO0AQAAAGhIaANwTGazmas1AcABzOJqTQDrCG0AAAAAGjLTBuCYVdtmmzNuNH4AGIMOV6bziQl0omkDAAAA0JDQBhiRcZ/xbsYNUzTuXQl05b0F4AmhDQAAAEBDZtoAnLCTnHGj2QPAmJ3kjBufmEBHmjYAAAAADWnaABzI5o+/DVswm2zeaNiwC7zKYXcM9/smmzfeS4DONG0AAAAAGhLaAAAAADTk9ChghKY5lnATA4qdFsW2THNXAl1t4j3HewkwBpo2AAAAAA1p2gAjdpLH9k/OURo3GjZ0Mc1dCXR1lPccn5jAmGjaAAAAADSkaQNMwCaP7fc5/vasS4Jr1tDdNHcl0NWzLgnuPQQYM00bAAAAgIY0bYAJmfaxNO0axsirFtgG7z3AVGjaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKAhoQ0AAABAQ6e2vQBgI24mubLtRcCW/GDbC9iHfcmuszehJ3sTelq7N2fz+fykFwIAAADAczg9CgAAAKAhoQ0AAABAQ0IbAAAAgIaENgAAAAANCW0AAAAAGhLaAAAAADQktAEAAABoSGgDAAAA0JDQBgAAAKCh/wEPXSou7UqvZQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 1440x720 with 9 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(20,10))\n",
"\n",
"plt.subplot(251,xticks=[],yticks=[])\n",
"plt.imshow(grey, cmap=plt.cm.gray)\n",
"plt.subplot(252,xticks=[],yticks=[])\n",
"plt.imshow(g_truth, cmap=plt.cm.flag_r)\n",
"plt.subplot(253,xticks=[],yticks=[])\n",
"plt.imshow(g_truth == 1, cmap=plt.cm.flag_r)\n",
"plt.subplot(254,xticks=[],yticks=[])\n",
"plt.imshow(g_truth == 2, cmap=plt.cm.flag_r)\n",
"plt.subplot(255,xticks=[],yticks=[])\n",
"plt.imshow(g_truth == 3, cmap=plt.cm.flag_r)\n",
"\n",
"#plt.subplot(256,xticks=[],yticks=[])\n",
"#plt.imshow(mo.white_tophat(grey, selem = mo.disk(2)) > 0, cmap=plt.cm.jet)\n",
"plt.subplot(257,xticks=[],yticks=[])\n",
"plt.imshow(l1+2*l2+3*l3, cmap=plt.cm.flag_r)\n",
"plt.subplot(258,xticks=[],yticks=[])\n",
"plt.imshow(l1, cmap=plt.cm.flag_r)\n",
"plt.subplot(259,xticks=[],yticks=[])\n",
"plt.imshow(l2, cmap=plt.cm.flag_r)\n",
"plt.subplot(2,5,10,xticks=[],yticks=[])\n",
"plt.imshow(l3, cmap=plt.cm.flag_r)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Clean the whole dataset"
]
},
{
"cell_type": "code",
"execution_count": 196,
"metadata": {},
"outputs": [],
"source": [
"new_data = np.zeros((1,94,93,2), dtype = int)\n",
"N = pairs.shape[0]#10\n",
"for idx in range(N):\n",
" g_truth = pairs[idx,:,:,1]\n",
" grey = pairs[idx,:,:,0]\n",
" _, _, _, seg = clean_ground_truth(g_truth, size = 1)\n",
" paired = np.dstack((grey, seg))\n",
" #\n",
" #https://stackoverflow.com/questions/7372316/how-to-make-a-2d-numpy-array-a-3d-array/7372678\n",
" #\n",
" new_data = np.concatenate((new_data, paired[newaxis,:, :, :]))\n",
"new_data = new_data[1:,:,:,:]"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'plt' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-1-9caa5f72b343>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mplt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfigure\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mN\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10580\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mgrey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mN\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mg_truth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mN\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'plt' is not defined"
]
}
],
"source": [
"plt.figure(figsize=(20,10))\n",
"N=10580\n",
"grey = new_data[N,:,:,0]\n",
"g_truth = new_data[N,:,:,1]\n",
"\n",
"plt.subplot(121,xticks=[],yticks=[])\n",
"plt.imshow(grey, cmap=plt.cm.gray)\n",
"plt.subplot(122,xticks=[],yticks=[])\n",
"plt.imshow(g_truth, cmap=plt.cm.flag_r)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Save the dataset using hdf5 format"
]
},
{
"cell_type": "code",
"execution_count": 200,
"metadata": {},
"outputs": [],
"source": [
"filename = './Cleaned_LowRes_13434_overlapping_pairs.h5'\n",
"hf = h5py.File(filename,'w')\n",
"hf.create_dataset('13434_overlapping_chrom_pairs_LowRes', data=new_data, compression='gzip', compression_opts=9)\n",
"hf.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment