Created
May 7, 2016 07:02
-
-
Save Sentient07/a527bcbec4b1300f8cd313941865cd90 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from lxml import etree\n", | |
"import numpy as np\n", | |
"from PIL import Image\n", | |
"from os import listdir\n", | |
"from os.path import isfile, join\n", | |
"import numpy as np\n", | |
"from PIL import ImageDraw\n", | |
"from random import randint" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def get_bbox(path):\n", | |
"    \"\"\"Collect the bounding-box corners stored in one annotation XML file.\n", | |
"\n", | |
"    `path` is a file name inside the 'Annotations/' directory. Every bndbox\n", | |
"    element under every object element of the document root adds one entry\n", | |
"    to each of the four lists.\n", | |
"\n", | |
"    Returns (xmin, ymin, xmax, ymax): four parallel lists holding the raw\n", | |
"    element text, i.e. strings rather than numbers.\n", | |
"    \"\"\"\n", | |
"    root = etree.parse('Annotations/' + path).getroot()\n", | |
"    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')\n", | |
"    corners = dict((tag, []) for tag in corner_tags)\n", | |
"    for obj in root.findall('object'):\n", | |
"        for box in obj.findall('bndbox'):\n", | |
"            for tag in corner_tags:\n", | |
"                corners[tag].append(box.find(tag).text)\n", | |
"    return tuple(corners[tag] for tag in corner_tags)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 260, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 180, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1 235\n", | |
"1.0\n", | |
"(500, 335)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/Ramana/projects/macvnev/lib/python2.7/site-packages/ipykernel/__main__.py:2: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future\n", | |
" from ipykernel import kernelapp as app\n" | |
] | |
} | |
], | |
"source": [ | |
"# Scratch inspection. xmin, ymin and zero_arr are not assigned in this cell,\n", | |
"# so they must already exist in the kernel from another cell.\n", | |
"print xmin[1], ymin[1]\n", | |
"print zero_arr[xmin[1],ymin[1]]\n", | |
"print np.argpartition(zero_arr, 0).shape" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 181, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"def floodfill(matrix, x, y):\n", | |
"    \"\"\"Replace the 4-connected region of zeros containing (x, y) with 2.\n", | |
"\n", | |
"    `matrix` is a 2-D grid indexed as matrix[x][y] and is modified in place.\n", | |
"    Cells holding anything other than 0 stop the fill; if the start cell is\n", | |
"    not 0 nothing changes. Returns None.\n", | |
"    \"\"\"\n", | |
"    # Explicit stack instead of recursion, so a large empty region cannot\n", | |
"    # exceed Python's recursion limit.\n", | |
"    pending = [(x, y)]\n", | |
"    while pending:\n", | |
"        row, col = pending.pop()\n", | |
"        if matrix[row][col] != 0:\n", | |
"            continue\n", | |
"        matrix[row][col] = 2\n", | |
"        # The first index is bounded by the number of rows and the second by\n", | |
"        # the length of that row; checking them the other way round breaks\n", | |
"        # on any grid that is not square.\n", | |
"        if row > 0:\n", | |
"            pending.append((row - 1, col))\n", | |
"        if row < len(matrix) - 1:\n", | |
"            pending.append((row + 1, col))\n", | |
"        if col > 0:\n", | |
"            pending.append((row, col - 1))\n", | |
"        if col < len(matrix[row]) - 1:\n", | |
"            pending.append((row, col + 1))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def crop_image(xmin, ymin, xmax, ymax, j):\n", | |
"    \"\"\"Save one crop per bounding box of image `j` into 'Cropped/'.\n", | |
"\n", | |
"    xmin, ymin, xmax, ymax: parallel sequences of box corners, given as\n", | |
"        numbers or numeric strings (decimal strings are accepted).\n", | |
"    j: file name of the image inside 'JPEGImages/'.\n", | |
"\n", | |
"    Crop number i is written to 'Cropped/' + str(i) + '-' + j.\n", | |
"    \"\"\"\n", | |
"    original = Image.open('JPEGImages/' + j)\n", | |
"    for i in range(len(xmin)):\n", | |
"        # int() alone rejects decimal strings such as '45.70000076293945',\n", | |
"        # so convert through float() first (truncates toward zero).\n", | |
"        box = tuple(int(float(v)) for v in (xmin[i], ymin[i], xmax[i], ymax[i]))\n", | |
"        original.crop(box).save(\"Cropped/\" + str(i) + \"-\" + str(j))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Manual check: open one image and display the region given by the crop box\n", | |
"# (0, 100, 450, 300).\n", | |
"test_path = 'JPEGImages/2007_000042.jpg'\n", | |
"original = Image.open(test_path)\n", | |
"original.crop((0, 100, 450, 300)).show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Names of all regular files directly inside the annotations directory.\n", | |
"# listdir() returns names in arbitrary order, so sort them: other cells\n", | |
"# index into onlyfiles by position and need a stable order across runs.\n", | |
"path = 'Annotations/'\n", | |
"onlyfiles = sorted(f for f in listdir(path) if isfile(join(path, f)))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 121, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "invalid literal for int() with base 10: '45.70000076293945'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-121-a3dbd65e2864>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mj\u001b[0m \u001b[0;32min\u001b[0m \u001b[0monlyfiles\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mxmin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mymin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxmax\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mymax\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_bbox\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mcrop_image\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxmin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mymin\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxmax\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mymax\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'.'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\".jpg\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0mk\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m<ipython-input-119-fc90298b8a5f>\u001b[0m in \u001b[0;36mcrop_image\u001b[0;34m(xmin, ymin, xmax, ymax, j)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0moriginal\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimage_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxmin\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0moriginal\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxmin\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mymin\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxmax\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mymax\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Cropped/\"\u001b[0m\u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"-\"\u001b[0m 
\u001b[0;34m+\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: '45.70000076293945'" | |
] | |
} | |
], | |
"source": [ | |
"# Crop every annotated object out of its image. The boxes come from the\n", | |
"# annotation file j; the image name is the part of j before its first '.'\n", | |
"# with \".jpg\" appended. k counts iterations but is not read in this cell.\n", | |
"k = 1\n", | |
"for j in onlyfiles:\n", | |
" xmin, ymin, xmax, ymax = get_bbox(j)\n", | |
" crop_image(xmin, ymin, xmax, ymax, j.split('.')[0]+\".jpg\")\n", | |
" k += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 127, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Testing Block\n", | |
"for k in range(14737, len(onlyfiles)-1):\n", | |
" xmin, ymin, xmax, ymax = get_bbox(onlyfiles[k])\n", | |
" crop_image(xmin, ymin, xmax, ymax, onlyfiles[k].split('.')[0]+\".jpg\")\n", | |
" k += 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 191, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[1 0]\n", | |
" [0 1]]\n" | |
] | |
} | |
], | |
"source": [ | |
"# 40x40 array of random integers, each 0 or 1 (the upper bound 2 is exclusive).\n", | |
"bin_rand = np.random.randint(0,2,(40,40))\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "invalid literal for int() with base 10: '45.70000076293945'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-6-4958e7aa399e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mzero_arr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mzeros\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moriginal\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxmin\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mzero_arr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mymin\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mymax\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxmin\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mxmax\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mxmin1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mymin1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mxmax1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mymax1\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mzero_arr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mValueError\u001b[0m: invalid literal for int() with base 10: '45.70000076293945'" | |
] | |
} | |
], | |
"source": [ | |
"# For each annotation file from index 1453 onward, save up to five 32x32\n", | |
"# patches that overlap no bounding box as background samples.\n", | |
"# NOTE(review): 1453 looks like a resume offset from an earlier run -- confirm.\n", | |
"PATCH_SIZE = 32   # side length of a background patch, in pixels\n", | |
"MAX_PATCHES = 5   # number of patches kept per image\n", | |
"for j in range(1453, len(onlyfiles)):\n", | |
"    # get_bbox returns (xmin, ymin, xmax, ymax); unpack in that same order.\n", | |
"    xmin, ymin, xmax, ymax = get_bbox(onlyfiles[j])\n", | |
"    img_path = onlyfiles[j].split('.')[0] + \".jpg\"\n", | |
"    original = Image.open('JPEGImages/' + img_path)\n", | |
"    # PIL reports size as (width, height), while the mask below is indexed\n", | |
"    # [row, column], so its shape has to be (height, width).\n", | |
"    width, height = original.size\n", | |
"    zero_arr = np.zeros((height, width))\n", | |
"    for i in range(len(xmin)):\n", | |
"        # Coordinates may be decimal strings, which int() alone rejects.\n", | |
"        top, bottom = int(float(ymin[i])), int(float(ymax[i]))\n", | |
"        left, right = int(float(xmin[i])), int(float(xmax[i]))\n", | |
"        zero_arr[top:bottom, left:right] = 1\n", | |
"    xmin1, ymin1, xmax1, ymax1 = [], [], [], []\n", | |
"    # Raster scan over every window position (the + 1 includes the last valid\n", | |
"    # offset) and stop both loops as soon as enough empty windows are found.\n", | |
"    for i in range(zero_arr.shape[0] - PATCH_SIZE + 1):\n", | |
"        for k in range(zero_arr.shape[1] - PATCH_SIZE + 1):\n", | |
"            if np.count_nonzero(zero_arr[i:i+PATCH_SIZE, k:k+PATCH_SIZE]) == 0:\n", | |
"                xmin1.append(k)\n", | |
"                ymin1.append(i)\n", | |
"                xmax1.append(k + PATCH_SIZE)\n", | |
"                ymax1.append(i + PATCH_SIZE)\n", | |
"                if len(xmin1) >= MAX_PATCHES:\n", | |
"                    break\n", | |
"        if len(xmin1) >= MAX_PATCHES:\n", | |
"            break\n", | |
"    n_back = min(MAX_PATCHES, len(xmin1))\n", | |
"    for ntemp in range(n_back):\n", | |
"        n_img = original.crop((xmin1[ntemp], ymin1[ntemp], xmax1[ntemp], ymax1[ntemp]))\n", | |
"        n_img.save('Background/' + str(ntemp) + \"-\" + img_path)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 200, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[2 0 0 0]\n", | |
" [0 0 2 1]\n", | |
" [2 1 0 2]\n", | |
" [0 0 2 1]]\n" | |
] | |
} | |
], | |
"source": [ | |
"# 4x4 array of random integers drawn from 0, 1 and 2 (upper bound exclusive).\n", | |
"a = np.random.randint(0,3,(4,4))\n", | |
"print a" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 250, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[0, 0, 2, 1],\n", | |
" [2, 1, 0, 2]])" | |
] | |
}, | |
"execution_count": 250, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Slice demo: rows 1 and 2, columns 0 through 3 of a.\n", | |
"a[1:3, 0:4]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Add jpg files to list\n", | |
"back_files, obj_files = [], []\n", | |
"path1 = 'Background/'\n", | |
"back_files = [f for f in listdir(path1) if isfile(join(path1, f))]\n", | |
"path2 = 'Cropped/'\n", | |
"cropped_files = [f for f in listdir(path2) if isfile(join(path2, f))]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Write one \"<relative path> <label>\" line per sample: label 1 for cropped\n", | |
"# objects, label 0 for background patches.\n", | |
"# path2 is 'Cropped/' and path1 is 'Background/', so each list must be\n", | |
"# prefixed with its own directory; swapping them yields paths that do not exist.\n", | |
"# NOTE(review): index 0 of each list is skipped, as in the resize cell --\n", | |
"# presumably to drop a non-image entry; confirm.\n", | |
"with open('train1.txt', 'w+') as f2:\n", | |
"    for i in range(1, len(cropped_files)):\n", | |
"        f2.write(path2 + cropped_files[i] + \" \" + str(1))\n", | |
"        f2.write(\"\\n\")\n", | |
"    for j in range(1, len(back_files)):\n", | |
"        f2.write(path1 + back_files[j] + \" \" + str(0))\n", | |
"        f2.write(\"\\n\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 285, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1453\n" | |
] | |
} | |
], | |
"source": [ | |
"# Number of entries in back_files.\n", | |
"print len(back_files)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import random\n", | |
"# Shuffle the sample list: tag each line with a random key, order by that\n", | |
"# key, then write the lines back out without the key.\n", | |
"with open('train1.txt','r') as source:\n", | |
"    data = sorted((random.random(), line) for line in source)\n", | |
"with open('train4.txt','w+') as target:\n", | |
"    target.writelines(line for _, line in data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Shrink or stretch every cropped image (all but the first list entry) to\n", | |
"# 32x32 and overwrite the file in place.\n", | |
"for kj in cropped_files[1:]:\n", | |
"    target_path = path2 + kj\n", | |
"    Image.open(target_path).resize((32, 32)).save(target_path)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Validation split: copy lines 4000 through 6000 (1-based, inclusive) of the\n", | |
"# shuffled list into val.txt.\n", | |
"count_temp = 0\n", | |
"# The with-blocks close both files, so val.txt is completely flushed to disk\n", | |
"# before any other cell reads it.\n", | |
"with open('train4.txt', 'r') as shuffled:\n", | |
"    with open('val.txt', 'w+') as f6:\n", | |
"        for l in shuffled:\n", | |
"            count_temp += 1\n", | |
"            if count_temp < 4000:\n", | |
"                continue\n", | |
"            f6.write(l)\n", | |
"            if count_temp == 6000:\n", | |
"                break" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Training split: the first 3999 lines of the shuffled list. The validation\n", | |
"# cell takes lines 4000-6000, so stopping before line 4000 keeps the two\n", | |
"# splits disjoint (writing up to line 4001 would put two lines in both).\n", | |
"count_temp = 0\n", | |
"# The with-blocks close both files, so train.txt is completely flushed.\n", | |
"with open('train4.txt', 'r') as shuffled:\n", | |
"    with open('train.txt', 'w+') as f5:\n", | |
"        for l in shuffled:\n", | |
"            count_temp += 1\n", | |
"            if count_temp >= 4000:\n", | |
"                break\n", | |
"            f5.write(l)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment