MPI-INF-3DHP
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import copy\n",
"import cv2 as cv\n",
"import math\n",
"import numpy as np\n",
"import random\n",
"import imageio\n",
"import scipy.io\n",
"import skimage.transform\n",
"from tqdm import tqdm\n",
"from itertools import product\n",
"from vectormath import Vector2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"root = 'D:/data/MPI-INF-3DHP'\n",
"\n",
"available_subject = [1, 2, 3, 4, 5, 6, 7, 8]\n",
"available_sequence = [1, 2]\n",
"available_camera = list(range(14))\n",
"\n",
"# available_segment[subject - 1][sequence - 1]\n",
"# segmented sequence = np.squeeze(np.where(available_segment[subject - 1])) + 1\n",
"available_segment = [\n",
"    [False, True],  # subject 1\n",
"    [False, True],  # subject 2\n",
"    [False, True],\n",
"    [False, True],\n",
"    [False, True],\n",
"    [False, True],\n",
"    [True, False],\n",
"    [True, False],  # subject 8\n",
"]"
]
},
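{
"cell_type": "markdown",
"metadata": {},
"source": [
"Only one of the two sequences per subject is segmented. As a small illustration (not part of the original processing), the lookup below resolves the segmented sequence for every subject from the `available_segment` table:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# illustration only: resolve the segmented sequence per subject\n",
"for s in available_subject:\n",
"    seq = int(np.squeeze(np.where(available_segment[s - 1]))) + 1\n",
"    print('S%d -> Seq%d' % (s, seq))"
]
},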
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class SequentialDictionary:\n",
"    '''An auto-vivifying nested dictionary.\n",
"    \n",
"    Missing keys are created on access, so chained indexing such as\n",
"    x['1st dim']['2nd dim'] = 2 works without preparing intermediate levels.\n",
"    '''\n",
"    \n",
"    def __init__(self):\n",
"        self.data = dict()\n",
"    \n",
"    def __getitem__(self, index):\n",
"        if index not in self.data:\n",
"            self.data[index] = SequentialDictionary()\n",
"        return self.data[index]\n",
"    \n",
"    def __setitem__(self, index, value):\n",
"        self.data[index] = value\n",
"    \n",
"    def __len__(self):\n",
"        # count leaf entries recursively\n",
"        length = 0\n",
"        for key, value in self.data.items():\n",
"            if isinstance(value, SequentialDictionary):\n",
"                length += len(value)\n",
"            else:\n",
"                length += 1\n",
"        return length"
]
},
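{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick usage sketch (illustration only): reading an unseen key silently creates an empty sub-dictionary, and `len` counts leaf entries across all levels."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"demo = SequentialDictionary()\n",
"demo['S1']['Seq1'] = 'a.avi'   # the intermediate level is created implicitly\n",
"demo['S1']['Seq2'] = 'b.avi'\n",
"print(len(demo))               # 2 leaf entries\n",
"print(demo['S1']['Seq2'])      # b.avi"
]
},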
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 672/672 [00:34<00:00, 20.23it/s]\n"
]
}
],
"source": [
"# Video: open every stream (RGB frames plus the two mask channels)\n",
"VIDEO_RGB = 'imageSequence'\n",
"VIDEO_MASK_HUMAN_AND_CHAIR = 'FGmasks'\n",
"VIDEO_MASK_CHAIR = 'ChairMasks'\n",
"\n",
"available_format = [\n",
"    VIDEO_RGB,\n",
"    VIDEO_MASK_HUMAN_AND_CHAIR,\n",
"    VIDEO_MASK_CHAIR,\n",
"]\n",
"\n",
"video_path = '{root}/{subject}/{sequence}/{format}/video_{camera}.avi'\n",
"video = SequentialDictionary()\n",
"\n",
"available_video = product(*[\n",
"    available_subject,\n",
"    available_sequence,\n",
"    available_format,\n",
"    available_camera,\n",
"])\n",
"total = len(available_subject) * len(available_sequence) * len(available_format) * len(available_camera)\n",
"\n",
"# 'fmt' avoids shadowing the built-in format()\n",
"for subject, sequence, fmt, camera in tqdm(available_video, total=total):\n",
"    video[subject][sequence][fmt][camera] = cv.VideoCapture(video_path.format(\n",
"        root=root,\n",
"        subject='S%d' % subject,\n",
"        sequence='Seq%d' % sequence,\n",
"        format=fmt,\n",
"        camera=camera,\n",
"    ))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:17<00:00,  1.14it/s]\n"
]
}
],
"source": [
"# annotations: per-sequence .mat files with 2D/3D joint positions\n",
"ANNOT_CAMERA_2D = 'annot2'\n",
"ANNOT_CAMERA_3D = 'annot3'\n",
"ANNOT_WORLD_3D = 'univ_annot3'\n",
"ANNOT_CAMERA_CALI = 'cameras'\n",
"\n",
"annot_path = '{root}/{subject}/{sequence}/annot.mat'\n",
"annot = SequentialDictionary()\n",
"\n",
"available_annot = product(*[\n",
"    available_subject,\n",
"    available_sequence,\n",
"])\n",
"total = len(available_subject) * len(available_sequence)\n",
"\n",
"for subject, sequence in tqdm(available_annot, total=total):\n",
"    annot[subject][sequence] = scipy.io.loadmat(annot_path.format(\n",
"        root=root,\n",
"        subject='S%d' % subject,\n",
"        sequence='Seq%d' % sequence,\n",
"    ))"
]
},
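{
"cell_type": "markdown",
"metadata": {},
"source": [
"The .mat layout, as used by the demo cell further below: `annot[subject][sequence]['annot3']` is an object array indexed by `[camera, 0]`, and each entry holds one row per frame that reshapes to (num_joints, 3); `'annot2'` is the 2D analogue reshaping to (num_joints, 2). A minimal inspection sketch under that assumption:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# illustration only: peek at one annotation entry of S1/Seq1, camera 0\n",
"a2 = annot[1][1][ANNOT_CAMERA_2D]                    # object array, indexed [camera, 0]\n",
"first_frame_2d = np.reshape(a2[0, 0][0], (-1, 2))    # (num_joints, 2) pixel coordinates\n",
"print(a2.shape, a2[0, 0].shape, first_frame_2d.shape)"
]
},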
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 144.42it/s]\n"
]
}
],
"source": [
"# camera parameters: camera.calibration lists, per camera, a 'name' line\n",
"# followed by 4x4 intrinsic and extrinsic blocks (16 values each)\n",
"CAMERA_INTRINSIC = 'intrinsic'\n",
"CAMERA_EXTRINSIC = 'extrinsic'\n",
"\n",
"camera_path = '{root}/{subject}/{sequence}/camera.calibration'\n",
"camera_parameter = SequentialDictionary()\n",
"\n",
"available_camera_parameter = product(*[\n",
"    available_subject,\n",
"    available_sequence,\n",
"])\n",
"total = len(available_subject) * len(available_sequence)\n",
"\n",
"for subject, sequence in tqdm(available_camera_parameter, total=total):\n",
"    camera_index = -1\n",
"    with open(camera_path.format(\n",
"        root=root,\n",
"        subject='S%d' % subject,\n",
"        sequence='Seq%d' % sequence,\n",
"    ), 'r') as file:\n",
"        for line in file:\n",
"            word = line.strip().split()  # split on whitespace\n",
"            if not word:\n",
"                continue\n",
"\n",
"            if word[0] == 'name':\n",
"                camera_index = int(word[-1])\n",
"            elif word[0] == CAMERA_INTRINSIC:\n",
"                # keep the upper-left 3x3 of the 4x4 intrinsic block\n",
"                mat = np.reshape(np.asarray(word[1:], dtype=np.float64), newshape=(4, 4))\n",
"                camera_parameter[subject][sequence][camera_index][CAMERA_INTRINSIC] = mat[0:3, 0:3]\n",
"            elif word[0] == CAMERA_EXTRINSIC:\n",
"                # keep the 3x4 [R|t] part of the 4x4 extrinsic block\n",
"                mat = np.reshape(np.asarray(word[1:], dtype=np.float64), newshape=(4, 4))\n",
"                camera_parameter[subject][sequence][camera_index][CAMERA_EXTRINSIC] = mat[0:3, 0:4]"
]
},
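{
"cell_type": "markdown",
"metadata": {},
"source": [
"With the calibration loaded, the 3x3 intrinsic matrix maps camera-space millimetres to pixels, and the 3x4 extrinsic [R|t] maps world coordinates into camera space. The helpers below are a minimal sketch of that pipeline (the sample point is made up, not taken from the dataset):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def world_to_camera(points_world, extrinsic):\n",
"    '''Apply a 3x4 [R|t] matrix to Nx3 world-space points.'''\n",
"    homo = np.concatenate((points_world, np.ones((len(points_world), 1))), axis=1)\n",
"    return np.matmul(extrinsic, homo.T).T\n",
"\n",
"def camera_to_pixels(points_cam, intrinsic):\n",
"    '''Perspective-project Nx3 camera-space points to Nx2 pixel coordinates.'''\n",
"    proj = np.matmul(intrinsic, np.asarray(points_cam).T)  # 3xN\n",
"    return (proj[:2] / proj[2]).T\n",
"\n",
"# made-up sample point: 3 m straight ahead of camera 0 of S1/Seq1\n",
"K = camera_parameter[1][1][0][CAMERA_INTRINSIC]\n",
"print(camera_to_pixels([[0.0, 0.0, 3000.0]], K))  # lands near the principal point"
]
},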
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def rotate_bound(image, angle):\n",
"    '''Rotate about the image center, keeping the canvas size unchanged.\n",
"    \n",
"    Corners that rotate outside the canvas are clipped, so the caller\n",
"    pads the image beforehand (see crop_image below).\n",
"    '''\n",
"    height, width, channel = image.shape\n",
"    \n",
"    mat = cv.getRotationMatrix2D((width / 2, height / 2), -angle, 1)\n",
"    return cv.warpAffine(image, mat, (width, height))"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def crop_image(image, center, scale, rotate, resolution):\n",
"    center = Vector2(center)  # copy so the caller's array is untouched\n",
"    height, width, channel = image.shape\n",
"    crop_ratio = 200 * scale / resolution\n",
"    \n",
"    if crop_ratio >= 2:  # if the box is more than twice the target resolution\n",
"        # scale the image down before cropping\n",
"        height = math.floor(height / crop_ratio)\n",
"        width = math.floor(width / crop_ratio)\n",
"\n",
"        if max([height, width]) < 2:\n",
"            # zoomed out so far that the image is a single pixel or less\n",
"            raise ValueError(\"Width or height is invalid!\")\n",
"\n",
"        image = cv.resize(image, (width, height))  # cv.resize takes (width, height)\n",
"        center /= crop_ratio\n",
"        scale /= crop_ratio\n",
"\n",
"    ul = (center - 200 * scale / 2).astype(int)\n",
"    br = (center + 200 * scale / 2).astype(int)  # Vector2\n",
"\n",
"    if crop_ratio >= 2:  # force the crop to resolution x resolution\n",
"        br -= (br - ul - resolution)\n",
"\n",
"    # padding so that rotated corners stay inside the canvas\n",
"    pad_length = math.ceil((ul - br).length - (br.x - ul.x) / 2)\n",
"\n",
"    if rotate != 0:\n",
"        ul -= pad_length\n",
"        br += pad_length\n",
"\n",
"    src = [max(0, ul.y), min(height, br.y), max(0, ul.x), min(width, br.x)]\n",
"    dst = [max(0, -ul.y), min(height, br.y) - ul.y, max(0, -ul.x), min(width, br.x) - ul.x]\n",
"\n",
"    new_image = np.zeros([br.y - ul.y, br.x - ul.x, channel], dtype=np.uint8)\n",
"    new_image[dst[0]:dst[1], dst[2]:dst[3], :] = image[src[0]:src[1], src[2]:src[3], :]\n",
"\n",
"    if rotate != 0:\n",
"        new_image = rotate_bound(new_image, rotate)\n",
"        new_height, new_width, _ = new_image.shape\n",
"        new_image = new_image[pad_length:new_height - pad_length, pad_length:new_width - pad_length, :]\n",
"\n",
"    if crop_ratio < 2:\n",
"        new_image = cv.resize(new_image, (resolution, resolution))\n",
"\n",
"    return new_image"
]
},
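{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick smoke test of `crop_image` on a synthetic frame (illustration only, no dataset data needed): with `scale = 1.0` the 200 px box stays below the 2x threshold, so the crop is taken at full resolution and then resized to 256 x 256."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dummy = np.zeros((512, 512, 3), dtype=np.uint8)\n",
"dummy[200:300, 200:300] = 255  # white square around the crop center\n",
"patch = crop_image(dummy, Vector2(250, 250), 1.0, 0, 256)\n",
"print(patch.shape)  # expected: (256, 256, 3)"
]
},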
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"subject = 3\n",
"sequence = int(np.squeeze(np.where(available_segment[subject - 1]))) + 1\n",
"camera = 8\n",
"frame = 4748\n",
"\n",
"# grab the same frame from the RGB video and both mask videos\n",
"image = SequentialDictionary()\n",
"for fmt in available_format:\n",
"    video[subject][sequence][fmt][camera].set(cv.CAP_PROP_POS_FRAMES, frame)\n",
"    success, image[fmt] = video[subject][sequence][fmt][camera].read()\n",
"    assert success\n",
"\n",
"height, width, channel = np.asarray([\n",
"    video[subject][sequence][VIDEO_RGB][camera].get(cv.CAP_PROP_FRAME_HEIGHT),\n",
"    video[subject][sequence][VIDEO_RGB][camera].get(cv.CAP_PROP_FRAME_WIDTH),\n",
"    3,\n",
"]).astype(int)\n",
"\n",
"# replacement textures for shirt/background/chair augmentation\n",
"for image_name in ['checker', 'room', 'flower']:\n",
"    image[image_name] = cv.imread('{image_name}.jpg'.format(image_name=image_name))\n",
"    image[image_name] = cv.resize(image[image_name], (width, height))  # cv.resize takes (width, height)\n",
"\n",
"jitter = 0.4 + 0.8 * random.random()  # brightness factor in [0.4, 1.2)\n",
"\n",
"background = image[VIDEO_MASK_HUMAN_AND_CHAIR][:, :, 2] < 200\n",
"chair = image[VIDEO_MASK_CHAIR][:, :, 2] < 200\n",
"pants = image[VIDEO_MASK_HUMAN_AND_CHAIR][:, :, 0] < 200\n",
"shirts = image[VIDEO_MASK_HUMAN_AND_CHAIR][:, :, 1] < 200\n",
"\n",
"# composite: swap in textures for shirt, background, and chair; jitter the pants\n",
"image[VIDEO_RGB][shirts] = image['checker'][shirts]\n",
"image[VIDEO_RGB][pants] = np.clip(image[VIDEO_RGB][pants] * jitter, 0, 255)  # clip to avoid uint8 wrap-around\n",
"image[VIDEO_RGB][background] = image['room'][background]\n",
"image[VIDEO_RGB][chair] = image['flower'][chair]\n",
"\n",
"in_3D = np.reshape(annot[subject][sequence][ANNOT_CAMERA_3D][camera, 0][frame], newshape=(-1, 3))\n",
"\n",
"num_keypoints = len(in_3D)\n",
"\n",
"# homogeneous coordinates for easy matrix multiplication\n",
"in_3D = np.concatenate((in_3D, np.ones(shape=(num_keypoints, 1))), axis=1).transpose(1, 0)\n",
"# annot3 is already camera-space, so the extrinsic reduces to [I|0]\n",
"identity_transform = np.concatenate((np.eye(3), np.zeros(shape=(3, 1))), axis=1)\n",
"\n",
"projected = np.matmul(identity_transform, in_3D)\n",
"projected = np.matmul(camera_parameter[subject][sequence][camera][CAMERA_INTRINSIC], projected)\n",
"projected = projected / projected[-1, :]  # perspective division\n",
"projected = projected.transpose(1, 0)\n",
"\n",
"# bounding box around the projected keypoints, padded by 50 px\n",
"pad = np.asarray([50, 50], dtype=int)\n",
"ul = np.asarray([np.min(projected[:, 0]), np.min(projected[:, 1])], dtype=int) - pad\n",
"br = np.asarray([np.max(projected[:, 0]), np.max(projected[:, 1])], dtype=int) + pad\n",
"\n",
"center = ((ul + br) * 0.5).astype(int)\n",
"scale = np.max(br - ul) / 200\n",
"\n",
"tmp = crop_image(image[VIDEO_RGB], center, scale, 30, 256)\n",
"cv.imwrite('crop.jpg', tmp)\n",
"\n",
"image[VIDEO_MASK_HUMAN_AND_CHAIR][ul[1]:br[1], ul[0]:br[0], :] = [255, 255, 255]\n",
"\n",
"# draw a 20x20 red square at every projected keypoint\n",
"for keypoint in projected:\n",
"    x, y, _ = keypoint\n",
"    \n",
"    for tx in range(-10, 10):\n",
"        for ty in range(-10, 10):\n",
"            xx = x + tx\n",
"            yy = y + ty\n",
"            \n",
"            if xx < 0 or image[VIDEO_RGB].shape[1] <= xx \\\n",
"                    or yy < 0 or image[VIDEO_RGB].shape[0] <= yy:\n",
"                continue\n",
"            \n",
"            image[VIDEO_RGB][int(yy), int(xx), :] = [0, 0, 255]\n",
"\n",
"for fmt in available_format:\n",
"    success = cv.imwrite('{format}.jpg'.format(format=fmt), image[fmt])\n",
"    assert success"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# available_video is a product() iterator and was exhausted by the loading\n",
"# loop above, so rebuild it before releasing the captures\n",
"for subject, sequence, fmt, camera in product(\n",
"        available_subject, available_sequence, available_format, available_camera):\n",
"    video[subject][sequence][fmt][camera].release()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# print('Video:', video_path)\n",
"# print('Open:', video.isOpened())\n",
"# print('Resolution:', '%dx%d' % (video.get(cv.CAP_PROP_FRAME_WIDTH), video.get(cv.CAP_PROP_FRAME_HEIGHT)))\n",
"# print('Total frames:', video.get(cv.CAP_PROP_FRAME_COUNT))\n",
"# print('Frame-rate:', video.get(cv.CAP_PROP_FPS))\n",
"# print('OpenCV:', cv.__version__)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}