Convert Human3.6M from MATLAB to Python
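This notebook drives the official Human3.6M MATLAB toolbox through matlab.engine to extract 2D/3D joint positions, camera intrinsics, and ground-truth bounding boxes, sampling every fifth frame, and pickles the results into train.bin (subjects 1, 5, 6, 7, 8) and valid.bin (subjects 9, 11).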
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy\n",
    "import cv2\n",
    "import h5py\n",
    "import imageio\n",
    "import math\n",
    "import matlab.engine\n",
    "import numpy as np\n",
    "import os\n",
    "import pickle\n",
    "import skimage\n",
    "import skimage.io\n",
    "import skimage.transform\n",
    "from functools import lru_cache\n",
    "from tqdm import tqdm\n",
    "from vectormath import Vector2, Vector3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "root = 'D:/data/Human3.6M/Release-v1.1/'\n",
    "script_paths = [subdir for subdir, _, _ in os.walk(root) if '.git' not in subdir]\n",
    "additional_script_paths = [\n",
    "    # empty\n",
    "]\n",
    "subjects = [\n",
    "    1, 5, 6, 7, 8,  # training\n",
    "    9, 11,  # validation\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start a MATLAB session and put every toolbox script directory on its path.\n",
    "core = matlab.engine.start_matlab()\n",
    "for script_path in script_paths + additional_script_paths:\n",
    "    core.addpath(script_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep handles to the toolbox objects in the MATLAB workspace: the database,\n",
    "# the RGB video reader, the bounding-box and background masks, and the\n",
    "# 2D/3D pose feature extractors.\n",
    "core.workspace['DB'] = core.H36MDataBase.instance()\n",
    "core.workspace['feature_RGB'] = core.H36MRGBVideoFeature()\n",
    "core.workspace['feature_BB'] = core.H36MMyBBMask()\n",
    "core.workspace['feature_BG'] = core.H36MMyBGMask()\n",
    "core.workspace['features'] = [\n",
    "    core.H36MPose2DPositionsFeature(),\n",
    "    core.H36MPose3DPositionsFeature('Monocular', True),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def valid_sequence(subject, action, sub_action, camera):\n",
    "    return (subject in [1, 5, 6, 7, 8, 9, 11]\n",
    "            and 1 <= action <= 16\n",
    "            and 1 <= sub_action <= 2\n",
    "            and 1 <= camera <= 4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_max_frame(subject, action, sub_action):\n",
    "    return int(core.getNumFrames(core.workspace['DB'], subject, action, sub_action))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_sequence(subject, action, sub_action, camera):\n",
    "    core.workspace['sequence'] = core.H36MSequence(subject, action, sub_action, camera, -1)\n",
    "    return core.workspace['sequence']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_intrinsics(subject, action, sub_action, camera):\n",
    "    if not valid_sequence(subject, action, sub_action, camera):\n",
    "        raise IndexError()\n",
    "\n",
    "    sequence = get_sequence(subject, action, sub_action, camera)\n",
    "    core.workspace['camera'] = core.getCamera(sequence)\n",
    "\n",
    "    # f: focal length, c: principal point,\n",
    "    # k: radial distortion coefficients, p: tangential distortion coefficients\n",
    "    f, c, k, p = [core.eval('camera.%s' % attrib)[0] for attrib in ['f', 'c', 'k', 'p']]\n",
    "\n",
    "    return f, c, k, p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_RGB(subject, action, sub_action, camera, frame):\n",
    "    if not valid_sequence(subject, action, sub_action, camera):\n",
    "        raise IndexError()\n",
    "\n",
    "    max_frame = get_max_frame(subject, action, sub_action)\n",
    "    if not (1 <= frame <= max_frame):\n",
    "        raise IndexError()\n",
    "\n",
    "    sequence = get_sequence(subject, action, sub_action, camera)\n",
    "    core.workspace['metadata'] = core.serializer(core.workspace['feature_RGB'], sequence)\n",
    "\n",
    "    image = core.getFrame(core.workspace['metadata'], core.double(frame))\n",
    "    # MATLAB stores arrays in column-major order: reshape with the reversed\n",
    "    # size and transpose back to (height, width, channel).\n",
    "    image = np.reshape(np.asarray(image._data, dtype=float), newshape=(image._size[2], image._size[1], image._size[0])).transpose(2, 1, 0)\n",
    "\n",
    "    video_name = core.eval('metadata.Reader.VideoName')\n",
    "\n",
    "    return image, video_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_video_name(subject, action, sub_action, camera):\n",
    "    if not valid_sequence(subject, action, sub_action, camera):\n",
    "        raise IndexError()\n",
    "\n",
    "    sequence = get_sequence(subject, action, sub_action, camera)\n",
    "    core.workspace['metadata'] = core.serializer(core.workspace['feature_RGB'], sequence)\n",
    "\n",
    "    video_name = core.eval('metadata.Reader.VideoName')\n",
    "\n",
    "    return video_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_pose(subject, action, sub_action, camera, frame):\n",
    "    if not valid_sequence(subject, action, sub_action, camera):\n",
    "        raise IndexError()\n",
    "\n",
    "    max_frame = get_max_frame(subject, action, sub_action)\n",
    "    if not (1 <= frame <= max_frame):\n",
    "        raise IndexError()\n",
    "\n",
    "    sequence = get_sequence(subject, action, sub_action, camera)\n",
    "    core.eval('sequence.IdxFrames = %d;' % frame, nargout=0)\n",
    "\n",
    "    pose = core.H36MComputeFeatures(sequence, core.workspace['features'])\n",
    "\n",
    "    # 32 joints: 2D positions in image space, 3D positions in camera space.\n",
    "    return (np.reshape(np.asarray(pose[0]), newshape=(32, 2)),\n",
    "            np.reshape(np.asarray(pose[1]), newshape=(32, 3)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_center_scale(subject, action, sub_action, camera, frame):\n",
    "    if not valid_sequence(subject, action, sub_action, camera):\n",
    "        raise IndexError()\n",
    "\n",
    "    max_frame = get_max_frame(subject, action, sub_action)\n",
    "    if not (1 <= frame <= max_frame):\n",
    "        raise IndexError()\n",
    "\n",
    "    sequence = get_sequence(subject, action, sub_action, camera)\n",
    "    core.workspace['metadata'] = core.serializer(core.workspace['feature_BB'], sequence)\n",
    "\n",
    "    mask = core.getFrame(core.workspace['metadata'], core.double(frame))\n",
    "    mask = np.reshape(np.asarray(mask._data, dtype=float), newshape=(mask._size[1], mask._size[0])).transpose(1, 0)\n",
    "\n",
    "    # The first/last nonzero pixels in row-major scan order give the top-most\n",
    "    # and bottom-most foreground pixels; the box corners are recovered from\n",
    "    # their flat indices.\n",
    "    flatten = mask.flatten()\n",
    "    flatten = np.nonzero(flatten)[0]\n",
    "    ul, br = [flatten[where] for where in [0, -1]]\n",
    "    ul = Vector2(ul % mask.shape[1], ul // mask.shape[1])\n",
    "    br = Vector2(br % mask.shape[1], br // mask.shape[1])\n",
    "\n",
    "    center = (ul + br) / 2\n",
    "    height = (br - ul).y\n",
    "    width = (br - ul).x\n",
    "    scale = max(height, width) / 200  # box size relative to 200 px\n",
    "\n",
    "    return center, scale"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_center_scale_directly(video_name, frame):\n",
    "    # Same as get_center_scale(), but reads the mask straight from the\n",
    "    # downloaded .mat files with h5py instead of going through MATLAB.\n",
    "    sub = video_name.split('/')[-3].split('\\\\')[0]\n",
    "    act, cam = video_name.split('/')[-1].split('.mp4')[0].split('.')\n",
    "\n",
    "    data_root = 'D:/data/Human3.6M/downloaded/'\n",
    "    bb_path = os.path.join(data_root, sub, 'MySegmentsMat', 'ground_truth_bb', '%s.%s.mat' % (act, cam))\n",
    "    with h5py.File(bb_path, 'r') as file:\n",
    "        mask = np.asarray(file[file['Masks'][frame - 1][0]]).transpose(1, 0)  # frames are 1-indexed\n",
    "\n",
    "    flatten = mask.flatten()\n",
    "    flatten = np.nonzero(flatten)[0]\n",
    "    ul, br = [flatten[where] for where in [0, -1]]\n",
    "    ul = Vector2(ul % mask.shape[1], ul // mask.shape[1])\n",
    "    br = Vector2(br % mask.shape[1], br // mask.shape[1])\n",
    "\n",
    "    center = (ul + br) / 2\n",
    "    height = (br - ul).y\n",
    "    width = (br - ul).x\n",
    "    scale = max(height, width) / 200\n",
    "\n",
    "    return center, scale"
   ]
  },
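  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal cross-check, using arbitrary example indices (subject 1, action 2, sub-action 1, camera 1, frame 6): the MATLAB route and the direct h5py route should produce the same box for the same frame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical example: both bounding-box routes should agree.\n",
    "center_a, scale_a = get_center_scale(1, 2, 1, 1, 6)\n",
    "center_b, scale_b = get_center_scale_directly(get_video_name(1, 2, 1, 1), 6)\n",
    "print(center_a, center_b)\n",
    "print(scale_a, scale_b)"
   ]
  },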
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def project(keypoints, f, c, k, p):\n",
    "    # Project 3D camera-space points to the image plane with radial and\n",
    "    # tangential lens distortion (cf. ProjectPointRadial in the toolbox).\n",
    "    X = keypoints.transpose(1, 0)  # (3, N), already in camera space\n",
    "    XX = np.divide(X[0:2, :], X[2, :])  # perspective division\n",
    "    r2 = np.power(XX[0, :], 2) + np.power(XX[1, :], 2)  # squared radial distance\n",
    "    radial = np.dot(k, np.asarray([r2, np.power(r2, 2), np.power(r2, 3)])) + 1\n",
    "    tan = p[0] * XX[1, :] + p[1] * XX[0, :]\n",
    "    temp = radial + tan\n",
    "    first = XX * np.stack([temp, temp])\n",
    "    second = np.expand_dims(np.asarray([p[1], p[0]]), axis=1) * np.expand_dims(r2, axis=0)\n",
    "    XXX = first + second\n",
    "    XXX = XXX.transpose(1, 0)\n",
    "    proj = f * XXX + c  # apply focal length and principal point\n",
    "\n",
    "    return proj"
   ]
  },
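  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal sanity check for `project()`, using arbitrary example indices (subject 1, action 2, sub-action 1, camera 1, frame 1): reprojecting the 3D camera-space pose with the recovered intrinsics should closely reproduce the toolbox's own 2D positions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical example: reproject the 3D pose and compare against the 2D pose.\n",
    "f, c, k, p = get_intrinsics(1, 2, 1, 1)\n",
    "in_image_space, in_camera_space = get_pose(1, 2, 1, 1, 1)\n",
    "reprojected = project(in_camera_space, f, c, k, p)\n",
    "print(np.abs(reprojected - in_image_space).max())  # should be close to zero"
   ]
  },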
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def crop_image(image, center, scale, rotate, resolution):\n",
    "    center = Vector2(center)  # copy, so the caller's vector is not mutated\n",
    "    height, width, channel = image.shape\n",
    "    crop_ratio = 200 * scale / resolution\n",
    "    if crop_ratio >= 2:  # if the box is more than twice the target resolution\n",
    "        # scale the whole image down first, so later resizes stay cheap\n",
    "        height = math.floor(height / crop_ratio)\n",
    "        width = math.floor(width / crop_ratio)\n",
    "\n",
    "        if max([height, width]) < 2:\n",
    "            # Zoomed out so much that the image is now a single pixel or less\n",
    "            raise ValueError('Width or height is invalid!')\n",
    "\n",
    "        image = skimage.transform.resize(image, (height, width), mode='constant')\n",
    "        center /= crop_ratio\n",
    "        scale /= crop_ratio\n",
    "\n",
    "    ul = (center - 200 * scale / 2).astype(int)  # upper-left corner\n",
    "    br = (center + 200 * scale / 2).astype(int)  # bottom-right corner\n",
    "\n",
    "    if crop_ratio >= 2:  # force the crop size to resolution x resolution\n",
    "        br -= (br - ul - resolution)\n",
    "\n",
    "    pad_length = math.ceil((ul - br).length - (br.x - ul.x) / 2)\n",
    "\n",
    "    if rotate != 0:\n",
    "        # pad before rotating so the corners are not clipped\n",
    "        ul -= pad_length\n",
    "        br += pad_length\n",
    "\n",
    "    src = [max(0, ul.y), min(height, br.y), max(0, ul.x), min(width, br.x)]\n",
    "    dst = [max(0, -ul.y), min(height, br.y) - ul.y, max(0, -ul.x), min(width, br.x) - ul.x]\n",
    "\n",
    "    new_image = np.zeros([br.y - ul.y, br.x - ul.x, channel], dtype=np.float32)\n",
    "    new_image[dst[0]:dst[1], dst[2]:dst[3], :] = image[src[0]:src[1], src[2]:src[3], :]\n",
    "\n",
    "    if rotate != 0:\n",
    "        new_image = skimage.transform.rotate(new_image, rotate)\n",
    "        new_height, new_width, _ = new_image.shape\n",
    "        new_image = new_image[pad_length:new_height - pad_length, pad_length:new_width - pad_length, :]\n",
    "\n",
    "    if crop_ratio < 2:\n",
    "        new_image = skimage.transform.resize(new_image, (resolution, resolution), mode='constant')\n",
    "\n",
    "    return new_image"
   ]
  },
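  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A sketch of how the pieces compose, again with arbitrary example indices: fetch one frame and its ground-truth box, then crop it to a square 256 x 256 patch. The division by 255 is an assumption that the frame comes back as floats in [0, 255]."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hypothetical example: crop one frame around its ground-truth bounding box.\n",
    "image, _ = get_RGB(1, 2, 1, 1, 1)\n",
    "center, scale = get_center_scale(1, 2, 1, 1, 1)\n",
    "cropped = crop_image(image / 255, center, scale, rotate=0, resolution=256)  # assumes [0, 255] input\n",
    "print(cropped.shape)  # (256, 256, 3)"
   ]
  },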
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1-based indices into the 32-joint Human3.6M skeleton (17 keypoints in total).\n",
    "pelvis = [1]\n",
    "left_leg = [7, 8, 9]\n",
    "right_leg = [2, 3, 4]\n",
    "spine = [13, 14, 15, 16]\n",
    "left_arm = [18, 19, 20]\n",
    "right_arm = [26, 27, 28]\n",
    "keypoints = pelvis + left_leg + right_leg + spine + left_arm + right_arm\n",
    "\n",
    "converted = dict()\n",
    "converted['S'] = list()\n",
    "converted['part'] = list()\n",
    "converted['center'] = list()\n",
    "converted['scale'] = list()\n",
    "converted['image'] = list()\n",
    "\n",
    "# Count the sampled frames first so tqdm can report overall progress.\n",
    "total = 0\n",
    "\n",
    "for subject in [1, 5, 6, 7, 8, ]:\n",
    "    for action in range(2, 16 + 1):\n",
    "        for sub_action in [1, 2]:\n",
    "            for camera in [1, 2, 3, 4]:\n",
    "\n",
    "                # Data corrupted.\n",
    "                if subject == 11 and action == 2 and sub_action == 2 and camera == 1:\n",
    "                    continue\n",
    "\n",
    "                max_frame = get_max_frame(subject, action, sub_action)\n",
    "                total = total + (max_frame + 4) // 5  # matches range(1, max_frame + 1, 5)\n",
    "\n",
    "\n",
    "with tqdm(total=total) as progress:\n",
    "\n",
    "    for subject in [1, 5, 6, 7, 8, ]:\n",
    "        for action in range(2, 16 + 1):\n",
    "            for sub_action in [1, 2]:\n",
    "                for camera in [1, 2, 3, 4]:\n",
    "\n",
    "                    progress.set_description('subject(%d) action(%d-%d) camera(%d)' % (subject, action, sub_action, camera))\n",
    "\n",
    "                    # Data corrupted.\n",
    "                    if subject == 11 and action == 2 and sub_action == 2 and camera == 1:\n",
    "                        continue\n",
    "\n",
    "                    max_frame = get_max_frame(subject, action, sub_action)\n",
    "\n",
    "                    video_name = get_video_name(subject, action, sub_action, camera)\n",
    "                    sub = video_name.split('/')[-3].split('\\\\')[0]\n",
    "                    act, cam = video_name.split('/')[-1].split('.mp4')[0].split('.')\n",
    "\n",
    "                    data_root = 'D:/data/Human3.6M/downloaded/'\n",
    "                    bb_path = os.path.join(data_root, sub, 'MySegmentsMat', 'ground_truth_bb', '%s.%s.mat' % (act, cam))\n",
    "\n",
    "                    act = act.replace(' ', '_')\n",
    "                    video_name = '%s_%s.%s' % (sub, act, cam)\n",
    "\n",
    "                    with h5py.File(bb_path, 'r') as file:\n",
    "\n",
    "                        # Sample every fifth frame; frames are 1-indexed.\n",
    "                        for frame in range(1, max_frame + 1, 5):\n",
    "                            # Bounding box straight from the ground-truth mask;\n",
    "                            # equivalent to, but much faster than, get_center_scale()\n",
    "                            # through MATLAB.\n",
    "                            mask = np.asarray(file[file['Masks'][frame - 1][0]]).transpose(1, 0)\n",
    "\n",
    "                            flatten = mask.flatten()\n",
    "                            flatten = np.nonzero(flatten)[0]\n",
    "                            ul, br = [flatten[where] for where in [0, -1]]\n",
    "                            ul = Vector2(ul % mask.shape[1], ul // mask.shape[1])\n",
    "                            br = Vector2(br % mask.shape[1], br // mask.shape[1])\n",
    "\n",
    "                            center = (ul + br) / 2\n",
    "                            height = (br - ul).y\n",
    "                            width = (br - ul).x\n",
    "                            scale = max(height, width) / 200\n",
    "\n",
    "                            in_image_space, in_camera_space = get_pose(subject, action, sub_action, camera, frame)  # part (2D), S (3D)\n",
    "\n",
    "                            converted['S'].append(np.reshape([in_camera_space[idx - 1] for idx in keypoints], (-1, 3)))\n",
    "                            converted['part'].append(np.reshape([in_image_space[idx - 1] for idx in keypoints], (-1, 2)))\n",
    "                            converted['center'].append(center)\n",
    "                            converted['scale'].append(scale)\n",
    "                            converted['image'].append('%s_%06d.jpg' % (video_name, frame))\n",
    "\n",
    "                            progress.update(1)\n",
    "\n",
    "with open('train.bin', 'wb') as file:\n",
    "    pickle.dump(converted, file)"
   ]
  },
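  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A quick sketch to sanity-check the export, assuming train.bin was written by the cell above: every key should hold one entry per sampled frame, with 17 joints per pose."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('train.bin', 'rb') as file:\n",
    "    data = pickle.load(file)\n",
    "\n",
    "# All five lists must have the same length.\n",
    "print({key: len(value) for key, value in data.items()})\n",
    "print(data['S'][0].shape, data['part'][0].shape)  # (17, 3) (17, 2)"
   ]
  },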
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same pipeline for the validation subjects.\n",
    "pelvis = [1]\n",
    "left_leg = [7, 8, 9]\n",
    "right_leg = [2, 3, 4]\n",
    "spine = [13, 14, 15, 16]\n",
    "left_arm = [18, 19, 20]\n",
    "right_arm = [26, 27, 28]\n",
    "keypoints = pelvis + left_leg + right_leg + spine + left_arm + right_arm\n",
    "\n",
    "converted = dict()\n",
    "converted['S'] = list()\n",
    "converted['part'] = list()\n",
    "converted['center'] = list()\n",
    "converted['scale'] = list()\n",
    "converted['image'] = list()\n",
    "\n",
    "total = 0\n",
    "\n",
    "for subject in [9, 11, ]:\n",
    "    for action in range(2, 16 + 1):\n",
    "        for sub_action in [1, 2]:\n",
    "            for camera in [1, 2, 3, 4]:\n",
    "\n",
    "                # Data corrupted.\n",
    "                if subject == 11 and action == 2 and sub_action == 2 and camera == 1:\n",
    "                    continue\n",
    "\n",
    "                max_frame = get_max_frame(subject, action, sub_action)\n",
    "                total = total + (max_frame + 4) // 5  # matches range(1, max_frame + 1, 5)\n",
    "\n",
    "\n",
    "with tqdm(total=total) as progress:\n",
    "\n",
    "    for subject in [9, 11, ]:\n",
    "        for action in range(2, 16 + 1):\n",
    "            for sub_action in [1, 2]:\n",
    "                for camera in [1, 2, 3, 4]:\n",
    "\n",
    "                    progress.set_description('subject(%d) action(%d-%d) camera(%d)' % (subject, action, sub_action, camera))\n",
    "\n",
    "                    # Data corrupted.\n",
    "                    if subject == 11 and action == 2 and sub_action == 2 and camera == 1:\n",
    "                        continue\n",
    "\n",
    "                    max_frame = get_max_frame(subject, action, sub_action)\n",
    "\n",
    "                    video_name = get_video_name(subject, action, sub_action, camera)\n",
    "                    sub = video_name.split('/')[-3].split('\\\\')[0]\n",
    "                    act, cam = video_name.split('/')[-1].split('.mp4')[0].split('.')\n",
    "\n",
    "                    data_root = 'D:/data/Human3.6M/downloaded/'\n",
    "                    bb_path = os.path.join(data_root, sub, 'MySegmentsMat', 'ground_truth_bb', '%s.%s.mat' % (act, cam))\n",
    "\n",
    "                    act = act.replace(' ', '_')\n",
    "                    video_name = '%s_%s.%s' % (sub, act, cam)\n",
    "\n",
    "                    with h5py.File(bb_path, 'r') as file:\n",
    "\n",
    "                        # Sample every fifth frame; frames are 1-indexed.\n",
    "                        for frame in range(1, max_frame + 1, 5):\n",
    "                            mask = np.asarray(file[file['Masks'][frame - 1][0]]).transpose(1, 0)\n",
    "\n",
    "                            flatten = mask.flatten()\n",
    "                            flatten = np.nonzero(flatten)[0]\n",
    "                            ul, br = [flatten[where] for where in [0, -1]]\n",
    "                            ul = Vector2(ul % mask.shape[1], ul // mask.shape[1])\n",
    "                            br = Vector2(br % mask.shape[1], br // mask.shape[1])\n",
    "\n",
    "                            center = (ul + br) / 2\n",
    "                            height = (br - ul).y\n",
    "                            width = (br - ul).x\n",
    "                            scale = max(height, width) / 200\n",
    "\n",
    "                            in_image_space, in_camera_space = get_pose(subject, action, sub_action, camera, frame)  # part (2D), S (3D)\n",
    "\n",
    "                            converted['S'].append(np.reshape([in_camera_space[idx - 1] for idx in keypoints], (-1, 3)))\n",
    "                            converted['part'].append(np.reshape([in_image_space[idx - 1] for idx in keypoints], (-1, 2)))\n",
    "                            converted['center'].append(center)\n",
    "                            converted['scale'].append(scale)\n",
    "                            converted['image'].append('%s_%06d.jpg' % (video_name, frame))\n",
    "\n",
    "                            progress.update(1)\n",
    "\n",
    "with open('valid.bin', 'wb') as file:\n",
    "    pickle.dump(converted, file)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
} |
Notes. The Human3.6M dataset is not entirely accurate.