Last active
September 4, 2019 01:36
-
-
Save dmc5179/12bb133e55052b4743459b72bb28d496 to your computer and use it in GitHub Desktop.
Jupyter Notebook and Python script for the Mask R-CNN Ship Detection Minimum Viable Model (Python 3.6)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np # linear algebra\n", | |
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", | |
"import matplotlib.pyplot as plt # plot & image processing\n", | |
"from skimage.morphology import label\n", | |
"from skimage.data import imread\n", | |
"\n", | |
"import os\n", | |
"import time\n", | |
"import sys\n", | |
"\n", | |
"# Configurations\n", | |
"# Split x ratio of train dataset for validation \n", | |
"TRAINING_VALIDATION_RATIO = 0.2\n", | |
"WORKING_DIR = '/notebooks/kaggle/working'\n", | |
"INPUT_DIR = '/notebooks/kaggle/input'\n", | |
"OUTPUT_DIR = '/notebooks/kaggle/output'\n", | |
"LOGS_DIR = os.path.join(WORKING_DIR, \"logs\")\n", | |
"TRAIN_DATA_PATH = os.path.join(INPUT_DIR, 'train_v2')\n", | |
"TEST_DATA_PATH = os.path.join(INPUT_DIR, 'test_v2')\n", | |
"SAMPLE_SUBMISSION_PATH = os.path.join(INPUT_DIR, 'sample_submission_v2.csv')\n", | |
"TRAIN_SHIP_SEGMENTATIONS_PATH = os.path.join(INPUT_DIR, 'train_ship_segmentations_v2.csv')\n", | |
"MASK_RCNN_PATH = os.path.join(WORKING_DIR, 'Mask_RCNN')\n", | |
"COCO_WEIGHTS_PATH = os.path.join(WORKING_DIR, \"mask_rcnn_coco.h5\")\n", | |
"SHIP_CLASS_NAME = 'ship'\n", | |
"IMAGE_WIDTH = 768\n", | |
"IMAGE_HEIGHT = 768\n", | |
"SHAPE = (IMAGE_WIDTH, IMAGE_HEIGHT)\n", | |
"\n", | |
"test_ds = os.listdir(TEST_DATA_PATH)\n", | |
"train_ds = os.listdir(TRAIN_DATA_PATH)\n", | |
"\n", | |
"print('Working Dir:', WORKING_DIR, os.listdir(WORKING_DIR))\n", | |
"print('Input Dir:', INPUT_DIR, os.listdir(INPUT_DIR))\n", | |
"print('train dataset from: {}, {}'.format(TRAIN_DATA_PATH, len(train_ds)))\n", | |
"print('test dataset from: {}, {}'.format(TRAIN_DATA_PATH, len(test_ds)))\n", | |
"print(TRAIN_SHIP_SEGMENTATIONS_PATH)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read mask encording from the input CSV file \n", | |
"masks = pd.read_csv(TRAIN_SHIP_SEGMENTATIONS_PATH)\n", | |
"masks.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# ref: https://www.kaggle.com/kmader/baseline-u-net-model-part-1\n", | |
"def multi_rle_encode(img):\n", | |
" labels = label(img[:, :, 0])\n", | |
" return [rle_encode(labels==k) for k in np.unique(labels[labels>0])]\n", | |
"\n", | |
"# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode\n", | |
"def rle_encode(img):\n", | |
" '''\n", | |
" img: numpy array, 1 - mask, 0 - background\n", | |
" Returns run length as string formated: [start0] [length0] [start1] [length1]... in 1d array\n", | |
" '''\n", | |
" # reshape to 1d array\n", | |
" pixels = img.T.flatten() # Needed to align to RLE direction\n", | |
" # pads the head & the tail with 0 & converts to ndarray\n", | |
" pixels = np.concatenate([[0], pixels, [0]])\n", | |
" # gets all start(0->1) & end(1->0) positions \n", | |
" runs = np.where(pixels[1:] != pixels[:-1])[0] + 1\n", | |
" # transforms end positions to lengths\n", | |
" runs[1::2] -= runs[::2]\n", | |
" # converts to the string formated: '[s0] [l0] [s1] [l1]...'\n", | |
" return ' '.join(str(x) for x in runs)\n", | |
"\n", | |
"def rle_decode(mask_rle, shape=SHAPE):\n", | |
" '''\n", | |
" mask_rle: run-length as string formated: [start0] [length0] [start1] [length1]... in 1d array\n", | |
" shape: (height,width) of array to return \n", | |
" Returns numpy array according to the shape, 1 - mask, 0 - background\n", | |
" '''\n", | |
" s = mask_rle.split()\n", | |
" # gets starts & lengths 1d arrays \n", | |
" starts, lengths = [np.asarray(x, dtype=int) for x in (s[0::2], s[1::2])]\n", | |
" starts -= 1\n", | |
" # gets ends 1d array\n", | |
" ends = starts + lengths\n", | |
" # creates blank mask image 1d array\n", | |
" img = np.zeros(shape[0]*shape[1], dtype=np.uint8)\n", | |
" # sets mark pixles\n", | |
" for lo, hi in zip(starts, ends):\n", | |
" img[lo:hi] = 1\n", | |
" # reshape as a 2d mask image\n", | |
" return img.reshape(shape).T # Needed to align to RLE direction\n", | |
"\n", | |
"def masks_as_image(in_mask_list, shape=SHAPE):\n", | |
" '''Take the individual ship masks and create a single mask array for all ships\n", | |
" in_mask_list: pd Series: [idx0] [RLE string0]...\n", | |
" Returns numpy array as (shape.h, sahpe.w, 1)\n", | |
" '''\n", | |
" all_masks = np.zeros(shape, dtype = np.int16)\n", | |
" # if isinstance(in_mask_list, list):\n", | |
" for mask in in_mask_list:\n", | |
" if isinstance(mask, str):\n", | |
" all_masks += rle_decode(mask)\n", | |
" return np.expand_dims(all_masks, -1)\n", | |
"\n", | |
"def shows_decode_encode(image_id, path=TRAIN_DATA_PATH):\n", | |
" '''Show image, ship mask, and encoded/decoded result\n", | |
" '''\n", | |
" fig, axarr = plt.subplots(1, 3, figsize = (10, 5))\n", | |
" # image\n", | |
" img_0 = imread(os.path.join(path, image_id))\n", | |
" axarr[0].imshow(img_0)\n", | |
" axarr[0].set_title(image_id)\n", | |
" \n", | |
" # input mask\n", | |
" rle_1 = masks.query('ImageId==\"{}\"'.format(image_id))['EncodedPixels']\n", | |
" img_1 = masks_as_image(rle_1)\n", | |
" # takes 2d array (shape.h, sahpe.w)\n", | |
" axarr[1].imshow(img_1[:, :, 0])\n", | |
" axarr[1].set_title('Ship Mask')\n", | |
" \n", | |
" # encode & decode mask \n", | |
" rle_2 = multi_rle_encode(img_1)\n", | |
" img_2 = masks_as_image(rle_2)\n", | |
" axarr[2].imshow(img_0)\n", | |
" axarr[2].imshow(img_2[:, :, 0], alpha=0.3)\n", | |
" axarr[2].set_title('Encoded & Decoded Mask')\n", | |
" plt.show()\n", | |
" print(image_id , ' Check Decoding->Encoding',\n", | |
" 'RLE_0:', len(rle_1), '->',\n", | |
" 'RLE_1:', len(rle_2))\n", | |
"\n", | |
"# inspects a few example\n", | |
"shows_decode_encode('000155de5.jpg')\n", | |
"shows_decode_encode('00003e153.jpg')\n", | |
"print('It could be different when there is no mask.')\n", | |
"shows_decode_encode('00021ddc3.jpg')\n", | |
"print('It could be different when there are masks overlapped.')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# check if a mask has a ship \n", | |
"masks['ships'] = masks['EncodedPixels'].map(lambda encoded_pixels: 1 if isinstance(encoded_pixels, str) else 0)\n", | |
"# sum ship# by ImageId and create the unique image id/mask list\n", | |
"start_time = time.time()\n", | |
"unique_img_ids = masks.groupby('ImageId').agg({'ships': 'sum'})\n", | |
"unique_img_ids['RleMaskList'] = masks.groupby('ImageId')['EncodedPixels'].apply(list)\n", | |
"unique_img_ids = unique_img_ids.reset_index()\n", | |
"end_time = time.time() - start_time\n", | |
"print(\"unique_img_ids groupby took: {}\".format(end_time))\n", | |
"\n", | |
"# Only care image with ships\n", | |
"unique_img_ids = unique_img_ids[unique_img_ids['ships'] > 0]\n", | |
"unique_img_ids['ships'].hist()\n", | |
"unique_img_ids.sample(3)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# split to training & validation sets \n", | |
"from sklearn.model_selection import train_test_split\n", | |
"train_ids, val_ids = train_test_split(unique_img_ids, \n", | |
" test_size = TRAINING_VALIDATION_RATIO, \n", | |
" stratify = unique_img_ids['ships'])\n", | |
"print(train_ids.shape[0], 'training masks')\n", | |
"print(val_ids.shape[0], 'validation masks')\n", | |
"train_ids['ships'].hist()\n", | |
"val_ids['ships'].hist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# if to clone Mask_R-CNN git when it exists \n", | |
"UPDATE_MASK_RCNN = False\n", | |
"\n", | |
"os.chdir(WORKING_DIR)\n", | |
"if UPDATE_MASK_RCNN:\n", | |
" !rm -rf {MASK_RCNN_PATH}\n", | |
"\n", | |
"# Downlaod Mask RCNN code to a local folder \n", | |
"if not os.path.exists(MASK_RCNN_PATH):\n", | |
" git clone https://github.com/matterport/Mask_RCNN.git", | |
"# ! wget https://github.com/samlin001/Mask_RCNN/archive/master.zip -O Mask_RCNN-master.zip\n", | |
"# ! unzip Mask_RCNN-master.zip 'Mask_RCNN-master/mrcnn/*'\n", | |
"# ! rm Mask_RCNN-master.zip\n", | |
"\n", | |
"# Import Mask RCNN\n", | |
"sys.path.append(MASK_RCNN_PATH) # To find local version of the library\n", | |
"from mrcnn.config import Config\n", | |
"from mrcnn import utils\n", | |
"import mrcnn.model as modellib\n", | |
"from mrcnn import visualize\n", | |
"from mrcnn.model import log " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class AirbusShipDetectionChallengeDataset(utils.Dataset):\n", | |
" \"\"\"Airbus Ship Detection Challenge Dataset\n", | |
" \"\"\"\n", | |
" def __init__(self, image_file_dir, ids, masks, image_width=IMAGE_WIDTH, image_height=IMAGE_HEIGHT):\n", | |
" super().__init__(self)\n", | |
" self.image_file_dir = image_file_dir\n", | |
" self.ids = ids\n", | |
" self.masks = masks\n", | |
" self.image_width = image_width\n", | |
" self.image_height = image_height\n", | |
" \n", | |
" # Add classes\n", | |
" self.add_class(SHIP_CLASS_NAME, 1, SHIP_CLASS_NAME)\n", | |
" self.load_dataset()\n", | |
" \n", | |
" def load_dataset(self):\n", | |
" \"\"\"Load dataset from the path\n", | |
" \"\"\"\n", | |
" # Add images\n", | |
" for index, row in self.ids.iterrows():\n", | |
" image_id = row['ImageId']\n", | |
" image_path = os.path.join(self.image_file_dir, image_id)\n", | |
" rle_mask_list = row['RleMaskList']\n", | |
" #print(rle_mask_list)\n", | |
" self.add_image(\n", | |
" SHIP_CLASS_NAME,\n", | |
" image_id=image_id,\n", | |
" path=image_path,\n", | |
" width=self.image_width, height=self.image_height,\n", | |
" rle_mask_list=rle_mask_list)\n", | |
"\n", | |
" def load_mask(self, image_id):\n", | |
" \"\"\"Generate instance masks for shapes of the given image ID.\n", | |
" \"\"\"\n", | |
" info = self.image_info[image_id]\n", | |
" rle_mask_list = info['rle_mask_list']\n", | |
" mask_count = len(rle_mask_list)\n", | |
" mask = np.zeros([info['height'], info['width'], mask_count],\n", | |
" dtype=np.uint8)\n", | |
" i = 0\n", | |
" for rel in rle_mask_list:\n", | |
" if isinstance(rel, str):\n", | |
" np.copyto(mask[:,:,i], rle_decode(rel))\n", | |
" i += 1\n", | |
" \n", | |
" # Return mask, and array of class IDs of each instance. Since we have\n", | |
" # one class ID only, we return an array of 1s\n", | |
" return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32)\n", | |
" \n", | |
" def image_reference(self, image_id):\n", | |
" \"\"\"Return the path of the image.\"\"\"\n", | |
" info = self.image_info[image_id]\n", | |
" if info['source'] == SHIP_CLASS_NAME:\n", | |
" return info['path']\n", | |
" else:\n", | |
" super(self.__class__, self).image_reference(image_id)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class AirbusShipDetectionChallengeGPUConfig(Config):\n", | |
" \"\"\"\n", | |
" Configuration of Airbus Ship Detection Challenge Dataset \n", | |
" Overrides values in the base Config class.\n", | |
" From https://github.com/samlin001/Mask_RCNN/blob/master/mrcnn/config.py\n", | |
" \"\"\"\n", | |
" # https://www.kaggle.com/docs/kernels#technical-specifications\n", | |
" NAME = 'ASDC_GPU'\n", | |
" # NUMBER OF GPUs to use.\n", | |
" GPU_COUNT = 1\n", | |
" IMAGES_PER_GPU = 2\n", | |
" \n", | |
" NUM_CLASSES = 2 # ship or background\n", | |
" IMAGE_MIN_DIM = IMAGE_WIDTH\n", | |
" IMAGE_MAX_DIM = IMAGE_WIDTH\n", | |
" STEPS_PER_EPOCH = 300\n", | |
" VALIDATION_STEPS = 50\n", | |
" SAVE_BEST_ONLY = True\n", | |
" \n", | |
" # Minimum probability value to accept a detected instance\n", | |
" # ROIs below this threshold are skipped\n", | |
" DETECTION_MIN_CONFIDENCE = 0.95\n", | |
"\n", | |
" # Non-maximum suppression threshold for detection\n", | |
" # Keep it small to merge overlapping ROIs \n", | |
" DETECTION_NMS_THRESHOLD = 0.05\n", | |
"\n", | |
" \n", | |
"config = AirbusShipDetectionChallengeGPUConfig()\n", | |
"config.display()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"start_time = time.time()\n", | |
"# Training dataset.\n", | |
"dataset_train = AirbusShipDetectionChallengeDataset(image_file_dir=TRAIN_DATA_PATH, ids=train_ids, masks=masks)\n", | |
"dataset_train.prepare()\n", | |
"\n", | |
"# Validation dataset\n", | |
"dataset_val = AirbusShipDetectionChallengeDataset(image_file_dir=TRAIN_DATA_PATH, ids=val_ids, masks=masks)\n", | |
"dataset_val.prepare()\n", | |
"\n", | |
"# Load and display random samples\n", | |
"image_ids = np.random.choice(dataset_train.image_ids, 3)\n", | |
"for image_id in image_ids:\n", | |
" image = dataset_train.load_image(image_id)\n", | |
" mask, class_ids = dataset_train.load_mask(image_id)\n", | |
" visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names, limit=1)\n", | |
"\n", | |
"end_time = time.time() - start_time\n", | |
"print(\"dataset prepare: {}\".format(end_time))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"start_time = time.time()\n", | |
"model = modellib.MaskRCNN(mode=\"training\", config=config, model_dir=WORKING_DIR)\n", | |
"\n", | |
"import errno\n", | |
"try:\n", | |
" weights_path = model.find_last()\n", | |
" load_weights = True\n", | |
"except FileNotFoundError:\n", | |
" # if there is no previous trained weights, load COCO\n", | |
" load_weights = True\n", | |
" weights_path = COCO_WEIGHTS_PATH\n", | |
" utils.download_trained_weights(weights_path)\n", | |
" \n", | |
"if load_weights:\n", | |
" print(\"Loading weights: \", weights_path)\n", | |
" model.load_weights(weights_path, by_name=True, exclude=[\n", | |
" \"mrcnn_class_logits\", \"mrcnn_bbox_fc\",\n", | |
" \"mrcnn_bbox\", \"mrcnn_mask\"])\n", | |
"\n", | |
"end_time = time.time() - start_time\n", | |
"print(\"loading weights: {}\".format(end_time))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"\"\"\"Train the model.\"\"\"\n", | |
"start_time = time.time() \n", | |
"model.train(dataset_train, dataset_val,\n", | |
" learning_rate=config.LEARNING_RATE * 1.5,\n", | |
" epochs=21,\n", | |
" layers='all')\n", | |
"end_time = time.time() - start_time\n", | |
"print(\"Train model: {}\".format(end_time))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import tensorflow as tf\n", | |
"from tensorflow.python.framework import graph_util\n", | |
"from tensorflow.python.framework import graph_io\n", | |
"import keras\n", | |
"from keras import backend as K\n", | |
"\n", | |
"#print(model.keras_model.output.name)\n", | |
"saver = tf.train.Saver()\n", | |
"saver.save(K.get_session(), os.path.join(OUTPUT_DIR, 'mask_rcnn_model.ckpt')\n", | |
"\n", | |
"#import tensorflow as tf\n", | |
"# This doesn't work because the of variables that are not constants\n", | |
"# Save tf.keras model in HDF5 format.\n", | |
"#keras_file = \"/notebooks/kaggle/output/keras_model.h5\"\n", | |
"\n", | |
"#tf.keras.models.save_model(model.keras_model, keras_file)\n", | |
"\n", | |
"\n", | |
"sess = K.get_session()\n", | |
"output_names=[out.op.name for out in model.keras_model.outputs]\n", | |
"constant_graph = graph_util.convert_variables_to_constants(\n", | |
" sess,\n", | |
" sess.graph.as_graph_def(),\n", | |
" output_names)\n", | |
"\n", | |
"graph_io.write_graph(constant_graph, OUTPUT_DIR, \"mask_rcnn_frozen_model.pb\", as_text=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"class InferenceConfig(AirbusShipDetectionChallengeGPUConfig):\n", | |
" GPU_COUNT = 1\n", | |
" # 1 image for inference \n", | |
" IMAGES_PER_GPU = 1\n", | |
"\n", | |
"inference_config = InferenceConfig()\n", | |
"\n", | |
"# create a model in inference mode\n", | |
"infer_model = modellib.MaskRCNN(mode=\"inference\", \n", | |
" config=inference_config,\n", | |
" model_dir=WORKING_DIR)\n", | |
"\n", | |
"model_path = infer_model.find_last()\n", | |
"\n", | |
"# Load trained weights\n", | |
"print(\"Loading weights from \", model_path)\n", | |
"infer_model.load_weights(model_path, by_name=True)\n", | |
"\n", | |
"\n", | |
"# Test on a random image\n", | |
"image_id = np.random.choice(dataset_val.image_ids)\n", | |
"original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\\\n", | |
" modellib.load_image_gt(dataset_val, inference_config, \n", | |
" image_id, use_mini_mask=False)\n", | |
"\n", | |
"log(\"original_image\", original_image)\n", | |
"log(\"image_meta\", image_meta)\n", | |
"log(\"gt_class_id\", gt_class_id)\n", | |
"log(\"gt_bbox\", gt_bbox)\n", | |
"log(\"gt_mask\", gt_mask)\n", | |
"\n", | |
"visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, \n", | |
" dataset_train.class_names, figsize=(8, 8))\n", | |
"\n", | |
"results = infer_model.detect([original_image], verbose=1)\n", | |
"\n", | |
"r = results[0]\n", | |
"visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], \n", | |
" dataset_val.class_names, r['scores'])\n", | |
"\n", | |
"# Compute VOC-Style mean Average Precision @ IoU=0.5\n", | |
"# Running on a few images. Increase for better accuracy.\n", | |
"image_ids = np.random.choice(dataset_val.image_ids, 20)\n", | |
"APs = []\n", | |
"inference_start = time.time()\n", | |
"for image_id in image_ids:\n", | |
" # Load image and ground truth data\n", | |
" image, image_meta, gt_class_id, gt_bbox, gt_mask =\\\n", | |
" modellib.load_image_gt(dataset_val, inference_config,\n", | |
" image_id, use_mini_mask=False)\n", | |
" molded_images = np.expand_dims(modellib.mold_image(image, inference_config), 0)\n", | |
" # Run object detection\n", | |
" results = infer_model.detect([image], verbose=1)\n", | |
" r = results[0]\n", | |
" visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], \n", | |
" dataset_val.class_names, r['scores'])\n", | |
"\n", | |
" # Compute AP\n", | |
" AP, precisions, recalls, overlaps =\\\n", | |
" utils.compute_ap(gt_bbox, gt_class_id, gt_mask,\n", | |
" r[\"rois\"], r[\"class_ids\"], r[\"scores\"], r['masks'])\n", | |
" APs.append(AP)\n", | |
"\n", | |
"inference_end = time.time()\n", | |
"print('Inference Time: %0.2f Minutes'%((inference_end - inference_start)/60))\n", | |
"print(\"mAP: \", np.mean(APs))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[ ]: | |
import numpy as np # linear algebra | |
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) | |
import matplotlib.pyplot as plt # plot & image processing | |
from skimage.morphology import label | |
from skimage.data import imread | |
import os | |
import time | |
import sys | |
# Configurations
# Fraction of the training dataset that is held out for validation
TRAINING_VALIDATION_RATIO = 0.2
WORKING_DIR = '/notebooks/kaggle/working'
INPUT_DIR = '/notebooks/kaggle/input'
OUTPUT_DIR = '/notebooks/kaggle/output'
LOGS_DIR = os.path.join(WORKING_DIR, "logs")
TRAIN_DATA_PATH = os.path.join(INPUT_DIR, 'train_v2')
TEST_DATA_PATH = os.path.join(INPUT_DIR, 'test_v2')
SAMPLE_SUBMISSION_PATH = os.path.join(INPUT_DIR, 'sample_submission_v2.csv')
TRAIN_SHIP_SEGMENTATIONS_PATH = os.path.join(INPUT_DIR, 'train_ship_segmentations_v2.csv')
MASK_RCNN_PATH = os.path.join(WORKING_DIR, 'Mask_RCNN')
COCO_WEIGHTS_PATH = os.path.join(WORKING_DIR, "mask_rcnn_coco.h5")
SHIP_CLASS_NAME = 'ship'
IMAGE_WIDTH = 768
IMAGE_HEIGHT = 768
# Default mask shape. Width and height are equal, so the element order does
# not matter for the default value.
SHAPE = (IMAGE_WIDTH, IMAGE_HEIGHT)

# File names found in the test and training image directories
test_ds = os.listdir(TEST_DATA_PATH)
train_ds = os.listdir(TRAIN_DATA_PATH)

print('Working Dir:', WORKING_DIR, os.listdir(WORKING_DIR))
print('Input Dir:', INPUT_DIR, os.listdir(INPUT_DIR))
print('train dataset from: {}, {}'.format(TRAIN_DATA_PATH, len(train_ds)))
# Report the directory the test files were actually listed from.
print('test dataset from: {}, {}'.format(TEST_DATA_PATH, len(test_ds)))
print(TRAIN_SHIP_SEGMENTATIONS_PATH)

# In[ ]:

# Read mask encoding from the input CSV file
masks = pd.read_csv(TRAIN_SHIP_SEGMENTATIONS_PATH)
masks.head()
# In[ ]: | |
# ref: https://www.kaggle.com/kmader/baseline-u-net-model-part-1
def multi_rle_encode(img):
    '''Encode every connected component of a mask image separately.

    img: numpy array whose first channel (img[:, :, 0]) holds the mask
    Returns a list with one run-length string per labelled component
    '''
    components = label(img[:, :, 0])
    # Label 0 is the background, so only positive labels are encoded.
    component_ids = np.unique(components[components > 0])
    return [rle_encode(components == component_id) for component_id in component_ids]
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
def rle_encode(img):
    '''Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the runs as a string formatted '[start0] [length0] [start1] [length1]...'
    where the start positions are 1-based indices into the transposed, flattened mask
    '''
    # Flatten the transposed mask so that the runs follow the RLE direction.
    column_major = img.T.flatten()
    # A zero on each side guarantees that every run has both a start and an end.
    padded = np.concatenate([[0], column_major, [0]])
    # Positions where the value changes: run starts (0->1) alternate with run ends (1->0).
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each end position into the length of its run.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))
def rle_decode(mask_rle, shape=SHAPE):
    '''Decode a run-length string into a binary mask.

    mask_rle: run-length as string formatted '[start0] [length0] [start1] [length1]...'
              with 1-based start positions counted in column-major order
    shape: (height, width) of the array to return
    Returns a numpy uint8 array of exactly `shape`, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even tokens are the start positions, odd tokens are the run lengths.
    starts, lengths = [np.asarray(x, dtype=int) for x in (tokens[0::2], tokens[1::2])]
    # RLE positions are 1-based; numpy indices are 0-based.
    starts -= 1
    ends = starts + lengths
    # Blank flat mask; the runs are painted into it below.
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    # Column-major reshape aligns with the RLE direction and keeps the result
    # in (height, width) order. The previous `reshape(shape).T` produced the
    # same values for square masks but returned (width, height) otherwise.
    return img.reshape(shape, order='F')
def masks_as_image(in_mask_list, shape=SHAPE):
    '''Take the individual ship masks and create a single mask array for all ships.

    in_mask_list: iterable of RLE strings (e.g. a pd Series or a list);
                  entries that are not strings are skipped
    shape: shape handed to rle_decode for every mask and used for the result
    Returns a numpy int16 array as (shape[0], shape[1], 1); pixels covered by
    several masks hold the number of masks covering them
    '''
    all_masks = np.zeros(shape, dtype=np.int16)
    for mask in in_mask_list:
        # Rows without a ship carry a non-string placeholder instead of an RLE.
        if isinstance(mask, str):
            # Forward `shape` so that a non-default shape is honoured as well.
            all_masks += rle_decode(mask, shape)
    return np.expand_dims(all_masks, -1)
def shows_decode_encode(image_id, path=TRAIN_DATA_PATH):
    '''Plot an image, its decoded ship mask and the re-encoded/decoded mask side by side.

    image_id: file name of the image; also the ImageId looked up in `masks`
    path: directory that contains the image file
    '''
    _, (ax_image, ax_mask, ax_overlay) = plt.subplots(1, 3, figsize=(10, 5))

    # Left panel: the image itself.
    picture = imread(os.path.join(path, image_id))
    ax_image.imshow(picture)
    ax_image.set_title(image_id)

    # Middle panel: all masks listed for this image, merged into one 2d array.
    listed_rles = masks.query('ImageId=="{}"'.format(image_id))['EncodedPixels']
    merged_mask = masks_as_image(listed_rles)
    ax_mask.imshow(merged_mask[:, :, 0])
    ax_mask.set_title('Ship Mask')

    # Right panel: the merged mask after an encode/decode round trip, over the image.
    reencoded_rles = multi_rle_encode(merged_mask)
    roundtrip_mask = masks_as_image(reencoded_rles)
    ax_overlay.imshow(picture)
    ax_overlay.imshow(roundtrip_mask[:, :, 0], alpha=0.3)
    ax_overlay.set_title('Encoded & Decoded Mask')

    plt.show()
    print(image_id, ' Check Decoding->Encoding',
          'RLE_0:', len(listed_rles), '->',
          'RLE_1:', len(reencoded_rles))
# Inspect a few examples; a remark, when present, is printed after the plot.
for example_id, remark in (
        ('000155de5.jpg', None),
        ('00003e153.jpg', 'It could be different when there is no mask.'),
        ('00021ddc3.jpg', 'It could be different when there are masks overlapped.')):
    shows_decode_encode(example_id)
    if remark is not None:
        print(remark)
# In[ ]: | |
# Flag every CSV row: 1 when it carries an RLE string (a ship), otherwise 0
masks['ships'] = masks['EncodedPixels'].map(
    lambda encoded_pixels: int(isinstance(encoded_pixels, str)))

# One row per image: the number of ships and the list of its mask encodings
start_time = time.time()
_rows_by_image = masks.groupby('ImageId')
unique_img_ids = (
    _rows_by_image.agg({'ships': 'sum'})
    .assign(RleMaskList=_rows_by_image['EncodedPixels'].apply(list))
    .reset_index())
end_time = time.time() - start_time
print("unique_img_ids groupby took: {}".format(end_time))

# Keep only the images that contain at least one ship
_has_ships = unique_img_ids['ships'] > 0
unique_img_ids = unique_img_ids[_has_ships]
unique_img_ids['ships'].hist()
unique_img_ids.sample(3)
# In[ ]: | |
# Split the per-image table into training and validation sets, stratified by
# the ship count so that both sets follow the same distribution
from sklearn.model_selection import train_test_split

_ship_counts = unique_img_ids['ships']
train_ids, val_ids = train_test_split(
    unique_img_ids,
    test_size=TRAINING_VALIDATION_RATIO,
    stratify=_ship_counts)

print(len(train_ids), 'training masks')
print(len(val_ids), 'validation masks')

# Histogram of the ship count for each split
train_ids['ships'].hist()
val_ids['ships'].hist()
# In[ ]: | |
# Whether to delete an existing Mask R-CNN checkout and clone it again
UPDATE_MASK_RCNN = False

# Plain-Python replacements for the notebook shell escapes; get_ipython() is
# not defined when this file is run as a regular script.
import shutil
import subprocess

os.chdir(WORKING_DIR)
if UPDATE_MASK_RCNN:
    # Like `rm -rf`: a missing directory is not an error.
    shutil.rmtree(MASK_RCNN_PATH, ignore_errors=True)

# Download the Mask R-CNN code to a local folder
if not os.path.exists(MASK_RCNN_PATH):
    # check=True surfaces a failed clone here instead of as an import error below.
    subprocess.run(
        ['git', 'clone', 'https://github.com/matterport/Mask_RCNN.git', MASK_RCNN_PATH],
        check=True)
# Alternative that was left disabled: fetch only the mrcnn package from a fork
#   wget https://github.com/samlin001/Mask_RCNN/archive/master.zip -O Mask_RCNN-master.zip
#   unzip Mask_RCNN-master.zip 'Mask_RCNN-master/mrcnn/*'
#   rm Mask_RCNN-master.zip

# Import Mask RCNN
sys.path.append(MASK_RCNN_PATH)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
# In[ ]: | |
class AirbusShipDetectionChallengeDataset(utils.Dataset):
    """Airbus Ship Detection Challenge Dataset.

    Registers the single 'ship' class and one image entry per row of `ids`,
    and decodes the run-length encoded masks of an image on demand.
    """

    def __init__(self, image_file_dir, ids, masks, image_width=IMAGE_WIDTH, image_height=IMAGE_HEIGHT):
        """Register the ship class and all images listed in `ids`.

        image_file_dir: directory that contains the image files
        ids: DataFrame with the columns 'ImageId' and 'RleMaskList'
        masks: mask table; stored on the instance but not read by this class
        image_width, image_height: size recorded for every registered image
        """
        # `self` is supplied implicitly; passing it again would hand the
        # instance to the parent initializer as an extra argument.
        super().__init__()
        self.image_file_dir = image_file_dir
        self.ids = ids
        self.masks = masks
        self.image_width = image_width
        self.image_height = image_height

        # Add classes
        self.add_class(SHIP_CLASS_NAME, 1, SHIP_CLASS_NAME)
        self.load_dataset()

    def load_dataset(self):
        """Register every row of `self.ids` as an image of the ship source."""
        for _, row in self.ids.iterrows():
            image_id = row['ImageId']
            self.add_image(
                SHIP_CLASS_NAME,
                image_id=image_id,
                path=os.path.join(self.image_file_dir, image_id),
                width=self.image_width, height=self.image_height,
                rle_mask_list=row['RleMaskList'])

    def load_mask(self, image_id):
        """Generate the instance masks of the given image.

        Returns a bool array of shape (height, width, instance count) and an
        int32 array holding one class ID per instance.
        """
        info = self.image_info[image_id]
        # Only string entries are real encodings. Skipping the others keeps
        # empty channels from being reported as ship instances.
        encodings = [rle for rle in info['rle_mask_list'] if isinstance(rle, str)]
        # Decode with the size recorded for this image, not the module default.
        mask_shape = (info['height'], info['width'])
        mask = np.zeros([info['height'], info['width'], len(encodings)],
                        dtype=np.uint8)
        for channel, rle in enumerate(encodings):
            mask[:, :, channel] = rle_decode(rle, mask_shape)

        # Return mask, and array of class IDs of each instance. Since we have
        # one class ID only, we return an array of 1s.
        # The builtin `bool` replaces the `np.bool` alias removed from NumPy.
        return mask.astype(bool), np.ones([mask.shape[-1]], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info['source'] == SHIP_CLASS_NAME:
            return info['path']
        # Propagate the parent's answer. Zero-argument super() also stays
        # correct when this class is subclassed.
        return super().image_reference(image_id)
# In[ ]: | |
class AirbusShipDetectionChallengeGPUConfig(Config):
    """
    Training configuration for the Airbus Ship Detection Challenge dataset.
    Only the values listed here differ from the base Config class.
    From https://github.com/samlin001/Mask_RCNN/blob/master/mrcnn/config.py
    """
    # https://www.kaggle.com/docs/kernels#technical-specifications
    NAME = 'ASDC_GPU'

    # Hardware: number of GPUs and images processed per GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 2

    # Classes: ship or background
    NUM_CLASSES = 2

    # Both image dimension limits are set to the image width
    IMAGE_MIN_DIM = IMAGE_WIDTH
    IMAGE_MAX_DIM = IMAGE_WIDTH

    # Training schedule
    STEPS_PER_EPOCH = 300
    VALIDATION_STEPS = 50
    # NOTE(review): presumably read by the forked config linked above - verify
    SAVE_BEST_ONLY = True

    # Minimum probability value to accept a detected instance;
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.95

    # Non-maximum suppression threshold for detection;
    # kept small to merge overlapping ROIs
    DETECTION_NMS_THRESHOLD = 0.05


config = AirbusShipDetectionChallengeGPUConfig()
config.display()
# In[ ]: | |
start_time = time.time()

# Training dataset
dataset_train = AirbusShipDetectionChallengeDataset(
    image_file_dir=TRAIN_DATA_PATH,
    ids=train_ids,
    masks=masks)
dataset_train.prepare()

# Validation dataset; it reads from the same image directory as training
dataset_val = AirbusShipDetectionChallengeDataset(
    image_file_dir=TRAIN_DATA_PATH,
    ids=val_ids,
    masks=masks)
dataset_val.prepare()

# Show three randomly drawn training samples together with their top mask
image_ids = np.random.choice(dataset_train.image_ids, 3)
for image_id in image_ids:
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(
        image, mask, class_ids, dataset_train.class_names, limit=1)

end_time = time.time() - start_time
print("dataset prepare: {}".format(end_time))
# In[ ]: | |
start_time = time.time()
model = modellib.MaskRCNN(mode="training", config=config, model_dir=WORKING_DIR)

import errno

try:
    # Resume from the most recent checkpoint of this model when one exists.
    weights_path = model.find_last()
    # The checkpoint was produced by this very model, so every layer -
    # including the trained heads - is restored.
    excluded_layers = None
except FileNotFoundError:
    # No previously trained weights: start from the COCO weights.
    weights_path = COCO_WEIGHTS_PATH
    # Skip the download when the file is already in the working directory.
    if not os.path.exists(weights_path):
        utils.download_trained_weights(weights_path)
    # The head layers are left out when starting from COCO.
    # NOTE(review): presumably because their shapes depend on the number of
    # classes, which differs from NUM_CLASSES here - confirm.
    excluded_layers = [
        "mrcnn_class_logits", "mrcnn_bbox_fc",
        "mrcnn_bbox", "mrcnn_mask"]

# Kept as a switch so that weight loading can be disabled in one place.
load_weights = True
if load_weights:
    print("Loading weights: ", weights_path)
    model.load_weights(weights_path, by_name=True, exclude=excluded_layers)

end_time = time.time() - start_time
print("loading weights: {}".format(end_time))
# In[ ]:

"""Train the model."""
start_time = time.time()

# Train all layers for 21 epochs at 1.5 times the configured learning rate
_train_options = dict(
    learning_rate=config.LEARNING_RATE * 1.5,
    epochs=21,
    layers='all')
model.train(dataset_train, dataset_val, **_train_options)

end_time = time.time() - start_time
print("Train model: {}".format(end_time))
# In[ ]: | |
import tensorflow as tf
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
import keras
from keras import backend as K

# Make sure the export directory exists before anything is written to it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

#print(model.keras_model.output.name)
saver = tf.train.Saver()
# The same Keras session is used for the checkpoint and the frozen graph.
sess = K.get_session()

# Save the session variables as a TensorFlow checkpoint
saver.save(sess, os.path.join(OUTPUT_DIR, 'mask_rcnn_model.ckpt'))

# Saving the tf.keras model in HDF5 format was tried and left disabled:
# it did not work because of variables that are not constants.
#keras_file = "/notebooks/kaggle/output/keras_model.h5"
#tf.keras.models.save_model(model.keras_model, keras_file)

# Freeze the graph: turn the variables into constants, keeping what is needed
# to compute the Keras model outputs, and write it as a binary protobuf file
output_names = [out.op.name for out in model.keras_model.outputs]
constant_graph = graph_util.convert_variables_to_constants(
    sess,
    sess.graph.as_graph_def(),
    output_names)

graph_io.write_graph(constant_graph, OUTPUT_DIR, "mask_rcnn_frozen_model.pb", as_text=False)
# In[ ]: | |
class InferenceConfig(AirbusShipDetectionChallengeGPUConfig):
    """Training configuration narrowed to one image on one GPU for inference."""
    # 1 image for inference
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
inference_config = InferenceConfig()

# Create a model in inference mode
infer_model = modellib.MaskRCNN(mode="inference",
                                config=inference_config,
                                model_dir=WORKING_DIR)

model_path = infer_model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
infer_model.load_weights(model_path, by_name=True)

# Test on a random validation image
image_id = np.random.choice(dataset_val.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
    dataset_val, inference_config, image_id, use_mini_mask=False)

log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

# Ground truth; the image comes from the validation dataset, so its class
# names are used for the labels.
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
                            dataset_val.class_names, figsize=(8, 8))

# Prediction for the same image
results = infer_model.detect([original_image], verbose=1)
r = results[0]
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'],
                            dataset_val.class_names, r['scores'])

# Compute VOC-Style mean Average Precision @ IoU=0.5
# Running on a few images. Increase for better accuracy.
image_ids = np.random.choice(dataset_val.image_ids, 20)
APs = []
inference_start = time.time()
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
        dataset_val, inference_config, image_id, use_mini_mask=False)
    # Run object detection on the loaded image
    results = infer_model.detect([image], verbose=1)
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                dataset_val.class_names, r['scores'])

    # Compute AP of the detections against the ground truth
    AP, precisions, recalls, overlaps = utils.compute_ap(
        gt_bbox, gt_class_id, gt_mask,
        r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)

inference_end = time.time()
print('Inference Time: %0.2f Minutes'%((inference_end - inference_start)/60))
print("mAP: ", np.mean(APs))
# In[ ]: | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment