Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save dmc5179/12bb133e55052b4743459b72bb28d496 to your computer and use it in GitHub Desktop.
Save dmc5179/12bb133e55052b4743459b72bb28d496 to your computer and use it in GitHub Desktop.
Jupyter Notebook and Python script for the Mask R-CNN Ship Detection Minimum Viable Model (Python 3.6)
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np # linear algebra\n",
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
"import matplotlib.pyplot as plt # plot & image processing\n",
"from skimage.morphology import label\n",
"from skimage.data import imread\n",
"\n",
"import os\n",
"import time\n",
"import sys\n",
"\n",
"# Configurations\n",
"# Split x ratio of train dataset for validation \n",
"TRAINING_VALIDATION_RATIO = 0.2\n",
"WORKING_DIR = '/notebooks/kaggle/working'\n",
"INPUT_DIR = '/notebooks/kaggle/input'\n",
"OUTPUT_DIR = '/notebooks/kaggle/output'\n",
"LOGS_DIR = os.path.join(WORKING_DIR, \"logs\")\n",
"TRAIN_DATA_PATH = os.path.join(INPUT_DIR, 'train_v2')\n",
"TEST_DATA_PATH = os.path.join(INPUT_DIR, 'test_v2')\n",
"SAMPLE_SUBMISSION_PATH = os.path.join(INPUT_DIR, 'sample_submission_v2.csv')\n",
"TRAIN_SHIP_SEGMENTATIONS_PATH = os.path.join(INPUT_DIR, 'train_ship_segmentations_v2.csv')\n",
"MASK_RCNN_PATH = os.path.join(WORKING_DIR, 'Mask_RCNN')\n",
"COCO_WEIGHTS_PATH = os.path.join(WORKING_DIR, \"mask_rcnn_coco.h5\")\n",
"SHIP_CLASS_NAME = 'ship'\n",
"IMAGE_WIDTH = 768\n",
"IMAGE_HEIGHT = 768\n",
"SHAPE = (IMAGE_WIDTH, IMAGE_HEIGHT)\n",
"\n",
"test_ds = os.listdir(TEST_DATA_PATH)\n",
"train_ds = os.listdir(TRAIN_DATA_PATH)\n",
"\n",
"print('Working Dir:', WORKING_DIR, os.listdir(WORKING_DIR))\n",
"print('Input Dir:', INPUT_DIR, os.listdir(INPUT_DIR))\n",
"print('train dataset from: {}, {}'.format(TRAIN_DATA_PATH, len(train_ds)))\n",
"print('test dataset from: {}, {}'.format(TRAIN_DATA_PATH, len(test_ds)))\n",
"print(TRAIN_SHIP_SEGMENTATIONS_PATH)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read mask encording from the input CSV file \n",
"masks = pd.read_csv(TRAIN_SHIP_SEGMENTATIONS_PATH)\n",
"masks.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ref: https://www.kaggle.com/kmader/baseline-u-net-model-part-1\n",
"def multi_rle_encode(img):\n",
" labels = label(img[:, :, 0])\n",
" return [rle_encode(labels==k) for k in np.unique(labels[labels>0])]\n",
"\n",
"# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode\n",
"def rle_encode(img):\n",
" '''\n",
" img: numpy array, 1 - mask, 0 - background\n",
" Returns run length as string formated: [start0] [length0] [start1] [length1]... in 1d array\n",
" '''\n",
" # reshape to 1d array\n",
" pixels = img.T.flatten() # Needed to align to RLE direction\n",
" # pads the head & the tail with 0 & converts to ndarray\n",
" pixels = np.concatenate([[0], pixels, [0]])\n",
" # gets all start(0->1) & end(1->0) positions \n",
" runs = np.where(pixels[1:] != pixels[:-1])[0] + 1\n",
" # transforms end positions to lengths\n",
" runs[1::2] -= runs[::2]\n",
" # converts to the string formated: '[s0] [l0] [s1] [l1]...'\n",
" return ' '.join(str(x) for x in runs)\n",
"\n",
"def rle_decode(mask_rle, shape=SHAPE):\n",
" '''\n",
" mask_rle: run-length as string formated: [start0] [length0] [start1] [length1]... in 1d array\n",
" shape: (height,width) of array to return \n",
" Returns numpy array according to the shape, 1 - mask, 0 - background\n",
" '''\n",
" s = mask_rle.split()\n",
" # gets starts & lengths 1d arrays \n",
" starts, lengths = [np.asarray(x, dtype=int) for x in (s[0::2], s[1::2])]\n",
" starts -= 1\n",
" # gets ends 1d array\n",
" ends = starts + lengths\n",
" # creates blank mask image 1d array\n",
" img = np.zeros(shape[0]*shape[1], dtype=np.uint8)\n",
" # sets mark pixles\n",
" for lo, hi in zip(starts, ends):\n",
" img[lo:hi] = 1\n",
" # reshape as a 2d mask image\n",
" return img.reshape(shape).T # Needed to align to RLE direction\n",
"\n",
"def masks_as_image(in_mask_list, shape=SHAPE):\n",
" '''Take the individual ship masks and create a single mask array for all ships\n",
" in_mask_list: pd Series: [idx0] [RLE string0]...\n",
" Returns numpy array as (shape.h, sahpe.w, 1)\n",
" '''\n",
" all_masks = np.zeros(shape, dtype = np.int16)\n",
" # if isinstance(in_mask_list, list):\n",
" for mask in in_mask_list:\n",
" if isinstance(mask, str):\n",
" all_masks += rle_decode(mask)\n",
" return np.expand_dims(all_masks, -1)\n",
"\n",
"def shows_decode_encode(image_id, path=TRAIN_DATA_PATH):\n",
" '''Show image, ship mask, and encoded/decoded result\n",
" '''\n",
" fig, axarr = plt.subplots(1, 3, figsize = (10, 5))\n",
" # image\n",
" img_0 = imread(os.path.join(path, image_id))\n",
" axarr[0].imshow(img_0)\n",
" axarr[0].set_title(image_id)\n",
" \n",
" # input mask\n",
" rle_1 = masks.query('ImageId==\"{}\"'.format(image_id))['EncodedPixels']\n",
" img_1 = masks_as_image(rle_1)\n",
" # takes 2d array (shape.h, sahpe.w)\n",
" axarr[1].imshow(img_1[:, :, 0])\n",
" axarr[1].set_title('Ship Mask')\n",
" \n",
" # encode & decode mask \n",
" rle_2 = multi_rle_encode(img_1)\n",
" img_2 = masks_as_image(rle_2)\n",
" axarr[2].imshow(img_0)\n",
" axarr[2].imshow(img_2[:, :, 0], alpha=0.3)\n",
" axarr[2].set_title('Encoded & Decoded Mask')\n",
" plt.show()\n",
" print(image_id , ' Check Decoding->Encoding',\n",
" 'RLE_0:', len(rle_1), '->',\n",
" 'RLE_1:', len(rle_2))\n",
"\n",
"# inspects a few example\n",
"shows_decode_encode('000155de5.jpg')\n",
"shows_decode_encode('00003e153.jpg')\n",
"print('It could be different when there is no mask.')\n",
"shows_decode_encode('00021ddc3.jpg')\n",
"print('It could be different when there are masks overlapped.')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# check if a mask has a ship \n",
"masks['ships'] = masks['EncodedPixels'].map(lambda encoded_pixels: 1 if isinstance(encoded_pixels, str) else 0)\n",
"# sum ship# by ImageId and create the unique image id/mask list\n",
"start_time = time.time()\n",
"unique_img_ids = masks.groupby('ImageId').agg({'ships': 'sum'})\n",
"unique_img_ids['RleMaskList'] = masks.groupby('ImageId')['EncodedPixels'].apply(list)\n",
"unique_img_ids = unique_img_ids.reset_index()\n",
"end_time = time.time() - start_time\n",
"print(\"unique_img_ids groupby took: {}\".format(end_time))\n",
"\n",
"# Only care image with ships\n",
"unique_img_ids = unique_img_ids[unique_img_ids['ships'] > 0]\n",
"unique_img_ids['ships'].hist()\n",
"unique_img_ids.sample(3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# split to training & validation sets \n",
"from sklearn.model_selection import train_test_split\n",
"train_ids, val_ids = train_test_split(unique_img_ids, \n",
" test_size = TRAINING_VALIDATION_RATIO, \n",
" stratify = unique_img_ids['ships'])\n",
"print(train_ids.shape[0], 'training masks')\n",
"print(val_ids.shape[0], 'validation masks')\n",
"train_ids['ships'].hist()\n",
"val_ids['ships'].hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# if to clone Mask_R-CNN git when it exists \n",
"UPDATE_MASK_RCNN = False\n",
"\n",
"os.chdir(WORKING_DIR)\n",
"if UPDATE_MASK_RCNN:\n",
" !rm -rf {MASK_RCNN_PATH}\n",
"\n",
"# Downlaod Mask RCNN code to a local folder \n",
"if not os.path.exists(MASK_RCNN_PATH):\n",
" git clone https://github.com/matterport/Mask_RCNN.git",
"# ! wget https://github.com/samlin001/Mask_RCNN/archive/master.zip -O Mask_RCNN-master.zip\n",
"# ! unzip Mask_RCNN-master.zip 'Mask_RCNN-master/mrcnn/*'\n",
"# ! rm Mask_RCNN-master.zip\n",
"\n",
"# Import Mask RCNN\n",
"sys.path.append(MASK_RCNN_PATH) # To find local version of the library\n",
"from mrcnn.config import Config\n",
"from mrcnn import utils\n",
"import mrcnn.model as modellib\n",
"from mrcnn import visualize\n",
"from mrcnn.model import log "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class AirbusShipDetectionChallengeDataset(utils.Dataset):\n",
" \"\"\"Airbus Ship Detection Challenge Dataset\n",
" \"\"\"\n",
" def __init__(self, image_file_dir, ids, masks, image_width=IMAGE_WIDTH, image_height=IMAGE_HEIGHT):\n",
" super().__init__(self)\n",
" self.image_file_dir = image_file_dir\n",
" self.ids = ids\n",
" self.masks = masks\n",
" self.image_width = image_width\n",
" self.image_height = image_height\n",
" \n",
" # Add classes\n",
" self.add_class(SHIP_CLASS_NAME, 1, SHIP_CLASS_NAME)\n",
" self.load_dataset()\n",
" \n",
" def load_dataset(self):\n",
" \"\"\"Load dataset from the path\n",
" \"\"\"\n",
" # Add images\n",
" for index, row in self.ids.iterrows():\n",
" image_id = row['ImageId']\n",
" image_path = os.path.join(self.image_file_dir, image_id)\n",
" rle_mask_list = row['RleMaskList']\n",
" #print(rle_mask_list)\n",
" self.add_image(\n",
" SHIP_CLASS_NAME,\n",
" image_id=image_id,\n",
" path=image_path,\n",
" width=self.image_width, height=self.image_height,\n",
" rle_mask_list=rle_mask_list)\n",
"\n",
" def load_mask(self, image_id):\n",
" \"\"\"Generate instance masks for shapes of the given image ID.\n",
" \"\"\"\n",
" info = self.image_info[image_id]\n",
" rle_mask_list = info['rle_mask_list']\n",
" mask_count = len(rle_mask_list)\n",
" mask = np.zeros([info['height'], info['width'], mask_count],\n",
" dtype=np.uint8)\n",
" i = 0\n",
" for rel in rle_mask_list:\n",
" if isinstance(rel, str):\n",
" np.copyto(mask[:,:,i], rle_decode(rel))\n",
" i += 1\n",
" \n",
" # Return mask, and array of class IDs of each instance. Since we have\n",
" # one class ID only, we return an array of 1s\n",
" return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32)\n",
" \n",
" def image_reference(self, image_id):\n",
" \"\"\"Return the path of the image.\"\"\"\n",
" info = self.image_info[image_id]\n",
" if info['source'] == SHIP_CLASS_NAME:\n",
" return info['path']\n",
" else:\n",
" super(self.__class__, self).image_reference(image_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class AirbusShipDetectionChallengeGPUConfig(Config):\n",
" \"\"\"\n",
" Configuration of Airbus Ship Detection Challenge Dataset \n",
" Overrides values in the base Config class.\n",
" From https://github.com/samlin001/Mask_RCNN/blob/master/mrcnn/config.py\n",
" \"\"\"\n",
" # https://www.kaggle.com/docs/kernels#technical-specifications\n",
" NAME = 'ASDC_GPU'\n",
" # NUMBER OF GPUs to use.\n",
" GPU_COUNT = 1\n",
" IMAGES_PER_GPU = 2\n",
" \n",
" NUM_CLASSES = 2 # ship or background\n",
" IMAGE_MIN_DIM = IMAGE_WIDTH\n",
" IMAGE_MAX_DIM = IMAGE_WIDTH\n",
" STEPS_PER_EPOCH = 300\n",
" VALIDATION_STEPS = 50\n",
" SAVE_BEST_ONLY = True\n",
" \n",
" # Minimum probability value to accept a detected instance\n",
" # ROIs below this threshold are skipped\n",
" DETECTION_MIN_CONFIDENCE = 0.95\n",
"\n",
" # Non-maximum suppression threshold for detection\n",
" # Keep it small to merge overlapping ROIs \n",
" DETECTION_NMS_THRESHOLD = 0.05\n",
"\n",
" \n",
"config = AirbusShipDetectionChallengeGPUConfig()\n",
"config.display()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"start_time = time.time()\n",
"# Training dataset.\n",
"dataset_train = AirbusShipDetectionChallengeDataset(image_file_dir=TRAIN_DATA_PATH, ids=train_ids, masks=masks)\n",
"dataset_train.prepare()\n",
"\n",
"# Validation dataset\n",
"dataset_val = AirbusShipDetectionChallengeDataset(image_file_dir=TRAIN_DATA_PATH, ids=val_ids, masks=masks)\n",
"dataset_val.prepare()\n",
"\n",
"# Load and display random samples\n",
"image_ids = np.random.choice(dataset_train.image_ids, 3)\n",
"for image_id in image_ids:\n",
" image = dataset_train.load_image(image_id)\n",
" mask, class_ids = dataset_train.load_mask(image_id)\n",
" visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names, limit=1)\n",
"\n",
"end_time = time.time() - start_time\n",
"print(\"dataset prepare: {}\".format(end_time))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"start_time = time.time()\n",
"model = modellib.MaskRCNN(mode=\"training\", config=config, model_dir=WORKING_DIR)\n",
"\n",
"import errno\n",
"try:\n",
" weights_path = model.find_last()\n",
" load_weights = True\n",
"except FileNotFoundError:\n",
" # if there is no previous trained weights, load COCO\n",
" load_weights = True\n",
" weights_path = COCO_WEIGHTS_PATH\n",
" utils.download_trained_weights(weights_path)\n",
" \n",
"if load_weights:\n",
" print(\"Loading weights: \", weights_path)\n",
" model.load_weights(weights_path, by_name=True, exclude=[\n",
" \"mrcnn_class_logits\", \"mrcnn_bbox_fc\",\n",
" \"mrcnn_bbox\", \"mrcnn_mask\"])\n",
"\n",
"end_time = time.time() - start_time\n",
"print(\"loading weights: {}\".format(end_time))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"Train the model.\"\"\"\n",
"start_time = time.time() \n",
"model.train(dataset_train, dataset_val,\n",
" learning_rate=config.LEARNING_RATE * 1.5,\n",
" epochs=21,\n",
" layers='all')\n",
"end_time = time.time() - start_time\n",
"print(\"Train model: {}\".format(end_time))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"from tensorflow.python.framework import graph_util\n",
"from tensorflow.python.framework import graph_io\n",
"import keras\n",
"from keras import backend as K\n",
"\n",
"#print(model.keras_model.output.name)\n",
"saver = tf.train.Saver()\n",
"saver.save(K.get_session(), os.path.join(OUTPUT_DIR, 'mask_rcnn_model.ckpt')\n",
"\n",
"#import tensorflow as tf\n",
"# This doesn't work because the of variables that are not constants\n",
"# Save tf.keras model in HDF5 format.\n",
"#keras_file = \"/notebooks/kaggle/output/keras_model.h5\"\n",
"\n",
"#tf.keras.models.save_model(model.keras_model, keras_file)\n",
"\n",
"\n",
"sess = K.get_session()\n",
"output_names=[out.op.name for out in model.keras_model.outputs]\n",
"constant_graph = graph_util.convert_variables_to_constants(\n",
" sess,\n",
" sess.graph.as_graph_def(),\n",
" output_names)\n",
"\n",
"graph_io.write_graph(constant_graph, OUTPUT_DIR, \"mask_rcnn_frozen_model.pb\", as_text=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class InferenceConfig(AirbusShipDetectionChallengeGPUConfig):\n",
" GPU_COUNT = 1\n",
" # 1 image for inference \n",
" IMAGES_PER_GPU = 1\n",
"\n",
"inference_config = InferenceConfig()\n",
"\n",
"# create a model in inference mode\n",
"infer_model = modellib.MaskRCNN(mode=\"inference\", \n",
" config=inference_config,\n",
" model_dir=WORKING_DIR)\n",
"\n",
"model_path = infer_model.find_last()\n",
"\n",
"# Load trained weights\n",
"print(\"Loading weights from \", model_path)\n",
"infer_model.load_weights(model_path, by_name=True)\n",
"\n",
"\n",
"# Test on a random image\n",
"image_id = np.random.choice(dataset_val.image_ids)\n",
"original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\\\n",
" modellib.load_image_gt(dataset_val, inference_config, \n",
" image_id, use_mini_mask=False)\n",
"\n",
"log(\"original_image\", original_image)\n",
"log(\"image_meta\", image_meta)\n",
"log(\"gt_class_id\", gt_class_id)\n",
"log(\"gt_bbox\", gt_bbox)\n",
"log(\"gt_mask\", gt_mask)\n",
"\n",
"visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, \n",
" dataset_train.class_names, figsize=(8, 8))\n",
"\n",
"results = infer_model.detect([original_image], verbose=1)\n",
"\n",
"r = results[0]\n",
"visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], \n",
" dataset_val.class_names, r['scores'])\n",
"\n",
"# Compute VOC-Style mean Average Precision @ IoU=0.5\n",
"# Running on a few images. Increase for better accuracy.\n",
"image_ids = np.random.choice(dataset_val.image_ids, 20)\n",
"APs = []\n",
"inference_start = time.time()\n",
"for image_id in image_ids:\n",
" # Load image and ground truth data\n",
" image, image_meta, gt_class_id, gt_bbox, gt_mask =\\\n",
" modellib.load_image_gt(dataset_val, inference_config,\n",
" image_id, use_mini_mask=False)\n",
" molded_images = np.expand_dims(modellib.mold_image(image, inference_config), 0)\n",
" # Run object detection\n",
" results = infer_model.detect([image], verbose=1)\n",
" r = results[0]\n",
" visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], \n",
" dataset_val.class_names, r['scores'])\n",
"\n",
" # Compute AP\n",
" AP, precisions, recalls, overlaps =\\\n",
" utils.compute_ap(gt_bbox, gt_class_id, gt_mask,\n",
" r[\"rois\"], r[\"class_ids\"], r[\"scores\"], r['masks'])\n",
" APs.append(AP)\n",
"\n",
"inference_end = time.time()\n",
"print('Inference Time: %0.2f Minutes'%((inference_end - inference_start)/60))\n",
"print(\"mAP: \", np.mean(APs))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # plot & image processing
from skimage.morphology import label
from skimage.data import imread
import os
import time
import sys
# Configurations
# Fraction of the training images held out for validation
TRAINING_VALIDATION_RATIO = 0.2
WORKING_DIR = '/notebooks/kaggle/working'
INPUT_DIR = '/notebooks/kaggle/input'
OUTPUT_DIR = '/notebooks/kaggle/output'
LOGS_DIR = os.path.join(WORKING_DIR, "logs")
TRAIN_DATA_PATH = os.path.join(INPUT_DIR, 'train_v2')
TEST_DATA_PATH = os.path.join(INPUT_DIR, 'test_v2')
SAMPLE_SUBMISSION_PATH = os.path.join(INPUT_DIR, 'sample_submission_v2.csv')
TRAIN_SHIP_SEGMENTATIONS_PATH = os.path.join(INPUT_DIR, 'train_ship_segmentations_v2.csv')
MASK_RCNN_PATH = os.path.join(WORKING_DIR, 'Mask_RCNN')
COCO_WEIGHTS_PATH = os.path.join(WORKING_DIR, "mask_rcnn_coco.h5")
SHIP_CLASS_NAME = 'ship'
IMAGE_WIDTH = 768
IMAGE_HEIGHT = 768
# Default mask shape, in (width, height) order
SHAPE = (IMAGE_WIDTH, IMAGE_HEIGHT)

# File listings of the two image folders
test_ds = os.listdir(TEST_DATA_PATH)
train_ds = os.listdir(TRAIN_DATA_PATH)
print('Working Dir:', WORKING_DIR, os.listdir(WORKING_DIR))
print('Input Dir:', INPUT_DIR, os.listdir(INPUT_DIR))
print('train dataset from: {}, {}'.format(TRAIN_DATA_PATH, len(train_ds)))
# Report the folder the test listing was actually read from
print('test dataset from: {}, {}'.format(TEST_DATA_PATH, len(test_ds)))
print(TRAIN_SHIP_SEGMENTATIONS_PATH)
# In[ ]:
# Read the mask encodings from the input CSV file
masks = pd.read_csv(TRAIN_SHIP_SEGMENTATIONS_PATH)
masks.head()
# In[ ]:
# ref: https://www.kaggle.com/kmader/baseline-u-net-model-part-1
def multi_rle_encode(img):
    """Run-length encode every connected component of a mask image.

    img: numpy array indexed as [row, column, channel]; only channel 0 is used
    Returns a list with one RLE string per labelled component (label > 0)
    """
    components = label(img[:, :, 0])
    # Label 0 is the background, so only positive labels are encoded
    component_ids = np.unique(components[components > 0])
    encoded = []
    for component_id in component_ids:
        encoded.append(rle_encode(components == component_id))
    return encoded
# ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode
def rle_encode(img):
    '''Run-length encode a 2d mask, scanning it column by column.

    img: numpy array, non-zero - mask, 0 - background
    Returns run length as string formatted: [start0] [length0] [start1] [length1]...
    where starts are 1-based positions in the column-major flattened mask.
    An all-background mask yields the empty string.
    '''
    # Transpose before flattening so pixels are visited column by column.
    # Collapse to 0/1 first: with other values (e.g. 2 where summed masks
    # overlap) a 1->2 step would be counted as a run boundary and break the
    # start/end pairing below.
    pixels = (np.asarray(img).T.flatten() != 0).astype(np.uint8)
    # Pad both ends with background so every run has a start and an end
    pixels = np.concatenate([[0], pixels, [0]])
    # 1-based positions of every 0->1 (start) and 1->0 (end) transition
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Turn each end position into a run length
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
def rle_decode(mask_rle, shape=SHAPE):
    '''Decode a run-length string into a 2d binary mask.

    mask_rle: run-length as string formatted: [start0] [length0] [start1] [length1]...
              with 1-based starts; any non-string value (e.g. a missing CSV cell)
              is decoded as an empty mask
    shape: two-element sequence used to reshape the flat pixel buffer before it is
           transposed, so the returned array has shape (shape[1], shape[0])
    Returns numpy uint8 array, 1 - mask, 0 - background
    Raises ValueError when the string holds an odd number of values
    '''
    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    if isinstance(mask_rle, str):
        tokens = mask_rle.split()
        if len(tokens) % 2:
            raise ValueError(
                'RLE string must hold start/length pairs, got {} values'.format(len(tokens)))
        # Starts are 1-based in the encoding; shift to 0-based indices
        starts = np.asarray(tokens[0::2], dtype=int) - 1
        lengths = np.asarray(tokens[1::2], dtype=int)
        for lo, hi in zip(starts, starts + lengths):
            flat[lo:hi] = 1
    # The runs are stored column by column, hence reshape followed by transpose
    return flat.reshape(shape).T
def masks_as_image(in_mask_list, shape=SHAPE):
    '''Sum the individual ship masks into a single mask array for all ships.

    in_mask_list: iterable of RLE strings (e.g. a pd Series); non-string
                  entries are skipped
    shape: passed through to rle_decode for every mask
    Returns numpy int16 array of shape (shape[1], shape[0], 1); pixels covered
    by several masks hold the number of masks covering them
    '''
    # rle_decode returns the transposed layout of `shape`; allocate to match
    all_masks = np.zeros((shape[1], shape[0]), dtype=np.int16)
    for mask in in_mask_list:
        if isinstance(mask, str):
            # Forward the requested shape instead of silently using the default
            all_masks += rle_decode(mask, shape)
    return np.expand_dims(all_masks, -1)
def shows_decode_encode(image_id, path=TRAIN_DATA_PATH):
    '''Plot an image, its ship mask, and the mask after an encode/decode round trip.

    image_id: file name of the image inside `path`
    path: directory holding the image files
    Prints how many RLE entries exist before and after the round trip.
    '''
    _, panels = plt.subplots(1, 3, figsize=(10, 5))
    image_panel, mask_panel, roundtrip_panel = panels

    # Left panel: the raw image
    picture = imread(os.path.join(path, image_id))
    image_panel.imshow(picture)
    image_panel.set_title(image_id)

    # Middle panel: the mask built from the RLE rows of this image
    source_rles = masks.query('ImageId=="{}"'.format(image_id))['EncodedPixels']
    source_mask = masks_as_image(source_rles)
    mask_panel.imshow(source_mask[:, :, 0])
    mask_panel.set_title('Ship Mask')

    # Right panel: re-encoded then decoded mask, overlaid on the image
    roundtrip_rles = multi_rle_encode(source_mask)
    roundtrip_mask = masks_as_image(roundtrip_rles)
    roundtrip_panel.imshow(picture)
    roundtrip_panel.imshow(roundtrip_mask[:, :, 0], alpha=0.3)
    roundtrip_panel.set_title('Encoded & Decoded Mask')

    plt.show()
    print(image_id, ' Check Decoding->Encoding',
          'RLE_0:', len(source_rles), '->',
          'RLE_1:', len(roundtrip_rles))
# Inspect a few examples; a note, when present, is printed right after its image
for example_id, note in (
    ('000155de5.jpg', None),
    ('00003e153.jpg', 'It could be different when there is no mask.'),
    ('00021ddc3.jpg', 'It could be different when there are masks overlapped.'),
):
    shows_decode_encode(example_id)
    if note is not None:
        print(note)
# In[ ]:
# Flag every CSV row: 1 when it carries an RLE string, 0 otherwise
def _ship_flag(encoded_pixels):
    return int(isinstance(encoded_pixels, str))


masks['ships'] = masks['EncodedPixels'].map(_ship_flag)

# One row per image: total ship count plus the list of its RLE entries
start_time = time.time()
rows_by_image = masks.groupby('ImageId')
unique_img_ids = rows_by_image.agg({'ships': 'sum'})
unique_img_ids['RleMaskList'] = rows_by_image['EncodedPixels'].apply(list)
unique_img_ids = unique_img_ids.reset_index()
end_time = time.time() - start_time
print("unique_img_ids groupby took: {}".format(end_time))

# Keep only the images that contain at least one ship
contains_ship = unique_img_ids['ships'] > 0
unique_img_ids = unique_img_ids[contains_ship]
unique_img_ids['ships'].hist()
unique_img_ids.sample(3)
# In[ ]:
# Split into training & validation sets, stratified by ships per image
from sklearn.model_selection import train_test_split
train_ids, val_ids = train_test_split(
    unique_img_ids,
    stratify=unique_img_ids['ships'],
    test_size=TRAINING_VALIDATION_RATIO,
)
for subset, caption in ((train_ids, 'training masks'), (val_ids, 'validation masks')):
    print(subset.shape[0], caption)
# Histogram of ship counts for each subset
for subset in (train_ids, val_ids):
    subset['ships'].hist()
# In[ ]:
# Set to True to discard an existing Mask_RCNN checkout and clone it again
UPDATE_MASK_RCNN = False

# Standard-library replacements for the notebook-only `!` shell escapes, so
# this cell also runs as a plain Python script (no get_ipython()).
import shutil
import subprocess

os.chdir(WORKING_DIR)
if UPDATE_MASK_RCNN:
    # Like `rm -rf`: a missing directory is not an error
    shutil.rmtree(MASK_RCNN_PATH, ignore_errors=True)

# Download the Mask R-CNN code to a local folder
if not os.path.exists(MASK_RCNN_PATH):
    # Argument list (no shell); check=True stops here if the clone fails
    subprocess.run(
        ['git', 'clone', 'https://github.com/matterport/Mask_RCNN.git', MASK_RCNN_PATH],
        check=True)
# Alternative source kept for reference (fork archive instead of a git clone):
#   wget https://github.com/samlin001/Mask_RCNN/archive/master.zip -O Mask_RCNN-master.zip
#   unzip Mask_RCNN-master.zip 'Mask_RCNN-master/mrcnn/*'
#   rm Mask_RCNN-master.zip

# Import Mask RCNN
sys.path.append(MASK_RCNN_PATH)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
# In[ ]:
class AirbusShipDetectionChallengeDataset(utils.Dataset):
    """Airbus Ship Detection Challenge dataset for Mask R-CNN.

    Registers one class (ship) and one image entry per row of `ids`, keeping
    each image's RLE mask list so instance masks can be decoded on demand.
    """

    def __init__(self, image_file_dir, ids, masks, image_width=IMAGE_WIDTH, image_height=IMAGE_HEIGHT):
        """Store the settings, register the ship class and load all images.

        image_file_dir: directory containing the image files
        ids: DataFrame with 'ImageId' and 'RleMaskList' columns
        masks: the mask table; stored on the instance, not read by this class
        image_width, image_height: pixel dimensions recorded for every image
        """
        # Was `super().__init__(self)`, which passed the instance as an argument
        super().__init__()
        self.image_file_dir = image_file_dir
        self.ids = ids
        self.masks = masks
        self.image_width = image_width
        self.image_height = image_height

        # Add classes
        self.add_class(SHIP_CLASS_NAME, 1, SHIP_CLASS_NAME)
        self.load_dataset()

    def load_dataset(self):
        """Register every row of `self.ids` as an image entry."""
        for _, row in self.ids.iterrows():
            image_id = row['ImageId']
            self.add_image(
                SHIP_CLASS_NAME,
                image_id=image_id,
                path=os.path.join(self.image_file_dir, image_id),
                width=self.image_width, height=self.image_height,
                rle_mask_list=row['RleMaskList'])

    def load_mask(self, image_id):
        """Decode the instance masks of the given image.

        Returns a bool array of shape (height, width, instance count) and an
        int32 array of class IDs, one per instance (all 1: ship is the only class).
        """
        info = self.image_info[image_id]
        # Only string entries are real masks; dropping the others up front
        # avoids all-zero channels being reported as ship instances.
        rle_masks = [rle for rle in info['rle_mask_list'] if isinstance(rle, str)]
        mask = np.zeros([info['height'], info['width'], len(rle_masks)],
                        dtype=np.uint8)
        for channel, rle in enumerate(rle_masks):
            # rle_decode takes (width, height) and returns (height, width);
            # use this image's own dimensions rather than the global default
            mask[:, :, channel] = rle_decode(rle, (info['width'], info['height']))

        # `bool` instead of the `np.bool` alias, which newer NumPy no longer provides
        return mask.astype(bool), np.ones([mask.shape[-1]], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info['source'] == SHIP_CLASS_NAME:
            return info['path']
        # Return the parent's answer; zero-argument super() also stays
        # correct if this class is subclassed
        return super().image_reference(image_id)
# In[ ]:
class AirbusShipDetectionChallengeGPUConfig(Config):
    """Mask R-CNN settings for the Airbus Ship Detection Challenge dataset.

    Overrides a subset of the values in the base Config class.
    Base class: https://github.com/samlin001/Mask_RCNN/blob/master/mrcnn/config.py
    Kaggle kernel specs: https://www.kaggle.com/docs/kernels#technical-specifications
    """
    NAME = 'ASDC_GPU'

    # Two classes: ship and background
    NUM_CLASSES = 2

    # Number of GPUs to use and images per GPU
    IMAGES_PER_GPU = 2
    GPU_COUNT = 1

    # Both image dimension bounds are pinned to the source image width
    IMAGE_MAX_DIM = IMAGE_WIDTH
    IMAGE_MIN_DIM = IMAGE_WIDTH

    # Steps per training epoch and per validation pass
    VALIDATION_STEPS = 50
    STEPS_PER_EPOCH = 300
    # NOTE(review): presumably read by the samlin001 fork linked above;
    # confirm the installed mrcnn version uses it
    SAVE_BEST_ONLY = True

    # Minimum probability value to accept a detected instance;
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.95
    # Non-maximum suppression threshold for detection;
    # kept small to merge overlapping ROIs
    DETECTION_NMS_THRESHOLD = 0.05


config = AirbusShipDetectionChallengeGPUConfig()
config.display()
# In[ ]:
start_time = time.time()
# Build and prepare the training split first, then the validation split;
# both read their images from the training image folder.
prepared = []
for split_ids in (train_ids, val_ids):
    dataset = AirbusShipDetectionChallengeDataset(
        image_file_dir=TRAIN_DATA_PATH, ids=split_ids, masks=masks)
    dataset.prepare()
    prepared.append(dataset)
dataset_train, dataset_val = prepared

# Show three randomly drawn training samples with their top mask
for sample_id in np.random.choice(dataset_train.image_ids, 3):
    sample_image = dataset_train.load_image(sample_id)
    sample_mask, sample_class_ids = dataset_train.load_mask(sample_id)
    visualize.display_top_masks(
        sample_image, sample_mask, sample_class_ids,
        dataset_train.class_names, limit=1)

end_time = time.time() - start_time
print("dataset prepare: {}".format(end_time))
# In[ ]:
start_time = time.time()
model = modellib.MaskRCNN(mode="training", config=config, model_dir=WORKING_DIR)
import errno
try:
    # Resume from the most recent checkpoint of this model when one exists.
    # Its heads were trained for this dataset, so every layer is loaded.
    weights_path = model.find_last()
    excluded_layers = None
except FileNotFoundError:
    # No previously trained weights: start from COCO
    weights_path = COCO_WEIGHTS_PATH
    # Download only when the file is not already on disk
    if not os.path.exists(weights_path):
        utils.download_trained_weights(weights_path)
    # Skip the head layers for COCO weights only; they were trained for
    # COCO's class list, not for the classes configured here.
    excluded_layers = [
        "mrcnn_class_logits", "mrcnn_bbox_fc",
        "mrcnn_bbox", "mrcnn_mask"]

print("Loading weights: ", weights_path)
model.load_weights(weights_path, by_name=True, exclude=excluded_layers)
end_time = time.time() - start_time
print("loading weights: {}".format(end_time))
# In[ ]:
"""Train the model."""
# Train all layers at 1.5x the configured learning rate and time the run
start_time = time.time()
train_options = dict(
    learning_rate=config.LEARNING_RATE * 1.5,
    epochs=21,
    layers='all',
)
model.train(dataset_train, dataset_val, **train_options)
end_time = time.time() - start_time
print("Train model: {}".format(end_time))
# In[ ]:
import tensorflow as tf
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
import keras
from keras import backend as K
#print(model.keras_model.output.name)
# One Keras session is used for both the checkpoint and the frozen graph
sess = K.get_session()
# Make sure the export directory exists before anything is written to it
os.makedirs(OUTPUT_DIR, exist_ok=True)

# TensorFlow checkpoint of the session variables
saver = tf.train.Saver()
saver.save(sess, os.path.join(OUTPUT_DIR, 'mask_rcnn_model.ckpt'))

# Author's note: saving through tf.keras in HDF5 format does not work here
# because of variables that are not constants:
#keras_file = "/notebooks/kaggle/output/keras_model.h5"
#tf.keras.models.save_model(model.keras_model, keras_file)

# Frozen graph: variables are converted to constants, keeping the model outputs
output_names = [out.op.name for out in model.keras_model.outputs]
constant_graph = graph_util.convert_variables_to_constants(
    sess,
    sess.graph.as_graph_def(),
    output_names)
graph_io.write_graph(constant_graph, OUTPUT_DIR, "mask_rcnn_frozen_model.pb", as_text=False)
# In[ ]:
class InferenceConfig(AirbusShipDetectionChallengeGPUConfig):
    """Training configuration narrowed to one GPU and one image per GPU."""
    # 1 image for inference
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1


inference_config = InferenceConfig()

# Model in inference mode, using the same model directory as training
infer_model = modellib.MaskRCNN(
    model_dir=WORKING_DIR,
    config=inference_config,
    mode="inference",
)

# Load the most recent trained weights
model_path = infer_model.find_last()
print("Loading weights from ", model_path)
infer_model.load_weights(model_path, by_name=True)
# Test on a random validation image
image_id = np.random.choice(dataset_val.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
    dataset_val, inference_config, image_id, use_mini_mask=False)
log("original_image", original_image)
log("image_meta", image_meta)
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

# Ground truth; class names come from the dataset the image was drawn from
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
                            dataset_val.class_names, figsize=(8, 8))

# Model prediction for the same image
results = infer_model.detect([original_image], verbose=1)
r = results[0]
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'],
                            dataset_val.class_names, r['scores'])
# Compute VOC-Style mean Average Precision @ IoU=0.5
# Running on a few images. Increase for better accuracy.
# Sample without replacement so no image is counted twice in the mean, and
# never ask for more images than the validation set holds.
eval_count = min(20, len(dataset_val.image_ids))
image_ids = np.random.choice(dataset_val.image_ids, eval_count, replace=False)
APs = []
inference_start = time.time()
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
        dataset_val, inference_config, image_id, use_mini_mask=False)
    # Run object detection
    results = infer_model.detect([image], verbose=1)
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                dataset_val.class_names, r['scores'])
    # Compute AP of this image's detections against its ground truth
    AP, precisions, recalls, overlaps = utils.compute_ap(
        gt_bbox, gt_class_id, gt_mask,
        r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)
inference_end = time.time()
print('Inference Time: %0.2f Minutes'%((inference_end - inference_start)/60))
print("mAP: ", np.mean(APs))
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment