Last active
October 24, 2018 19:42
-
-
Save yang-zhang/17da223e1d511f6eb414be284115705f to your computer and use it in GitHub Desktop.
fastai-ImageMultiDataset-debug
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true, | |
"scrolled": true | |
}, | |
"cell_type": "code", | |
"source": "import fastai; fastai.__version__", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 1, | |
"data": { | |
"text/plain": "'1.0.12.dev0'" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "from fastai import *\nfrom fastai.vision import *", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "fns_trn=['1.jpg', '2.jpg']\nlbls_trn=[['a', 'b'], ['c']]\n\nfns_val=['3.jpg']\nlbls_val=[['a', 'd']]", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "train_ds=ImageMultiDataset(fns_trn, lbls_trn)", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "valid_ds=ImageMultiDataset(fns_val, lbls_val, train_ds.classes)", | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "error", | |
"ename": "KeyError", | |
"evalue": "'d'", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-5-bae797527f00>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvalid_ds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mImageMultiDataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfns_val\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlbls_val\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_ds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/data/git/fastai/fastai/vision/data.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, fns, labels, classes)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m self.y = [np.array([self.class2idx[o] for o in l], dtype=np.int64)\n\u001b[0;32m--> 118\u001b[0;31m for l in labels]\n\u001b[0m\u001b[1;32m 119\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_cross_entropy_with_logits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/data/git/fastai/fastai/vision/data.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m self.y = [np.array([self.class2idx[o] for o in l], dtype=np.int64)\n\u001b[0;32m--> 118\u001b[0;31m for l in labels]\n\u001b[0m\u001b[1;32m 119\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_cross_entropy_with_logits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/data/git/fastai/fastai/vision/data.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclass2idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mk\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 117\u001b[0;31m self.y = [np.array([self.class2idx[o] for o in l], dtype=np.int64)\n\u001b[0m\u001b[1;32m 118\u001b[0m for l in labels]\n\u001b[1;32m 119\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_cross_entropy_with_logits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyError\u001b[0m: 'd'" | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Previously proposed fix in this closed [PR](https://github.com/fastai/fastai/pull/956). We would need a proper way to handle this. " | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "class NewImageMultiDataset(LabelDataset):\n \n def __init__(self, fns:FilePathList, labels:ImgLabels, classes:Optional[Classes]=None):\n self.classes = ifnone(classes, uniqueify(np.concatenate(labels)))\n self.class2idx = {v:k for k,v in enumerate(self.classes)}\n self.x = np.array(fns)\n self.y = [np.array([self.class2idx[o] for o in l], dtype=np.int64)\n for l in labels if l in self.classes]\n self.loss_func = F.binary_cross_entropy_with_logits", | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "train_ds=NewImageMultiDataset(fns_trn, lbls_trn)\nvalid_ds=NewImageMultiDataset(fns_val, lbls_val, train_ds.classes)", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "train_ds, valid_ds", | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 8, | |
"data": { | |
"text/plain": "(NewImageMultiDataset of len 2, NewImageMultiDataset of len 1)" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "markdown", | |
"source": "More discussion on the forum\n\nhttps://forums.fast.ai/t/how-should-imagemultidataset-handle-labels-in-valid-ds-unseen-by-train-ds/27869/2?u=yang-zhang\n\nI think it is best to make no changes to the code. And when creating the `train_ds`, pass the union of the labels of training and validation labels." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "lbs_all = set([j for i in lbls_trn+lbls_val for j in i])", | |
"execution_count": 16, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "train_ds=ImageMultiDataset(fns_trn, lbls_trn, lbs_all)", | |
"execution_count": 17, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "valid_ds=ImageMultiDataset(fns_val, lbls_val, lbs_all)", | |
"execution_count": 18, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "train_ds.classes, valid_ds.classes", | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 19, | |
"data": { | |
"text/plain": "({'a', 'b', 'c', 'd'}, {'a', 'b', 'c', 'd'})" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"_draft": { | |
"nbviewer_url": "https://gist.github.com/17da223e1d511f6eb414be284115705f" | |
}, | |
"gist": { | |
"id": "17da223e1d511f6eb414be284115705f", | |
"data": { | |
"description": "fastai-ImageMultiDataset-debug", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "conda-env-fastaidev-py", | |
"display_name": "Python [conda env:fastaidev]", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.7.0", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"toc": { | |
"nav_menu": {}, | |
"number_sections": true, | |
"sideBar": true, | |
"skip_h1_title": false, | |
"base_numbering": 1, | |
"title_cell": "Table of Contents", | |
"title_sidebar": "Contents", | |
"toc_cell": false, | |
"toc_position": {}, | |
"toc_section_display": true, | |
"toc_window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment