Skip to content

Instantly share code, notes, and snippets.

@yang-zhang
Last active October 24, 2018 19:42
Show Gist options
  • Save yang-zhang/17da223e1d511f6eb414be284115705f to your computer and use it in GitHub Desktop.
Save yang-zhang/17da223e1d511f6eb414be284115705f to your computer and use it in GitHub Desktop.
fastai-ImageMultiDataset-debug
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true,
"scrolled": true
},
"cell_type": "code",
"source": "import fastai; fastai.__version__",
"execution_count": 1,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 1,
"data": {
"text/plain": "'1.0.12.dev0'"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "from fastai import *\nfrom fastai.vision import *",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "fns_trn=['1.jpg', '2.jpg']\nlbls_trn=[['a', 'b'], ['c']]\n\nfns_val=['3.jpg']\nlbls_val=[['a', 'd']]",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "train_ds=ImageMultiDataset(fns_trn, lbls_trn)",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "valid_ds=ImageMultiDataset(fns_val, lbls_val, train_ds.classes)",
"execution_count": 5,
"outputs": [
{
"output_type": "error",
"ename": "KeyError",
"evalue": "'d'",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-bae797527f00>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mvalid_ds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mImageMultiDataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfns_val\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlbls_val\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_ds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/data/git/fastai/fastai/vision/data.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, fns, labels, classes)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m self.y = [np.array([self.class2idx[o] for o in l], dtype=np.int64)\n\u001b[0;32m--> 118\u001b[0;31m for l in labels]\n\u001b[0m\u001b[1;32m 119\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_cross_entropy_with_logits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/data/git/fastai/fastai/vision/data.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m self.y = [np.array([self.class2idx[o] for o in l], dtype=np.int64)\n\u001b[0;32m--> 118\u001b[0;31m for l in labels]\n\u001b[0m\u001b[1;32m 119\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_cross_entropy_with_logits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/data/git/fastai/fastai/vision/data.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclass2idx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mk\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclasses\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 116\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 117\u001b[0;31m self.y = [np.array([self.class2idx[o] for o in l], dtype=np.int64)\n\u001b[0m\u001b[1;32m 118\u001b[0m for l in labels]\n\u001b[1;32m 119\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_func\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbinary_cross_entropy_with_logits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyError\u001b[0m: 'd'"
]
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "A fix was previously proposed in this (now closed) [PR](https://github.com/fastai/fastai/pull/956). We still need a proper way to handle validation labels that are missing from the training classes."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "class NewImageMultiDataset(LabelDataset):\n    \"Multi-label image dataset that skips labels absent from `classes` instead of raising `KeyError`.\"\n\n    def __init__(self, fns:FilePathList, labels:ImgLabels, classes:Optional[Classes]=None):\n        # Infer the classes from `labels` only when the caller does not supply them.\n        self.classes = ifnone(classes, uniqueify(np.concatenate(labels)))\n        self.class2idx = {v:k for k,v in enumerate(self.classes)}\n        self.x = np.array(fns)\n        # Test membership per label (not per label list) so `y` keeps one entry per file in `x`.\n        self.y = [np.array([self.class2idx[o] for o in l if o in self.class2idx], dtype=np.int64)\n                  for l in labels]\n        self.loss_func = F.binary_cross_entropy_with_logits",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "train_ds=NewImageMultiDataset(fns_trn, lbls_trn)\nvalid_ds=NewImageMultiDataset(fns_val, lbls_val, train_ds.classes)",
"execution_count": 7,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "train_ds, valid_ds",
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 8,
"data": {
"text/plain": "(NewImageMultiDataset of len 2, NewImageMultiDataset of len 1)"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "markdown",
"source": "More discussion on the forum:\n\nhttps://forums.fast.ai/t/how-should-imagemultidataset-handle-labels-in-valid-ds-unseen-by-train-ds/27869/2?u=yang-zhang\n\nI think it is best to make no changes to the library code. Instead, when creating `train_ds` and `valid_ds`, pass the union of the training and validation labels as `classes`."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Sorted list rather than a set: gives a stable, indexable class -> index mapping across kernel restarts.\nlbls_all = sorted({lbl for lbls in lbls_trn + lbls_val for lbl in lbls})",
"execution_count": 16,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "train_ds=ImageMultiDataset(fns_trn, lbls_trn, lbls_all)",
"execution_count": 17,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "valid_ds=ImageMultiDataset(fns_val, lbls_val, lbls_all)",
"execution_count": 18,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "train_ds.classes, valid_ds.classes",
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"execution_count": 19,
"data": {
"text/plain": "(['a', 'b', 'c', 'd'], ['a', 'b', 'c', 'd'])"
},
"metadata": {}
}
]
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/17da223e1d511f6eb414be284115705f"
},
"gist": {
"id": "17da223e1d511f6eb414be284115705f",
"data": {
"description": "fastai-ImageMultiDataset-debug",
"public": true
}
},
"kernelspec": {
"name": "conda-env-fastaidev-py",
"display_name": "Python [conda env:fastaidev]",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.7.0",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment