omad · March 21, 2018 01:33
diff --git a/Grouping Datasets by Solar Day.ipynb b/Grouping Datasets by Solar Day.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# How to manually group datasets by _solar day_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datacube\n",
    "from datacube.api.query import query_group_by\n",
    "\n",
    "dc = datacube.Datacube()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## First, run a query to find all the relevant datasets.\n",
    "It may be better to search for \"nbar_scene\", as there'll be less contributing datasets for a particular area.\n",
    "\n",
    "This returns a python `list` of datasets, in no particular order."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Dataset <id=7f303131-955d-458d-a6ab-d5d07a9aad49 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170107235316500000_v1508306845.nc>,\n",
       " Dataset <id=89a2e0cb-3b2f-42c8-913b-adaa6bb65a6b type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170107235316500000_v1508439326.nc>,\n",
       " Dataset <id=bee1bea1-0b7c-44bf-9156-b3011cc2bb06 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170107235316500000_v1508439326.nc>,\n",
       " Dataset <id=b9649694-4777-4403-8450-a876e378d2d0 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170123235311500000_v1508306845.nc>,\n",
       " Dataset <id=1b614cb8-ca99-4cbd-9071-382dda2b8ce7 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170123235311500000_v1508306845.nc>,\n",
       " Dataset <id=c8868edb-1ca9-4ee2-b427-0db421b78e34 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170123235311500000_v1508439326.nc>,\n",
       " Dataset <id=81fe18d8-f9d6-48d3-bd78-d42982daad4d type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-51/LS8_OLI_NBAR_3577_13_-51_20170123235311500000_v1508439326.nc>]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datasets = dc.find_datasets(product='ls8_nbar_albers', time=('2017-01-01', '2017-02-01'), lon=(147, 148), lat=(-45, -46))\n",
    "\n",
    "datasets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## The use the `group_datasets` method, to group by solar day"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\u001b[0;31mSignature:\u001b[0m \u001b[0mdc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroup_datasets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdatasets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup_by\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;31mDocstring:\u001b[0m\n",
       "Group datasets along defined non-spatial dimensions (ie. time).\n",
       "\n",
       ":param datasets: a list of datasets, typically from :meth:`find_datasets`\n",
       ":param GroupBy group_by: Contains:\n",
       "    - a function that returns a label for a dataset\n",
       "    - name of the new dimension\n",
       "    - unit for the new dimension\n",
       "    - function to sort by before grouping\n",
       ":rtype: xarray.DataArray\n",
       "\n",
       ".. seealso:: :meth:`find_datasets`, :meth:`load_data`, :meth:`query_group_by`\n",
       "\u001b[0;31mFile:\u001b[0m      /g/data/v10/public/modules/dea/20180309/lib/python3.6/site-packages/datacube/api/core.py\n",
       "\u001b[0;31mType:\u001b[0m      function\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "dc.group_datasets?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<xarray.DataArray (time: 2)>\n",
       "array([(Dataset <id=7f303131-955d-458d-a6ab-d5d07a9aad49 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170107235316500000_v1508306845.nc>, Dataset <id=89a2e0cb-3b2f-42c8-913b-adaa6bb65a6b type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170107235316500000_v1508439326.nc>, Dataset <id=bee1bea1-0b7c-44bf-9156-b3011cc2bb06 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170107235316500000_v1508439326.nc>),\n",
       "       (Dataset <id=b9649694-4777-4403-8450-a876e378d2d0 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170123235311500000_v1508306845.nc>, Dataset <id=1b614cb8-ca99-4cbd-9071-382dda2b8ce7 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170123235311500000_v1508306845.nc>, Dataset <id=c8868edb-1ca9-4ee2-b427-0db421b78e34 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170123235311500000_v1508439326.nc>, Dataset <id=81fe18d8-f9d6-48d3-bd78-d42982daad4d type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-51/LS8_OLI_NBAR_3577_13_-51_20170123235311500000_v1508439326.nc>)],\n",
       "      dtype=object)\n",
       "Coordinates:\n",
       "  * time     (time) datetime64[ns] 2017-01-07T23:53:16.500000 ..."
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group_by_solarday = query_group_by('solar_day')\n",
    "grouped = dc.group_datasets(datasets, group_by=group_by_solarday)\n",
    "grouped"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## This has returned a rather awkward data structure, which is an `xarray.DataArray`, with a time dimension, where each element of the array is the set of data which contributes to that *grouped* observation time\n",
    "\n",
    "We can see what the times are that we have ended up with."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['2017-01-07T23:53:16.500000000', '2017-01-23T23:53:11.500000000'],\n",
       "      dtype='datetime64[ns]')"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.time.data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## We can see all the datasets that contribute to the data for the first time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(Dataset <id=7f303131-955d-458d-a6ab-d5d07a9aad49 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170107235316500000_v1508306845.nc>,\n",
       " Dataset <id=89a2e0cb-3b2f-42c8-913b-adaa6bb65a6b type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170107235316500000_v1508439326.nc>,\n",
       " Dataset <id=bee1bea1-0b7c-44bf-9156-b3011cc2bb06 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170107235316500000_v1508439326.nc>)"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.isel(time=0).data.item()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## And extract the first dataset from the first time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Dataset <id=7f303131-955d-458d-a6ab-d5d07a9aad49 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170107235316500000_v1508306845.nc>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped.isel(time=0).data.item()[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It's also possible to view the datasets on the DEA Dashboard, by copying the `id` to the end of the dashboard URL.\n",
    "\n",
    "eg. https://data.dea.gadevs.ga/dataset/7f303131-955d-458d-a6ab-d5d07a9aad49"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# How to manually group datasets by _solar day_"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import datacube\n",
	"from datacube.api.query import query_group_by\n",
	"\n",
	"dc = datacube.Datacube()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## First, run a query to find all the relevant datasets.\n",
	"It may be better to search for \"nbar_scene\", as there'll be less contributing datasets for a particular area.\n",
	"\n",
	"This returns a python `list` of datasets, in no particular order."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[Dataset <id=7f303131-955d-458d-a6ab-d5d07a9aad49 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170107235316500000_v1508306845.nc>,\n",
	" Dataset <id=89a2e0cb-3b2f-42c8-913b-adaa6bb65a6b type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170107235316500000_v1508439326.nc>,\n",
	" Dataset <id=bee1bea1-0b7c-44bf-9156-b3011cc2bb06 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170107235316500000_v1508439326.nc>,\n",
	" Dataset <id=b9649694-4777-4403-8450-a876e378d2d0 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170123235311500000_v1508306845.nc>,\n",
	" Dataset <id=1b614cb8-ca99-4cbd-9071-382dda2b8ce7 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170123235311500000_v1508306845.nc>,\n",
	" Dataset <id=c8868edb-1ca9-4ee2-b427-0db421b78e34 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170123235311500000_v1508439326.nc>,\n",
	" Dataset <id=81fe18d8-f9d6-48d3-bd78-d42982daad4d type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-51/LS8_OLI_NBAR_3577_13_-51_20170123235311500000_v1508439326.nc>]"
	]
	},
	"execution_count": 30,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"datasets = dc.find_datasets(product='ls8_nbar_albers', time=('2017-01-01', '2017-02-01'), lon=(147, 148), lat=(-45, -46))\n",
	"\n",
	"datasets"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## The use the `group_datasets` method, to group by solar day"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"\u001b[0;31mSignature:\u001b[0m \u001b[0mdc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroup_datasets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdatasets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup_by\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
	"\u001b[0;31mDocstring:\u001b[0m\n",
	"Group datasets along defined non-spatial dimensions (ie. time).\n",
	"\n",
	":param datasets: a list of datasets, typically from :meth:`find_datasets`\n",
	":param GroupBy group_by: Contains:\n",
	" - a function that returns a label for a dataset\n",
	" - name of the new dimension\n",
	" - unit for the new dimension\n",
	" - function to sort by before grouping\n",
	":rtype: xarray.DataArray\n",
	"\n",
	".. seealso:: :meth:`find_datasets`, :meth:`load_data`, :meth:`query_group_by`\n",
	"\u001b[0;31mFile:\u001b[0m /g/data/v10/public/modules/dea/20180309/lib/python3.6/site-packages/datacube/api/core.py\n",
	"\u001b[0;31mType:\u001b[0m function\n"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"dc.group_datasets?"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"<xarray.DataArray (time: 2)>\n",
	"array([(Dataset <id=7f303131-955d-458d-a6ab-d5d07a9aad49 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170107235316500000_v1508306845.nc>, Dataset <id=89a2e0cb-3b2f-42c8-913b-adaa6bb65a6b type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170107235316500000_v1508439326.nc>, Dataset <id=bee1bea1-0b7c-44bf-9156-b3011cc2bb06 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170107235316500000_v1508439326.nc>),\n",
	" (Dataset <id=b9649694-4777-4403-8450-a876e378d2d0 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170123235311500000_v1508306845.nc>, Dataset <id=1b614cb8-ca99-4cbd-9071-382dda2b8ce7 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170123235311500000_v1508306845.nc>, Dataset <id=c8868edb-1ca9-4ee2-b427-0db421b78e34 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170123235311500000_v1508439326.nc>, Dataset <id=81fe18d8-f9d6-48d3-bd78-d42982daad4d type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-51/LS8_OLI_NBAR_3577_13_-51_20170123235311500000_v1508439326.nc>)],\n",
	" dtype=object)\n",
	"Coordinates:\n",
	" * time (time) datetime64[ns] 2017-01-07T23:53:16.500000 ..."
	]
	},
	"execution_count": 16,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"group_by_solarday = query_group_by('solar_day')\n",
	"grouped = dc.group_datasets(datasets, group_by=group_by_solarday)\n",
	"grouped"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## This has returned a rather awkward data structure, which is an `xarray.DataArray`, with a time dimension, where each element of the array is the set of data which contributes to that grouped observation time\n",
	"\n",
	"We can see what the times are that we have ended up with."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"array(['2017-01-07T23:53:16.500000000', '2017-01-23T23:53:11.500000000'],\n",
	" dtype='datetime64[ns]')"
	]
	},
	"execution_count": 31,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"grouped.time.data"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## We can see all the datasets that contribute to the data for the first time"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 32,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(Dataset <id=7f303131-955d-458d-a6ab-d5d07a9aad49 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170107235316500000_v1508306845.nc>,\n",
	" Dataset <id=89a2e0cb-3b2f-42c8-913b-adaa6bb65a6b type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/13_-50/LS8_OLI_NBAR_3577_13_-50_20170107235316500000_v1508439326.nc>,\n",
	" Dataset <id=bee1bea1-0b7c-44bf-9156-b3011cc2bb06 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-51/LS8_OLI_NBAR_3577_12_-51_20170107235316500000_v1508439326.nc>)"
	]
	},
	"execution_count": 32,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"grouped.isel(time=0).data.item()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## And extract the first dataset from the first time"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 33,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Dataset <id=7f303131-955d-458d-a6ab-d5d07a9aad49 type=ls8_nbar_albers location=/g/data/rs0/datacube/002/LS8_OLI_NBAR/12_-50/LS8_OLI_NBAR_3577_12_-50_20170107235316500000_v1508306845.nc>"
	]
	},
	"execution_count": 33,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"grouped.isel(time=0).data.item()[0]"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"It's also possible to view the datasets on the DEA Dashboard, by copying the `id` to the end of the dashboard URL.\n",
	"\n",
	"eg. https://data.dea.gadevs.ga/dataset/7f303131-955d-458d-a6ab-d5d07a9aad49"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}