Skip to content

Instantly share code, notes, and snippets.

@metasim
Created March 5, 2020 19:07
Show Gist options
  • Select an option

  • Save metasim/1734fee3eefc4474a0f269aa976394a0 to your computer and use it in GitHub Desktop.

Select an option

Save metasim/1734fee3eefc4474a0f269aa976394a0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"small_size = 1 # choose 0 or 1."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pyrasterframes\n",
"from pyrasterframes.rasterfunctions import *\n",
"\n",
"from pyspark.sql.functions import lit\n",
"import pyspark.sql.functions as F\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from pyrasterframes.utils import create_rf_spark_session\n",
"spark = create_rf_spark_session(**{\n",
" 'spark.app.name': 'fire scale small' if small_size else 'fire scale LARGE',\n",
" 'spark.driver.memory': '8G'\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'GDAL 2.4.4, released 2020/01/08'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from pyrasterframes.utils import gdal_version\n",
"gdal_version()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'scalaVersion': '2.11.12', 'sbtVersion': '1.3.7', 'name': 'core', 'rfSparkVersion': '2.4.4', 'rfGeoMesaVersion': '2.2.1', 'GDAL': 'GDAL 2.4.4, released 2020/01/08', 'rfGeoTrellisVersion': '3.2.0', 'version': '0.9.0-SNAPSHOT'}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from pyrasterframes.utils import build_info\n",
"build_info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The configured Spark resources"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'GDALOptionsConfig(Map(CPL_VSIL_CURL_CHUNK_SIZE -> 1000000, CPL_VSIL_CURL_ALLOWED_EXTENSIONS -> .tif,.tiff,.jp2,.mrf,.idx,.lrc,.mrf.aux.xml,.vrt, AWS_REQUEST_PAYER -> requester, GDAL_HTTP_MAX_RETRY -> 4, GDAL_PAM_ENABLED -> NO, GDAL_DISABLE_READDIR_ON_OPEN -> YES, GDAL_CACHEMAX -> 512, GDAL_HTTP_RETRY_DELAY -> 1),List(SOURCE, WARPED),2147483647)'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"spark._jvm.geotrellis.raster.gdal.config.GDALOptionsConfig.conf().toString()\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"catalog = pd.read_csv('small_catalog.csv' if small_size else 'large_catalog.csv')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id_before</th>\n",
" <th>datetime_before</th>\n",
" <th>eod_grid_id</th>\n",
" <th>eo_cloud_cover_before</th>\n",
" <th>B08_60m_before</th>\n",
" <th>B12_60m_before</th>\n",
" <th>id_after</th>\n",
" <th>datetime_after</th>\n",
" <th>eo_cloud_cover_after</th>\n",
" <th>B08_60m_after</th>\n",
" <th>B12_60m_after</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2B_OPER_MSI_L2A_TL_SGS__20190715T192218_A0123...</td>\n",
" <td>2019-07-15 13:45:27.879246+00:00</td>\n",
" <td>0.006991</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/15/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/15/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2A_OPER_MSI_L2A_TL_MTI__20190720T210944_A0212...</td>\n",
" <td>2019-07-20 13:45:24.255419+00:00</td>\n",
" <td>0.007976</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/20/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/20/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190620T174802_A0208...</td>\n",
" <td>2019-06-20 13:45:23.021022+00:00</td>\n",
" <td>0.245723</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/20/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/20/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2B_OPER_MSI_L2A_TL_SGS__20190705T154915_A0121...</td>\n",
" <td>2019-07-05 13:45:27.866311+00:00</td>\n",
" <td>0.012515</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/5/0/...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/5/0/...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2B_OPER_MSI_L2A_TL_SGS__20190725T173051_A0124...</td>\n",
" <td>2019-07-25 13:45:27.496120+00:00</td>\n",
" <td>0.008431</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/25/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/25/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190809T160333_A0215...</td>\n",
" <td>2019-08-09 13:45:22.985921+00:00</td>\n",
" <td>0.009283</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/9/0/...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/9/0/...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190630T160759_A0209...</td>\n",
" <td>2019-06-30 13:45:23.836332+00:00</td>\n",
" <td>0.031211</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/30/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/30/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190710T160807_A0211...</td>\n",
" <td>2019-07-10 13:45:24.243807+00:00</td>\n",
" <td>0.008530</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/10/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/10/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2B_OPER_MSI_L2A_TL_SGS__20190814T173608_A0127...</td>\n",
" <td>2019-08-14 13:45:25.552396+00:00</td>\n",
" <td>0.945800</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/14/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/14/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2B_OPER_MSI_L2A_TL_SGS__20190605T191825_A0117...</td>\n",
" <td>2019-06-05 13:45:27.125328+00:00</td>\n",
" <td>0.010959</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/5/0/...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/5/0/...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2B_OPER_MSI_L2A_TL_SGS__20190625T192347_A0120...</td>\n",
" <td>2019-06-25 13:45:27.381812+00:00</td>\n",
" <td>0.008593</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/25/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/25/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190819T192739_A0217...</td>\n",
" <td>2019-08-19 13:45:21.772834+00:00</td>\n",
" <td>0.007379</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/19/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/19/0...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2B_OPER_MSI_L2A_TL_SGS__20190903T174210_A0130...</td>\n",
" <td>2019-09-03 13:45:21.980196+00:00</td>\n",
" <td>0.068185</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/3/0/...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/3/0/...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190908T160833_A0220...</td>\n",
" <td>2019-09-08 13:45:18.003007+00:00</td>\n",
" <td>0.019068</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/8/0/...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/8/0/...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205...</td>\n",
" <td>2019-05-31 13:45:21.658163+00:00</td>\n",
" <td>MGRS-22LEP</td>\n",
" <td>0.856069</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0...</td>\n",
" <td>S2B_OPER_MSI_L2A_TL_SGS__20190913T191931_A0131...</td>\n",
" <td>2019-09-13 13:45:19.525334+00:00</td>\n",
" <td>0.012502</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0...</td>\n",
" <td>s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id_before \\\n",
"0 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"1 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"2 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"3 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"4 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"5 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"6 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"7 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"8 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"9 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"10 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"11 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"12 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"13 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"14 S2A_OPER_MSI_L2A_TL_SGS__20190531T161101_A0205... \n",
"\n",
" datetime_before eod_grid_id eo_cloud_cover_before \\\n",
"0 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"1 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"2 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"3 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"4 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"5 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"6 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"7 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"8 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"9 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"10 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"11 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"12 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"13 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"14 2019-05-31 13:45:21.658163+00:00 MGRS-22LEP 0.856069 \n",
"\n",
" B08_60m_before \\\n",
"0 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"1 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"2 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"3 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"4 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"5 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"6 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"7 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"8 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"9 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"10 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"11 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"12 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"13 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"14 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"\n",
" B12_60m_before \\\n",
"0 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"1 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"2 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"3 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"4 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"5 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"6 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"7 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"8 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"9 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"10 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"11 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"12 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"13 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"14 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0... \n",
"\n",
" id_after \\\n",
"0 S2B_OPER_MSI_L2A_TL_SGS__20190715T192218_A0123... \n",
"1 S2A_OPER_MSI_L2A_TL_MTI__20190720T210944_A0212... \n",
"2 S2A_OPER_MSI_L2A_TL_SGS__20190620T174802_A0208... \n",
"3 S2B_OPER_MSI_L2A_TL_SGS__20190705T154915_A0121... \n",
"4 S2B_OPER_MSI_L2A_TL_SGS__20190725T173051_A0124... \n",
"5 S2A_OPER_MSI_L2A_TL_SGS__20190809T160333_A0215... \n",
"6 S2A_OPER_MSI_L2A_TL_SGS__20190630T160759_A0209... \n",
"7 S2A_OPER_MSI_L2A_TL_SGS__20190710T160807_A0211... \n",
"8 S2B_OPER_MSI_L2A_TL_SGS__20190814T173608_A0127... \n",
"9 S2B_OPER_MSI_L2A_TL_SGS__20190605T191825_A0117... \n",
"10 S2B_OPER_MSI_L2A_TL_SGS__20190625T192347_A0120... \n",
"11 S2A_OPER_MSI_L2A_TL_SGS__20190819T192739_A0217... \n",
"12 S2B_OPER_MSI_L2A_TL_SGS__20190903T174210_A0130... \n",
"13 S2A_OPER_MSI_L2A_TL_SGS__20190908T160833_A0220... \n",
"14 S2B_OPER_MSI_L2A_TL_SGS__20190913T191931_A0131... \n",
"\n",
" datetime_after eo_cloud_cover_after \\\n",
"0 2019-07-15 13:45:27.879246+00:00 0.006991 \n",
"1 2019-07-20 13:45:24.255419+00:00 0.007976 \n",
"2 2019-06-20 13:45:23.021022+00:00 0.245723 \n",
"3 2019-07-05 13:45:27.866311+00:00 0.012515 \n",
"4 2019-07-25 13:45:27.496120+00:00 0.008431 \n",
"5 2019-08-09 13:45:22.985921+00:00 0.009283 \n",
"6 2019-06-30 13:45:23.836332+00:00 0.031211 \n",
"7 2019-07-10 13:45:24.243807+00:00 0.008530 \n",
"8 2019-08-14 13:45:25.552396+00:00 0.945800 \n",
"9 2019-06-05 13:45:27.125328+00:00 0.010959 \n",
"10 2019-06-25 13:45:27.381812+00:00 0.008593 \n",
"11 2019-08-19 13:45:21.772834+00:00 0.007379 \n",
"12 2019-09-03 13:45:21.980196+00:00 0.068185 \n",
"13 2019-09-08 13:45:18.003007+00:00 0.019068 \n",
"14 2019-09-13 13:45:19.525334+00:00 0.012502 \n",
"\n",
" B08_60m_after \\\n",
"0 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/15/0... \n",
"1 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/20/0... \n",
"2 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/20/0... \n",
"3 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/5/0/... \n",
"4 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/25/0... \n",
"5 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/9/0/... \n",
"6 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/30/0... \n",
"7 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/10/0... \n",
"8 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/14/0... \n",
"9 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/5/0/... \n",
"10 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/25/0... \n",
"11 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/19/0... \n",
"12 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/3/0/... \n",
"13 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/8/0/... \n",
"14 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0... \n",
"\n",
" B12_60m_after \n",
"0 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/15/0... \n",
"1 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/20/0... \n",
"2 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/20/0... \n",
"3 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/5/0/... \n",
"4 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/25/0... \n",
"5 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/9/0/... \n",
"6 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/30/0... \n",
"7 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/7/10/0... \n",
"8 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/14/0... \n",
"9 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/5/0/... \n",
"10 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/6/25/0... \n",
"11 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/8/19/0... \n",
"12 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/3/0/... \n",
"13 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/8/0/... \n",
"14 s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0... "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"catalog"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read the raster data with before and after columns in it."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"df = spark.read.raster(catalog,\n",
" ['B08_60m_before', 'B12_60m_before',\n",
" 'B08_60m_after', 'B12_60m_after'])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['B08_60m_before_path',\n",
" 'B12_60m_before_path',\n",
" 'B08_60m_after_path',\n",
" 'B12_60m_after_path',\n",
" 'B08_60m_before',\n",
" 'B12_60m_before',\n",
" 'B08_60m_after',\n",
" 'B12_60m_after',\n",
" 'id_before',\n",
" 'datetime_before',\n",
" 'eod_grid_id',\n",
" 'eo_cloud_cover_before',\n",
" 'id_after',\n",
" 'datetime_after',\n",
" 'eo_cloud_cover_after']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compute the normalized burn ratio."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"df2 = df.withColumn('nbr_before', \n",
" rf_normalized_difference('B08_60m_before', 'B12_60m_before')) \\\n",
" .withColumn('nbr_after', \n",
" rf_normalized_difference('B08_60m_after', 'B12_60m_after')) \\\n",
" .drop('B08_60m_before', 'B12_60m_before',\n",
" 'B08_60m_after', 'B12_60m_after')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"ename": "Py4JJavaError",
"evalue": "An error occurred while calling o123.collectToPython.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 135 in stage 1.0 failed 1 times, most recent failure: Lost task 135.0 in stage 1.0 (TID 192, localhost, executor driver): java.lang.IllegalArgumentException: Error fetching data for one of: GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0/R60m/B08.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0/R60m/B12.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0/R60m/B08.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0/R60m/B12.jp2)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.eval(RasterSourceToRasterRefs.scala:81)\n\tat org.apache.spark.sql.execution.GenerateExec$$anonfun$1$$anonfun$3.apply(GenerateExec.scala:95)\n\tat org.apache.spark.sql.execution.GenerateExec$$anonfun$1$$anonfun$3.apply(GenerateExec.scala:92)\n\tat scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)\n\tat scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)\n\tat scala.collection.Iterator$JoinIterator.hasNext(Iterator.scala:212)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)\n\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat 
org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)\n\tat org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:288)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:123)\n\tat org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: geotrellis.raster.gdal.MalformedDataException: Unable to construct a RasterExtent from the Transformation given. 
GDAL Error Code: 4\n\tat geotrellis.raster.gdal.GDALDataset$.rasterExtent$extension1(GDALDataset.scala:143)\n\tat geotrellis.raster.gdal.GDALRasterSource.gridExtent$lzycompute(GDALRasterSource.scala:93)\n\tat geotrellis.raster.gdal.GDALRasterSource.gridExtent(GDALRasterSource.scala:93)\n\tat geotrellis.raster.RasterMetadata$class.cols(RasterMetadata.scala:52)\n\tat geotrellis.raster.RasterSource.cols(RasterSource.scala:44)\n\tat org.locationtech.rasterframes.ref.SimpleRasterInfo$.apply(SimpleRasterInfo.scala:71)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource$$anonfun$tiffInfo$1.apply(GDALRasterSource.scala:53)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource$$anonfun$tiffInfo$1.apply(GDALRasterSource.scala:53)\n\tat scala.compat.java8.functionConverterImpls.AsJavaFunction.apply(FunctionConverters.scala:262)\n\tat com.github.benmanes.caffeine.cache.LocalCache.lambda$statsAware$0(LocalCache.java:139)\n\tat com.github.benmanes.caffeine.cache.UnboundedLocalCache.lambda$computeIfAbsent$2(UnboundedLocalCache.java:238)\n\tat java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)\n\tat com.github.benmanes.caffeine.cache.UnboundedLocalCache.computeIfAbsent(UnboundedLocalCache.java:234)\n\tat com.github.benmanes.caffeine.cache.LocalCache.computeIfAbsent(LocalCache.java:108)\n\tat com.github.benmanes.caffeine.cache.LocalManualCache.get(LocalManualCache.java:62)\n\tat com.github.blemale.scaffeine.Cache.get(Cache.scala:40)\n\tat org.locationtech.rasterframes.ref.SimpleRasterInfo$.apply(SimpleRasterInfo.scala:49)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource.tiffInfo(GDALRasterSource.scala:53)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource.extent(GDALRasterSource.scala:57)\n\tat org.locationtech.rasterframes.ref.RFRasterSource.rasterExtent(RFRasterSource.scala:71)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs$$anonfun$1.apply(RasterSourceToRasterRefs.scala:65)\n\tat 
org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs$$anonfun$1.apply(RasterSourceToRasterRefs.scala:63)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\n\tat scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:74)\n\tat scala.collection.TraversableLike$class.map(TraversableLike.scala:234)\n\tat scala.collection.AbstractTraversable.map(Traversable.scala:104)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.eval(RasterSourceToRasterRefs.scala:63)\n\t... 29 more\n\nDriver stacktrace:\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)\n\tat scala.Option.foreach(Option.scala:257)\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)\n\tat 
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:945)\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:363)\n\tat org.apache.spark.rdd.RDD.collect(RDD.scala:944)\n\tat org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)\n\tat org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3263)\n\tat org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3260)\n\tat org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)\n\tat org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)\n\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)\n\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)\n\tat org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)\n\tat org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat 
py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: java.lang.IllegalArgumentException: Error fetching data for one of: GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0/R60m/B08.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0/R60m/B12.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0/R60m/B08.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0/R60m/B12.jp2)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.eval(RasterSourceToRasterRefs.scala:81)\n\tat org.apache.spark.sql.execution.GenerateExec$$anonfun$1$$anonfun$3.apply(GenerateExec.scala:95)\n\tat org.apache.spark.sql.execution.GenerateExec$$anonfun$1$$anonfun$3.apply(GenerateExec.scala:92)\n\tat scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)\n\tat scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)\n\tat scala.collection.Iterator$JoinIterator.hasNext(Iterator.scala:212)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)\n\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat 
org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)\n\tat org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:288)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:123)\n\tat org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\t... 1 more\nCaused by: geotrellis.raster.gdal.MalformedDataException: Unable to construct a RasterExtent from the Transformation given. 
GDAL Error Code: 4\n\tat geotrellis.raster.gdal.GDALDataset$.rasterExtent$extension1(GDALDataset.scala:143)\n\tat geotrellis.raster.gdal.GDALRasterSource.gridExtent$lzycompute(GDALRasterSource.scala:93)\n\tat geotrellis.raster.gdal.GDALRasterSource.gridExtent(GDALRasterSource.scala:93)\n\tat geotrellis.raster.RasterMetadata$class.cols(RasterMetadata.scala:52)\n\tat geotrellis.raster.RasterSource.cols(RasterSource.scala:44)\n\tat org.locationtech.rasterframes.ref.SimpleRasterInfo$.apply(SimpleRasterInfo.scala:71)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource$$anonfun$tiffInfo$1.apply(GDALRasterSource.scala:53)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource$$anonfun$tiffInfo$1.apply(GDALRasterSource.scala:53)\n\tat scala.compat.java8.functionConverterImpls.AsJavaFunction.apply(FunctionConverters.scala:262)\n\tat com.github.benmanes.caffeine.cache.LocalCache.lambda$statsAware$0(LocalCache.java:139)\n\tat com.github.benmanes.caffeine.cache.UnboundedLocalCache.lambda$computeIfAbsent$2(UnboundedLocalCache.java:238)\n\tat java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)\n\tat com.github.benmanes.caffeine.cache.UnboundedLocalCache.computeIfAbsent(UnboundedLocalCache.java:234)\n\tat com.github.benmanes.caffeine.cache.LocalCache.computeIfAbsent(LocalCache.java:108)\n\tat com.github.benmanes.caffeine.cache.LocalManualCache.get(LocalManualCache.java:62)\n\tat com.github.blemale.scaffeine.Cache.get(Cache.scala:40)\n\tat org.locationtech.rasterframes.ref.SimpleRasterInfo$.apply(SimpleRasterInfo.scala:49)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource.tiffInfo(GDALRasterSource.scala:53)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource.extent(GDALRasterSource.scala:57)\n\tat org.locationtech.rasterframes.ref.RFRasterSource.rasterExtent(RFRasterSource.scala:71)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs$$anonfun$1.apply(RasterSourceToRasterRefs.scala:65)\n\tat 
org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs$$anonfun$1.apply(RasterSourceToRasterRefs.scala:63)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\n\tat scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:74)\n\tat scala.collection.TraversableLike$class.map(TraversableLike.scala:234)\n\tat scala.collection.AbstractTraversable.map(Traversable.scala:104)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.eval(RasterSourceToRasterRefs.scala:63)\n\t... 29 more\n",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-d0e5e1145736>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf2\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoPandas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/usr/local/spark/python/pyspark/sql/dataframe.py\u001b[0m in \u001b[0;36mtoPandas\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 2141\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2142\u001b[0m \u001b[0;31m# Below is toPandas without Arrow optimization.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2143\u001b[0;31m \u001b[0mpdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_records\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcollect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2145\u001b[0m \u001b[0mdtype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/spark/python/pyspark/sql/dataframe.py\u001b[0m in \u001b[0;36mcollect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 532\u001b[0m \"\"\"\n\u001b[1;32m 533\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mSCCallSiteSync\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sc\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mcss\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 534\u001b[0;31m \u001b[0msock_info\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcollectToPython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 535\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_load_from_socket\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msock_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBatchedSerializer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mPickleSerializer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 536\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[0;32m-> 1257\u001b[0;31m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[1;32m 1258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1259\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/spark/python/pyspark/sql/utils.py\u001b[0m in \u001b[0;36mdeco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mdeco\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 63\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 64\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0ms\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjava_exception\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoString\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 326\u001b[0m raise Py4JJavaError(\n\u001b[1;32m 327\u001b[0m \u001b[0;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m format(target_id, \".\", name), value)\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m raise Py4JError(\n",
"\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o123.collectToPython.\n: org.apache.spark.SparkException: Job aborted due to stage failure: Task 135 in stage 1.0 failed 1 times, most recent failure: Lost task 135.0 in stage 1.0 (TID 192, localhost, executor driver): java.lang.IllegalArgumentException: Error fetching data for one of: GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0/R60m/B08.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0/R60m/B12.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0/R60m/B08.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0/R60m/B12.jp2)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.eval(RasterSourceToRasterRefs.scala:81)\n\tat org.apache.spark.sql.execution.GenerateExec$$anonfun$1$$anonfun$3.apply(GenerateExec.scala:95)\n\tat org.apache.spark.sql.execution.GenerateExec$$anonfun$1$$anonfun$3.apply(GenerateExec.scala:92)\n\tat scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)\n\tat scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)\n\tat scala.collection.Iterator$JoinIterator.hasNext(Iterator.scala:212)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)\n\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat 
org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)\n\tat org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:288)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:123)\n\tat org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: geotrellis.raster.gdal.MalformedDataException: Unable to construct a RasterExtent from the Transformation given. 
GDAL Error Code: 4\n\tat geotrellis.raster.gdal.GDALDataset$.rasterExtent$extension1(GDALDataset.scala:143)\n\tat geotrellis.raster.gdal.GDALRasterSource.gridExtent$lzycompute(GDALRasterSource.scala:93)\n\tat geotrellis.raster.gdal.GDALRasterSource.gridExtent(GDALRasterSource.scala:93)\n\tat geotrellis.raster.RasterMetadata$class.cols(RasterMetadata.scala:52)\n\tat geotrellis.raster.RasterSource.cols(RasterSource.scala:44)\n\tat org.locationtech.rasterframes.ref.SimpleRasterInfo$.apply(SimpleRasterInfo.scala:71)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource$$anonfun$tiffInfo$1.apply(GDALRasterSource.scala:53)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource$$anonfun$tiffInfo$1.apply(GDALRasterSource.scala:53)\n\tat scala.compat.java8.functionConverterImpls.AsJavaFunction.apply(FunctionConverters.scala:262)\n\tat com.github.benmanes.caffeine.cache.LocalCache.lambda$statsAware$0(LocalCache.java:139)\n\tat com.github.benmanes.caffeine.cache.UnboundedLocalCache.lambda$computeIfAbsent$2(UnboundedLocalCache.java:238)\n\tat java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)\n\tat com.github.benmanes.caffeine.cache.UnboundedLocalCache.computeIfAbsent(UnboundedLocalCache.java:234)\n\tat com.github.benmanes.caffeine.cache.LocalCache.computeIfAbsent(LocalCache.java:108)\n\tat com.github.benmanes.caffeine.cache.LocalManualCache.get(LocalManualCache.java:62)\n\tat com.github.blemale.scaffeine.Cache.get(Cache.scala:40)\n\tat org.locationtech.rasterframes.ref.SimpleRasterInfo$.apply(SimpleRasterInfo.scala:49)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource.tiffInfo(GDALRasterSource.scala:53)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource.extent(GDALRasterSource.scala:57)\n\tat org.locationtech.rasterframes.ref.RFRasterSource.rasterExtent(RFRasterSource.scala:71)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs$$anonfun$1.apply(RasterSourceToRasterRefs.scala:65)\n\tat 
org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs$$anonfun$1.apply(RasterSourceToRasterRefs.scala:63)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\n\tat scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:74)\n\tat scala.collection.TraversableLike$class.map(TraversableLike.scala:234)\n\tat scala.collection.AbstractTraversable.map(Traversable.scala:104)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.eval(RasterSourceToRasterRefs.scala:63)\n\t... 29 more\n\nDriver stacktrace:\n\tat org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)\n\tat scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)\n\tat scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)\n\tat org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)\n\tat org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)\n\tat scala.Option.foreach(Option.scala:257)\n\tat org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)\n\tat org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)\n\tat org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)\n\tat 
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)\n\tat org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)\n\tat org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:945)\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)\n\tat org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)\n\tat org.apache.spark.rdd.RDD.withScope(RDD.scala:363)\n\tat org.apache.spark.rdd.RDD.collect(RDD.scala:944)\n\tat org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)\n\tat org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3263)\n\tat org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3260)\n\tat org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)\n\tat org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)\n\tat org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)\n\tat org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)\n\tat org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)\n\tat org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n\tat java.lang.reflect.Method.invoke(Method.java:498)\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\n\tat py4j.Gateway.invoke(Gateway.java:282)\n\tat 
py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\n\tat java.lang.Thread.run(Thread.java:748)\nCaused by: java.lang.IllegalArgumentException: Error fetching data for one of: GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0/R60m/B08.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/5/31/0/R60m/B12.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0/R60m/B08.jp2), GDALRasterSource(s3://sentinel-s2-l2a/tiles/22/L/EP/2019/9/13/0/R60m/B12.jp2)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.eval(RasterSourceToRasterRefs.scala:81)\n\tat org.apache.spark.sql.execution.GenerateExec$$anonfun$1$$anonfun$3.apply(GenerateExec.scala:95)\n\tat org.apache.spark.sql.execution.GenerateExec$$anonfun$1$$anonfun$3.apply(GenerateExec.scala:92)\n\tat scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)\n\tat scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)\n\tat scala.collection.Iterator$JoinIterator.hasNext(Iterator.scala:212)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)\n\tat org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)\n\tat org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)\n\tat scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)\n\tat 
org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)\n\tat org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)\n\tat org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)\n\tat org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\n\tat org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)\n\tat org.apache.spark.rdd.RDD.iterator(RDD.scala:288)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:123)\n\tat org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)\n\tat org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n\t... 1 more\nCaused by: geotrellis.raster.gdal.MalformedDataException: Unable to construct a RasterExtent from the Transformation given. 
GDAL Error Code: 4\n\tat geotrellis.raster.gdal.GDALDataset$.rasterExtent$extension1(GDALDataset.scala:143)\n\tat geotrellis.raster.gdal.GDALRasterSource.gridExtent$lzycompute(GDALRasterSource.scala:93)\n\tat geotrellis.raster.gdal.GDALRasterSource.gridExtent(GDALRasterSource.scala:93)\n\tat geotrellis.raster.RasterMetadata$class.cols(RasterMetadata.scala:52)\n\tat geotrellis.raster.RasterSource.cols(RasterSource.scala:44)\n\tat org.locationtech.rasterframes.ref.SimpleRasterInfo$.apply(SimpleRasterInfo.scala:71)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource$$anonfun$tiffInfo$1.apply(GDALRasterSource.scala:53)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource$$anonfun$tiffInfo$1.apply(GDALRasterSource.scala:53)\n\tat scala.compat.java8.functionConverterImpls.AsJavaFunction.apply(FunctionConverters.scala:262)\n\tat com.github.benmanes.caffeine.cache.LocalCache.lambda$statsAware$0(LocalCache.java:139)\n\tat com.github.benmanes.caffeine.cache.UnboundedLocalCache.lambda$computeIfAbsent$2(UnboundedLocalCache.java:238)\n\tat java.util.concurrent.ConcurrentHashMap.computeIfAbsent(ConcurrentHashMap.java:1660)\n\tat com.github.benmanes.caffeine.cache.UnboundedLocalCache.computeIfAbsent(UnboundedLocalCache.java:234)\n\tat com.github.benmanes.caffeine.cache.LocalCache.computeIfAbsent(LocalCache.java:108)\n\tat com.github.benmanes.caffeine.cache.LocalManualCache.get(LocalManualCache.java:62)\n\tat com.github.blemale.scaffeine.Cache.get(Cache.scala:40)\n\tat org.locationtech.rasterframes.ref.SimpleRasterInfo$.apply(SimpleRasterInfo.scala:49)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource.tiffInfo(GDALRasterSource.scala:53)\n\tat org.locationtech.rasterframes.ref.GDALRasterSource.extent(GDALRasterSource.scala:57)\n\tat org.locationtech.rasterframes.ref.RFRasterSource.rasterExtent(RFRasterSource.scala:71)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs$$anonfun$1.apply(RasterSourceToRasterRefs.scala:65)\n\tat 
org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs$$anonfun$1.apply(RasterSourceToRasterRefs.scala:63)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\n\tat scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)\n\tat scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:74)\n\tat scala.collection.TraversableLike$class.map(TraversableLike.scala:234)\n\tat scala.collection.AbstractTraversable.map(Traversable.scala:104)\n\tat org.locationtech.rasterframes.expressions.generators.RasterSourceToRasterRefs.eval(RasterSourceToRasterRefs.scala:63)\n\t... 29 more\n"
]
}
],
"source": [
"df2.toPandas()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment