bennyistanto · October 23, 2023 01:57
diff --git a/PhenoMetrics_Merge_Output.ipynb b/PhenoMetrics_Merge_Output.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "id": "ac39c421",
   "metadata": {},
   "source": [
    "The phenological metrics data extraction consist of:\n",
    "1. Extract seasonality variable from TIMESAT with naming convention `{iso3}_data_{yyyy}_{phenos}_season{1/2}`\n",
    "2. Translate the BIL image into GeoTIFF\n",
    "3. Remove unnecessary 250m pixel by multiplying with ESA Croplands 10m\n",
    "4. Reclassify the 8-days based value on the pixel into monthly information\n",
    "5. Do the Zonal Histogram to get count of pixel for each pixel value (month) by admin boundary\n",
    "6. Convert the DBF output from Zonal Histogram into csv\n",
    "\n",
    "Below script will read output from point 6, then calculate total of hectares on each pheno class month-by-month"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "1549fa22",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Define the input directory\n",
    "input_directory = \"/mnt/x/Temp/modis/mmr/gee/11_timesat_raw/geotiff/step04_zonalhistogram/all/output\"\n",
    "\n",
    "# Change the current working directory to the input directory\n",
    "os.chdir(input_directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4576c9e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:04<00:00, 12.77file/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing completed!\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "years = range(2003, 2024)\n",
    "phenos = [\"sos\", \"mos\", \"eos\"]\n",
    "\n",
    "total_files = len(years) * len(phenos)\n",
    "progress_bar = tqdm(total=total_files, desc=\"Processing\", unit=\"file\")\n",
    "\n",
    "for year in years:\n",
    "    for pheno in phenos:\n",
    "        season1_file = os.path.join(input_directory, f\"data_mmr_{year}_{pheno}_season1.csv\")\n",
    "        season2_file = os.path.join(input_directory, f\"data_mmr_{year}_{pheno}_season2.csv\")\n",
    "        output_file = os.path.join(input_directory, f\"data_mmr_{year}_{pheno}.csv\")\n",
    "\n",
    "        if os.path.exists(season1_file) and os.path.exists(season2_file):\n",
    "            # Read the CSV files\n",
    "            season1 = pd.read_csv(season1_file, delimiter='\\t')\n",
    "            season2 = pd.read_csv(season2_file, delimiter='\\t')\n",
    "\n",
    "            # Transpose the dataframes\n",
    "            season1_T = season1.set_index(\"LABEL\").transpose()\n",
    "            season2_T = season2.set_index(\"LABEL\").transpose()\n",
    "\n",
    "            # Rename the columns\n",
    "            season1_T.columns = [f\"{month}_S1\" for month in season1_T.columns]\n",
    "            season2_T.columns = [f\"{month}_S2\" for month in season2_T.columns]\n",
    "\n",
    "            # Create a list of all possible admin codes\n",
    "            all_admin_codes = set(season1_T.index).union(set(season2_T.index))\n",
    "\n",
    "            # Create a list of all months\n",
    "            all_months = list(range(1, 13))\n",
    "\n",
    "            # Create a dataframe with all possible combinations of Admin and Month\n",
    "            template_df = pd.DataFrame(index=pd.Index(sorted(list(all_admin_codes))), columns=[f\"{month}_S1\" for month in all_months] + [f\"{month}_S2\" for month in all_months])\n",
    "\n",
    "            # Fill the template dataframe with values from season1_T and season2_T, filling missing values with 0\n",
    "            for col in season1_T.columns:\n",
    "                template_df[col] = season1_T[col]\n",
    "\n",
    "            for col in season2_T.columns:\n",
    "                template_df[col] = season2_T[col]\n",
    "\n",
    "            template_df.fillna(0, inplace=True)\n",
    "\n",
    "            # Reset the index\n",
    "            merged = template_df.reset_index()\n",
    "\n",
    "            # Rename the index column to \"ADM3_PCODE\"\n",
    "            merged.rename(columns={\"index\": \"DT_PCODE\"}, inplace=True)\n",
    "            \n",
    "            # Add Jan-{yyyy} to Dec-{yyyy} columns\n",
    "            for month in range(1, 13):\n",
    "                merged[f\"{month}_{year}\"] = (merged[f\"{month}_S1\"] + merged[f\"{month}_S2\"]) * 0.09\n",
    "                # 0.01 came from ESA croplands spatial resolution 10m. Each pixel has 100 sqm, converted to hectares\n",
    "                # by multipling it with 0.01\n",
    "                # 0.09 came from USGS LGRIP30 spatial resolution 30m. Each pixel has 900 sqm, converted to hectares\n",
    "                # by multipling it with 0.09\n",
    "\n",
    "            # Save the merged dataframe to a new CSV file\n",
    "            merged.to_csv(output_file, index=False)\n",
    "\n",
    "        progress_bar.update(1)\n",
    "\n",
    "progress_bar.close()\n",
    "print(\"Processing completed!\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0f5a05b",
   "metadata": {},
   "source": [
    "Then this will be compiling all the output (concatenating the columns that result from merging Season1 and Season2 and multiplying by 0.01) into one."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b69e412e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 42.68file/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Processing completed!\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "years = range(2003, 2024)\n",
    "phenos = [\"sos\", \"mos\", \"eos\"]\n",
    "\n",
    "# Create empty dataframes for each phenological phase\n",
    "all_phenos_data = {pheno: pd.DataFrame() for pheno in phenos}\n",
    "\n",
    "total_files = len(years) * len(phenos)\n",
    "progress_bar = tqdm(total=total_files, desc=\"Processing\", unit=\"file\")\n",
    "\n",
    "for year in years:\n",
    "    for pheno in phenos:\n",
    "        input_file = os.path.join(input_directory, f\"data_mmr_{year}_{pheno}.csv\")\n",
    "\n",
    "        if os.path.exists(input_file):\n",
    "            # Read the CSV file\n",
    "            data = pd.read_csv(input_file)\n",
    "\n",
    "            # Select only the columns that result from merging Season1 and Season2 and multiplying by 0.01\n",
    "            data = data[[\"DT_PCODE\"] + [f\"{month}_{year}\" for month in range(1, 13)]]\n",
    "\n",
    "            # Check if the all_phenos_data[pheno] dataframe is empty, if yes assign the current dataframe\n",
    "            if all_phenos_data[pheno].empty:\n",
    "                all_phenos_data[pheno] = data\n",
    "            else:\n",
    "                # Merge the data with the existing dataframe based on the \"ADM3_PCODE\" column\n",
    "                all_phenos_data[pheno] = all_phenos_data[pheno].merge(data, on=\"DT_PCODE\", how=\"outer\")\n",
    "\n",
    "        progress_bar.update(1)\n",
    "\n",
    "progress_bar.close()\n",
    "\n",
    "# Save the concatenated dataframes to new CSV files\n",
    "for pheno in phenos:\n",
    "    all_phenos_data[pheno].to_csv(f\"data_mmr_{pheno}.csv\", index=False)\n",
    "\n",
    "print(\"Processing completed!\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b8e5ee0c",
   "metadata": {},
   "source": [
    "Calculates the mean for each month across all the years, ignoring zeros, and saves the results in a new CSV file for each pheno phase"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "066f373b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean data for sos saved to data_mmr_sos_mean.csv\n",
      "Mean data for mos saved to data_mmr_mos_mean.csv\n",
      "Mean data for eos saved to data_mmr_eos_mean.csv\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "\n",
    "phenos = [\"sos\", \"mos\", \"eos\"]\n",
    "\n",
    "def mean_ignore_zeros(arr):\n",
    "    arr_no_zeros = arr[arr != 0]\n",
    "    return np.mean(arr_no_zeros)\n",
    "\n",
    "for pheno in phenos:\n",
    "    input_file = f\"data_mmr_{pheno}.csv\"\n",
    "    \n",
    "    if os.path.exists(input_file):\n",
    "        data = pd.read_csv(input_file)\n",
    "        mean_data = pd.DataFrame()\n",
    "        \n",
    "        mean_data[\"DT_PCODE\"] = data[\"DT_PCODE\"]\n",
    "\n",
    "        for month in range(1, 13):\n",
    "            # Get the columns for the same month across all years\n",
    "            month_columns = [f\"{month}_{year}\" for year in range(2003, 2022)]\n",
    "            month_data = data[month_columns]\n",
    "            \n",
    "            # Calculate the mean ignoring zeros\n",
    "            mean_month_data = month_data.apply(mean_ignore_zeros, axis=1)\n",
    "            \n",
    "            # Add the mean data to the mean_data dataframe with the corresponding month name\n",
    "            mean_data[f\"{month:02d}\"] = mean_month_data\n",
    "        \n",
    "        mean_data.to_csv(f\"data_mmr_{pheno}_mean.csv\", index=False)\n",
    "        print(f\"Mean data for {pheno} saved to data_mmr_{pheno}_mean.csv\")\n",
    "    else:\n",
    "        print(f\"File {input_file} not found.\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "02ac7c0c",
   "metadata": {},
   "source": [
    "Calculates the ratio anomaly for each pheno phase using the input data from data_syr_{pheno}.csv and data_syr_{pheno}_mean.csv, and saves the results in a new CSV file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f4ad9a80",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ratio anomaly data for sos saved to data_mmr_sos_ratio_anomaly.csv\n",
      "Ratio anomaly data for mos saved to data_mmr_mos_ratio_anomaly.csv\n",
      "Ratio anomaly data for eos saved to data_mmr_eos_ratio_anomaly.csv\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "from tqdm import tqdm\n",
    "\n",
    "phenos = [\"sos\", \"mos\", \"eos\"]\n",
    "\n",
    "for pheno in phenos:\n",
    "    data_file = f\"data_mmr_{pheno}.csv\"\n",
    "    mean_file = f\"data_mmr_{pheno}_mean.csv\"\n",
    "    \n",
    "    if os.path.exists(data_file) and os.path.exists(mean_file):\n",
    "        data = pd.read_csv(data_file)\n",
    "        mean_data = pd.read_csv(mean_file)\n",
    "        \n",
    "        ratio_anomaly_data = [data[[\"DT_PCODE\"]].copy()]\n",
    "        \n",
    "        for year in range(2003, 2024):\n",
    "            for month in range(1, 13):\n",
    "                data_col = f\"{month}_{year}\"\n",
    "                mean_col = f\"{month:02d}\"\n",
    "                \n",
    "                ratio_anomaly = 100 * data[data_col] / mean_data[mean_col]\n",
    "                ratio_anomaly_data.append(pd.DataFrame(ratio_anomaly, columns=[data_col]))\n",
    "        \n",
    "        ratio_anomaly_data = pd.concat(ratio_anomaly_data, axis=1)\n",
    "        ratio_anomaly_data.to_csv(f\"data_mmr_{pheno}_ratio_anomaly.csv\", index=False)\n",
    "        print(f\"Ratio anomaly data for {pheno} saved to data_mmr_{pheno}_ratio_anomaly.csv\")\n",
    "    else:\n",
    "        print(f\"Files {data_file} or {mean_file} not found.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a0427ca",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
	{
	"cells": [
	{
	"cell_type": "markdown",
	"id": "ac39c421",
	"metadata": {},
	"source": [
	"The phenological metrics data extraction consist of:\n",
	"1. Extract seasonality variable from TIMESAT with naming convention `{iso3}_data_{yyyy}_{phenos}_season{1/2}`\n",
	"2. Translate the BIL image into GeoTIFF\n",
	"3. Remove unnecessary 250m pixel by multiplying with ESA Croplands 10m\n",
	"4. Reclassify the 8-days based value on the pixel into monthly information\n",
	"5. Do the Zonal Histogram to get count of pixel for each pixel value (month) by admin boundary\n",
	"6. Convert the DBF output from Zonal Histogram into csv\n",
	"\n",
	"Below script will read output from point 6, then calculate total of hectares on each pheno class month-by-month"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"id": "1549fa22",
	"metadata": {},
	"outputs": [],
	"source": [
	"import os\n",
	"\n",
	"# Define the input directory\n",
	"input_directory = \"/mnt/x/Temp/modis/mmr/gee/11_timesat_raw/geotiff/step04_zonalhistogram/all/output\"\n",
	"\n",
	"# Change the current working directory to the input directory\n",
	"os.chdir(input_directory)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"id": "4576c9e4",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Processing: 100%\|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████\| 63/63 [00:04<00:00, 12.77file/s]"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Processing completed!\n"
	]
	},
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"\n"
	]
	}
	],
	"source": [
	"import pandas as pd\n",
	"from tqdm import tqdm\n",
	"\n",
	"years = range(2003, 2024)\n",
	"phenos = [\"sos\", \"mos\", \"eos\"]\n",
	"\n",
	"total_files = len(years) * len(phenos)\n",
	"progress_bar = tqdm(total=total_files, desc=\"Processing\", unit=\"file\")\n",
	"\n",
	"for year in years:\n",
	" for pheno in phenos:\n",
	" season1_file = os.path.join(input_directory, f\"data_mmr_{year}_{pheno}_season1.csv\")\n",
	" season2_file = os.path.join(input_directory, f\"data_mmr_{year}_{pheno}_season2.csv\")\n",
	" output_file = os.path.join(input_directory, f\"data_mmr_{year}_{pheno}.csv\")\n",
	"\n",
	" if os.path.exists(season1_file) and os.path.exists(season2_file):\n",
	" # Read the CSV files\n",
	" season1 = pd.read_csv(season1_file, delimiter='\\t')\n",
	" season2 = pd.read_csv(season2_file, delimiter='\\t')\n",
	"\n",
	" # Transpose the dataframes\n",
	" season1_T = season1.set_index(\"LABEL\").transpose()\n",
	" season2_T = season2.set_index(\"LABEL\").transpose()\n",
	"\n",
	" # Rename the columns\n",
	" season1_T.columns = [f\"{month}_S1\" for month in season1_T.columns]\n",
	" season2_T.columns = [f\"{month}_S2\" for month in season2_T.columns]\n",
	"\n",
	" # Create a list of all possible admin codes\n",
	" all_admin_codes = set(season1_T.index).union(set(season2_T.index))\n",
	"\n",
	" # Create a list of all months\n",
	" all_months = list(range(1, 13))\n",
	"\n",
	" # Create a dataframe with all possible combinations of Admin and Month\n",
	" template_df = pd.DataFrame(index=pd.Index(sorted(list(all_admin_codes))), columns=[f\"{month}_S1\" for month in all_months] + [f\"{month}_S2\" for month in all_months])\n",
	"\n",
	" # Fill the template dataframe with values from season1_T and season2_T, filling missing values with 0\n",
	" for col in season1_T.columns:\n",
	" template_df[col] = season1_T[col]\n",
	"\n",
	" for col in season2_T.columns:\n",
	" template_df[col] = season2_T[col]\n",
	"\n",
	" template_df.fillna(0, inplace=True)\n",
	"\n",
	" # Reset the index\n",
	" merged = template_df.reset_index()\n",
	"\n",
	" # Rename the index column to \"ADM3_PCODE\"\n",
	" merged.rename(columns={\"index\": \"DT_PCODE\"}, inplace=True)\n",
	" \n",
	" # Add Jan-{yyyy} to Dec-{yyyy} columns\n",
	" for month in range(1, 13):\n",
	" merged[f\"{month}_{year}\"] = (merged[f\"{month}_S1\"] + merged[f\"{month}_S2\"]) * 0.09\n",
	" # 0.01 came from ESA croplands spatial resolution 10m. Each pixel has 100 sqm, converted to hectares\n",
	" # by multipling it with 0.01\n",
	" # 0.09 came from USGS LGRIP30 spatial resolution 30m. Each pixel has 900 sqm, converted to hectares\n",
	" # by multipling it with 0.09\n",
	"\n",
	" # Save the merged dataframe to a new CSV file\n",
	" merged.to_csv(output_file, index=False)\n",
	"\n",
	" progress_bar.update(1)\n",
	"\n",
	"progress_bar.close()\n",
	"print(\"Processing completed!\")\n"
	]
	},
	{
	"cell_type": "markdown",
	"id": "a0f5a05b",
	"metadata": {},
	"source": [
	"Then this will be compiling all the output (concatenating the columns that result from merging Season1 and Season2 and multiplying by 0.01) into one."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"id": "b69e412e",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Processing: 100%\|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████\| 63/63 [00:01<00:00, 42.68file/s]\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Processing completed!\n"
	]
	}
	],
	"source": [
	"import pandas as pd\n",
	"from tqdm import tqdm\n",
	"\n",
	"years = range(2003, 2024)\n",
	"phenos = [\"sos\", \"mos\", \"eos\"]\n",
	"\n",
	"# Create empty dataframes for each phenological phase\n",
	"all_phenos_data = {pheno: pd.DataFrame() for pheno in phenos}\n",
	"\n",
	"total_files = len(years) * len(phenos)\n",
	"progress_bar = tqdm(total=total_files, desc=\"Processing\", unit=\"file\")\n",
	"\n",
	"for year in years:\n",
	" for pheno in phenos:\n",
	" input_file = os.path.join(input_directory, f\"data_mmr_{year}_{pheno}.csv\")\n",
	"\n",
	" if os.path.exists(input_file):\n",
	" # Read the CSV file\n",
	" data = pd.read_csv(input_file)\n",
	"\n",
	" # Select only the columns that result from merging Season1 and Season2 and multiplying by 0.01\n",
	" data = data[[\"DT_PCODE\"] + [f\"{month}_{year}\" for month in range(1, 13)]]\n",
	"\n",
	" # Check if the all_phenos_data[pheno] dataframe is empty, if yes assign the current dataframe\n",
	" if all_phenos_data[pheno].empty:\n",
	" all_phenos_data[pheno] = data\n",
	" else:\n",
	" # Merge the data with the existing dataframe based on the \"ADM3_PCODE\" column\n",
	" all_phenos_data[pheno] = all_phenos_data[pheno].merge(data, on=\"DT_PCODE\", how=\"outer\")\n",
	"\n",
	" progress_bar.update(1)\n",
	"\n",
	"progress_bar.close()\n",
	"\n",
	"# Save the concatenated dataframes to new CSV files\n",
	"for pheno in phenos:\n",
	" all_phenos_data[pheno].to_csv(f\"data_mmr_{pheno}.csv\", index=False)\n",
	"\n",
	"print(\"Processing completed!\")\n"
	]
	},
	{
	"cell_type": "markdown",
	"id": "b8e5ee0c",
	"metadata": {},
	"source": [
	"Calculates the mean for each month across all the years, ignoring zeros, and saves the results in a new CSV file for each pheno phase"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"id": "066f373b",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Mean data for sos saved to data_mmr_sos_mean.csv\n",
	"Mean data for mos saved to data_mmr_mos_mean.csv\n",
	"Mean data for eos saved to data_mmr_eos_mean.csv\n"
	]
	}
	],
	"source": [
	"import pandas as pd\n",
	"import os\n",
	"import numpy as np\n",
	"from tqdm import tqdm\n",
	"\n",
	"phenos = [\"sos\", \"mos\", \"eos\"]\n",
	"\n",
	"def mean_ignore_zeros(arr):\n",
	" arr_no_zeros = arr[arr != 0]\n",
	" return np.mean(arr_no_zeros)\n",
	"\n",
	"for pheno in phenos:\n",
	" input_file = f\"data_mmr_{pheno}.csv\"\n",
	" \n",
	" if os.path.exists(input_file):\n",
	" data = pd.read_csv(input_file)\n",
	" mean_data = pd.DataFrame()\n",
	" \n",
	" mean_data[\"DT_PCODE\"] = data[\"DT_PCODE\"]\n",
	"\n",
	" for month in range(1, 13):\n",
	" # Get the columns for the same month across all years\n",
	" month_columns = [f\"{month}_{year}\" for year in range(2003, 2022)]\n",
	" month_data = data[month_columns]\n",
	" \n",
	" # Calculate the mean ignoring zeros\n",
	" mean_month_data = month_data.apply(mean_ignore_zeros, axis=1)\n",
	" \n",
	" # Add the mean data to the mean_data dataframe with the corresponding month name\n",
	" mean_data[f\"{month:02d}\"] = mean_month_data\n",
	" \n",
	" mean_data.to_csv(f\"data_mmr_{pheno}_mean.csv\", index=False)\n",
	" print(f\"Mean data for {pheno} saved to data_mmr_{pheno}_mean.csv\")\n",
	" else:\n",
	" print(f\"File {input_file} not found.\")\n"
	]
	},
	{
	"cell_type": "markdown",
	"id": "02ac7c0c",
	"metadata": {},
	"source": [
	"Calculates the ratio anomaly for each pheno phase using the input data from data_syr_{pheno}.csv and data_syr_{pheno}_mean.csv, and saves the results in a new CSV file"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"id": "f4ad9a80",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Ratio anomaly data for sos saved to data_mmr_sos_ratio_anomaly.csv\n",
	"Ratio anomaly data for mos saved to data_mmr_mos_ratio_anomaly.csv\n",
	"Ratio anomaly data for eos saved to data_mmr_eos_ratio_anomaly.csv\n"
	]
	}
	],
	"source": [
	"import pandas as pd\n",
	"import os\n",
	"from tqdm import tqdm\n",
	"\n",
	"phenos = [\"sos\", \"mos\", \"eos\"]\n",
	"\n",
	"for pheno in phenos:\n",
	" data_file = f\"data_mmr_{pheno}.csv\"\n",
	" mean_file = f\"data_mmr_{pheno}_mean.csv\"\n",
	" \n",
	" if os.path.exists(data_file) and os.path.exists(mean_file):\n",
	" data = pd.read_csv(data_file)\n",
	" mean_data = pd.read_csv(mean_file)\n",
	" \n",
	" ratio_anomaly_data = [data[[\"DT_PCODE\"]].copy()]\n",
	" \n",
	" for year in range(2003, 2024):\n",
	" for month in range(1, 13):\n",
	" data_col = f\"{month}_{year}\"\n",
	" mean_col = f\"{month:02d}\"\n",
	" \n",
	" ratio_anomaly = 100 * data[data_col] / mean_data[mean_col]\n",
	" ratio_anomaly_data.append(pd.DataFrame(ratio_anomaly, columns=[data_col]))\n",
	" \n",
	" ratio_anomaly_data = pd.concat(ratio_anomaly_data, axis=1)\n",
	" ratio_anomaly_data.to_csv(f\"data_mmr_{pheno}_ratio_anomaly.csv\", index=False)\n",
	" print(f\"Ratio anomaly data for {pheno} saved to data_mmr_{pheno}_ratio_anomaly.csv\")\n",
	" else:\n",
	" print(f\"Files {data_file} or {mean_file} not found.\")\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "6a0427ca",
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}