Join data attributes, based on a CSV's key field, with the DBF output from ArcGIS Zonal Statistics
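The notebook below reads each DBF table produced by the zonal-statistics run with simpledbf, renames the chosen statistic column (MIN, MAX, or MEAN) to the date parsed from the filename, and joins everything to the admin-boundary CSV on its key field. A minimal sketch of that core join, assuming a hypothetical DBF/CSV pair keyed on ADM3_PCODE (file names here are placeholders, not the actual data):

import pandas as pd
from simpledbf import Dbf5

# Hypothetical inputs; paths are placeholders
base_df = pd.read_csv('adm3_join.csv', sep=';')            # boundary attributes keyed by ADM3_PCODE
zonal_df = Dbf5('evi_zonal_20230101.dbf').to_dataframe()   # one zonal-statistics table

# Keep the key field plus one statistic, renamed to its acquisition date
zonal_df = zonal_df[['ADM3_PCODE', 'MEAN']].rename(columns={'MEAN': '20230101'})

# Left join so every admin unit in the base CSV is kept
joined = pd.merge(base_df, zonal_df, on='ADM3_PCODE', how='left')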
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "cd0c6a56",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing files for MIN: 100%|████████████████████████████████████| 1000/1000 [00:49<00:00, 20.06it/s]\n",
      "Processing files for MAX: 100%|████████████████████████████████████| 1000/1000 [00:46<00:00, 21.45it/s]\n",
      "Processing files for MEAN: 100%|███████████████████████████████████| 1000/1000 [00:43<00:00, 23.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "from simpledbf import Dbf5\n",
    "from tqdm import tqdm\n",
    "\n",
    "# ISO3 country code\n",
    "iso3 = \"syr\"  # syr, tur, lbn, mmr\n",
    "\n",
    "# Directory containing the DBF files from the zonal-statistics run\n",
    "dbf_dir = f'/mnt/x/Temp/modis/{iso3}/gee/13_tables/zonal/evi_all/orig'\n",
    "\n",
    "# Path to the base CSV file with the admin-3 attributes\n",
    "csv_file = f'/mnt/x/Temp/modis/{iso3}/bnd/{iso3}_adm3_join.csv'\n",
    "\n",
    "# Load the base CSV file into a DataFrame\n",
    "base_csv_df = pd.read_csv(csv_file, sep=';')\n",
    "\n",
    "# Get a list of all DBF files in the directory\n",
    "dbf_files = [f for f in os.listdir(dbf_dir) if f.endswith('.dbf')]\n",
    "\n",
    "def process_files(column_name):\n",
    "    # Create an empty DataFrame to store the merged data\n",
    "    merged_df = pd.DataFrame()\n",
    "\n",
    "    # Iterate over each DBF file in the directory\n",
    "    for filename in tqdm(dbf_files, desc=f\"Processing files for {column_name}\"):\n",
    "        # Extract the date from the filename\n",
    "        date = filename.rsplit('_', 1)[-1].rsplit('.', 1)[0]\n",
    "\n",
    "        # Load the DBF file into a DataFrame\n",
    "        dbf = Dbf5(os.path.join(dbf_dir, filename))\n",
    "        df = dbf.to_dataframe()\n",
    "\n",
    "        # Rename the statistic column to the extracted date\n",
    "        df = df.rename(columns={column_name: date})\n",
    "\n",
    "        # Keep only the key field and the renamed date column\n",
    "        df = df[['ADM3_PCODE', date]]\n",
    "\n",
    "        # If the merged DataFrame is empty, copy the current DataFrame to it\n",
    "        if merged_df.empty:\n",
    "            merged_df = df.copy()\n",
    "        else:\n",
    "            # Otherwise, merge the current DataFrame with the merged DataFrame\n",
    "            merged_df = pd.merge(merged_df, df, on='ADM3_PCODE', how='outer')\n",
    "\n",
    "    # Join the merged DataFrame with the base CSV on the 'ADM3_PCODE' key field;\n",
    "    # a left join keeps every record from the base CSV\n",
    "    final_df = pd.merge(base_csv_df, merged_df, on='ADM3_PCODE', how='left')\n",
    "\n",
    "    # Replace NaN values with an empty string\n",
    "    final_df.fillna('', inplace=True)\n",
    "\n",
    "    # Save the final DataFrame to a CSV file\n",
    "    final_df.to_csv(f'/mnt/x/Temp/modis/{iso3}/gee/13_tables/zonal/csv/8day/{iso3}_phy_evi_all_{column_name}_mxd13q1_adm3.csv', index=False)\n",
    "\n",
    "# Call the function for each of the three statistics\n",
    "process_files('MIN')\n",
    "process_files('MAX')\n",
    "process_files('MEAN')\n",
    "\n",
    "print('Completed')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "8dca9846",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Processing files for MIN: 100%|████████████████████████████████████| 1000/1000 [00:46<00:00, 21.72it/s]\n",
      "Processing files for MAX: 100%|████████████████████████████████████| 1000/1000 [00:47<00:00, 20.89it/s]\n",
      "Processing files for MEAN: 100%|███████████████████████████████████| 1000/1000 [00:51<00:00, 19.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Completed\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "from simpledbf import Dbf5\n",
    "from tqdm import tqdm\n",
    "\n",
    "# ISO3 country code\n",
    "iso3 = \"syr\"  # syr, tur, lbn, mmr\n",
    "\n",
    "# Directory containing the DBF files from the zonal-statistics run\n",
    "dbf_dir = f'/mnt/x/Temp/modis/{iso3}/gee/13_tables/zonal/evi_cropland/orig'\n",
    "\n",
    "# Path to the base CSV file with the admin-3 attributes\n",
    "csv_file = f'/mnt/x/Temp/modis/{iso3}/bnd/{iso3}_adm3_join.csv'\n",
    "\n",
    "# Load the base CSV file into a DataFrame\n",
    "base_csv_df = pd.read_csv(csv_file, sep=';')\n",
    "\n",
    "# Get a list of all DBF files in the directory\n",
    "dbf_files = [f for f in os.listdir(dbf_dir) if f.endswith('.dbf')]\n",
    "\n",
    "def process_files(column_name):\n",
    "    # Create an empty DataFrame to store the merged data\n",
    "    merged_df = pd.DataFrame()\n",
    "\n",
    "    # Iterate over each DBF file in the directory\n",
    "    for filename in tqdm(dbf_files, desc=f\"Processing files for {column_name}\"):\n",
    "        # Extract the date from the filename\n",
    "        date = filename.rsplit('_', 1)[-1].rsplit('.', 1)[0]\n",
    "\n",
    "        # Load the DBF file into a DataFrame\n",
    "        dbf = Dbf5(os.path.join(dbf_dir, filename))\n",
    "        df = dbf.to_dataframe()\n",
    "\n",
    "        # Rename the statistic column to the extracted date\n",
    "        df = df.rename(columns={column_name: date})\n",
    "\n",
    "        # Keep only the key field and the renamed date column\n",
    "        df = df[['ADM3_PCODE', date]]\n",
    "\n",
    "        # If the merged DataFrame is empty, copy the current DataFrame to it\n",
    "        if merged_df.empty:\n",
    "            merged_df = df.copy()\n",
    "        else:\n",
    "            # Otherwise, merge the current DataFrame with the merged DataFrame\n",
    "            merged_df = pd.merge(merged_df, df, on='ADM3_PCODE', how='outer')\n",
    "\n",
    "    # Join the merged DataFrame with the base CSV on the 'ADM3_PCODE' key field;\n",
    "    # a left join keeps every record from the base CSV\n",
    "    final_df = pd.merge(base_csv_df, merged_df, on='ADM3_PCODE', how='left')\n",
    "\n",
    "    # Replace NaN values with an empty string\n",
    "    final_df.fillna('', inplace=True)\n",
    "\n",
    "    # Save the final DataFrame to a CSV file\n",
    "    final_df.to_csv(f'/mnt/x/Temp/modis/{iso3}/gee/13_tables/zonal/csv/8day/{iso3}_phy_evi_cropland_{column_name}_mxd13q1_adm3.csv', index=False)\n",
    "\n",
    "# Call the function for each of the three statistics\n",
    "process_files('MIN')\n",
    "process_files('MAX')\n",
    "process_files('MEAN')\n",
    "\n",
    "print('Completed')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1106579",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
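Each exported CSV is wide: the attribute columns from the base CSV followed by one column per 8-day date. If a long (tidy) layout is easier to work with downstream, a melt over the date columns is one option; the file name and the id_cols list below are assumptions and should be adjusted to the actual output:

import pandas as pd

# Hypothetical example: one of the wide tables written by the notebook above (path is a placeholder)
wide = pd.read_csv('syr_phy_evi_all_MEAN_mxd13q1_adm3.csv')

# Columns copied from the base CSV identify each admin unit; everything else is one value per date.
id_cols = ['ADM3_PCODE']  # extend with the other attribute columns from the base CSV
long = wide.melt(id_vars=id_cols,
                 value_vars=[c for c in wide.columns if c not in id_cols],
                 var_name='date', value_name='evi_mean')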