Skip to content

Instantly share code, notes, and snippets.

@shimo164
Created January 3, 2025 08:54
Show Gist options
  • Save shimo164/9e72f6dffebce31f6503c1f529da4e1e to your computer and use it in GitHub Desktop.
Save shimo164/9e72f6dffebce31f6503c1f529da4e1e to your computer and use it in GitHub Desktop.
Python script to extract the part of csv columns
"""
Python script to extract a subset of columns from a CSV file.
Params:
input_file: str : file path for original cost-explore report csv
output_file: str : file path for extracted csv
columns_to_keep: list[str] : list of columns to extract
"""
import sys
import pandas as pd
# Define the input and output file paths
input_file = "/path/to/my-daily-report-00001.csv"
output_file = "/path/to/Downloads/my-daily-report-00001-extracted.csv"

# Specify the columns you want to keep (the output preserves this order)
columns_to_keep = [
    "lineItem/UsageStartDate",
    "lineItem/UsageEndDate",
    "lineItem/ProductCode",
    "lineItem/UsageType",
    "lineItem/Operation",
    "lineItem/AvailabilityZone",
    "lineItem/ResourceId",
    "lineItem/UsageAmount",
    "lineItem/NormalizationFactor",
    "lineItem/NormalizedUsageAmount",
    "lineItem/CurrencyCode",
    "lineItem/BlendedCost",
    "lineItem/LineItemDescription",
]


class MissingColumnsError(ValueError):
    """Raised when the input CSV lacks one or more requested columns."""


def find_missing_columns(available, required):
    """Return the entries of *required* that are absent from *available*.

    Args:
        available: Iterable of column names present in the input file.
        required: Iterable of column names that must be present.

    Returns:
        A list of the missing names, in the order they appear in *required*.
    """
    present = set(available)
    return [col for col in required if col not in present]


def extract_columns(input_file: str, output_file: str, columns_to_keep: list[str]) -> None:
    """Copy the selected columns of a CSV file into a new CSV file.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write (overwritten if it exists).
        columns_to_keep: Column names to extract; the output columns follow
            this order.

    Raises:
        MissingColumnsError: If any requested column is not in the input file.
            Nothing is written in that case.
    """
    # Parse only the header row first so validation is cheap even for very
    # large reports.
    header = pd.read_csv(input_file, nrows=0).columns
    missing_columns = find_missing_columns(header, columns_to_keep)
    if missing_columns:
        raise MissingColumnsError(
            f"The following columns are missing in the input file: {missing_columns}"
        )

    # low_memory=False makes pandas infer each column's dtype from the whole
    # file, avoiding the mixed-dtype warning; usecols skips parsing columns
    # that would be discarded anyway.
    df = pd.read_csv(input_file, usecols=columns_to_keep, low_memory=False)

    # usecols ignores the order of its argument, so reorder explicitly.
    df_extracted = df[columns_to_keep]

    # Save the extracted DataFrame to a new CSV file without the index
    df_extracted.to_csv(output_file, index=False)


def main(source=None, destination=None, columns=None) -> int:
    """Run the extraction and return a process exit status.

    Each argument falls back to the corresponding module-level setting
    (``input_file``, ``output_file``, ``columns_to_keep``) when omitted.

    Returns:
        0 on success, 1 if requested columns are missing from the input.
    """
    source = input_file if source is None else source
    destination = output_file if destination is None else destination
    columns = columns_to_keep if columns is None else columns

    try:
        extract_columns(source, destination, columns)
    except MissingColumnsError as err:
        # Report on stderr and use a non-zero status so callers (shells, CI)
        # can detect the failure.
        print(f"Error: {err}", file=sys.stderr)
        return 1

    print(f"Extraction complete. Saved to: {destination}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment