Skip to content

Instantly share code, notes, and snippets.

@major
Created May 25, 2023 19:24
Show Gist options
  • Save major/888692cfd7fae59d102149fc37c05d2b to your computer and use it in GitHub Desktop.
Save major/888692cfd7fae59d102149fc37c05d2b to your computer and use it in GitHub Desktop.
EC2 Linux pricing into CSV
#!/usr/bin/env python
import json
import pandas as pd
df = pd.read_csv(
"us-east-1.csv",
header=5,
usecols=[
"Instance Type",
"vCPU",
"TermType",
"PricePerUnit",
"Tenancy",
"Operating System",
"CapacityStatus",
"Pre Installed S/W",
"License Model",
],
)
# Filter it down to just shared, on-demand, no license required, and no extra software.
df = (
df.loc[
(df["Operating System"].isna() == False)
& (df["Tenancy"] == "Shared")
& (df["TermType"] == "OnDemand")
& (df["CapacityStatus"] == "Used")
& (df["Pre Installed S/W"].isna())
& (df["License Model"] == "No License required")
& (df["Operating System"] != "Red Hat Enterprise Linux with HA")
]
.sort_values(by=["Instance Type"], ascending=True)
.reset_index(drop=True)
)
# Create a very basic view.
df = df[["Instance Type", "vCPU", "PricePerUnit", "Operating System"]]
# Get a list of operating systems.
operating_systems = df["Operating System"].unique()
os_dataframes = {}
for os in operating_systems:
print(os)
df_os = df.loc[(df["Operating System"] == os)]
os_string = os.replace(" ", "_").lower()
df_os = df_os.drop(columns=["vCPU", "Operating System"])
df_os = df_os.rename(columns={"PricePerUnit": f"{os_string}_per_hour"})
os_dataframes[os] = df_os
# Start with a base dataframe to join up the other dataframes.
base_df = df[["Instance Type", "vCPU"]].drop_duplicates()
for os, os_df in os_dataframes.items():
base_df = base_df.merge(os_df, on="Instance Type", how="outer")
base_df.to_csv("all_os_pricing.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment