Skip to content

Instantly share code, notes, and snippets.

@ncalm
Last active April 9, 2023 15:33
Show Gist options
  • Select an option

  • Save ncalm/aab7c59371d70e4668cf9d3b6738d338 to your computer and use it in GitHub Desktop.

Select an option

Save ncalm/aab7c59371d70e4668cf9d3b6738d338 to your computer and use it in GitHub Desktop.
Python solution to Excel BI PQ68 challenge
import pandas as pd
import calendar
url_raw = 'https://onedrive.live.com/view.aspx?resid=E11B26EEAACB7947!8228&ithint=file%2cxlsx&authkey=!AOypSEkMdUjdBN0'
url = url_raw.replace('view.aspx','download')
df = pd.read_excel(url, sheet_name = 'Sheet1')
def combine_dicts(dicts):
result = {}
for d in dicts:
result |= d
return dict(sorted(result.items()))
df_output = (
df.iloc[:, :4]
.assign(
BD=lambda x: x.apply(lambda row: pd.date_range(row['From'], row['To'], freq='B').date, axis=1),
Months=lambda x: x.apply(lambda row: {date.month: sum(date.month == d.month for d in row['BD']) for date in row['BD']}, axis=1)
)
.drop(['From', 'To', 'BD'], axis=1)
.groupby('Company').agg({'Months': combine_dicts, 'Sales': 'sum'})
.reset_index().set_index(['Company', 'Sales'])['Months'].apply(pd.Series)
.rename(columns=calendar.month_abbr.__getitem__).reset_index()
.assign(**{'Row Total': lambda x: x.loc[:, 'Jan':'Dec'].sum(axis=1)})
)
df_output.loc['Total'] = df_output.sum(numeric_only=True)
df_output.at['Total', 'Company'] = 'Column Total'
print(df_output.to_string(index=False))
import pandas as pd
import calendar
url_raw = 'https://onedrive.live.com/view.aspx?resid=E11B26EEAACB7947!8228&ithint=file%2cxlsx&authkey=!AOypSEkMdUjdBN0'
url = url_raw.replace('view.aspx','download')
df = pd.read_excel(url, sheet_name = 'Sheet1')
def combine_dicts(dicts):
result = {}
for d in dicts:
result |= d
return dict(sorted(result.items()))
df_output = (
df.iloc[:, :4]
# first create a list of business days between From and To
# then create a dictionary where the key is the month number and the value is the count of business days in that month
.assign(
BD=lambda x: x.apply(lambda row: pd.date_range(row['From'], row['To'], freq='B').date, axis=1),
Months=lambda x: x.apply(lambda row: {date.month: sum(date.month == d.month for d in row['BD']) for date in row['BD']}, axis=1)
)
# don't need these columns any more
.drop(['From', 'To', 'BD'], axis=1)
# combine the dictionaries on each company row and sum the sales for each company
.groupby('Company').agg({'Months': combine_dicts, 'Sales': 'sum'})
# pd.Series here converts each dictionary into a Series where the key becomes the index of the series
# and the values of the dictionary become the values of the series
# pd.Series is applied to the column of dictionaries to create one series per unique key
# these are then combined into data frame columns
.reset_index().set_index(['Company', 'Sales'])['Months'].apply(pd.Series)
# convert the numeric month column headers to the three letter abbr. of the month (e.g. 1=Jan)
.rename(columns=calendar.month_abbr.__getitem__).reset_index()
# create the row total column by summing the month columns
.assign(**{'Row Total': lambda x: x.loc[:, 'Jan':'Dec'].sum(axis=1)})
)
# create the column total row
df_output.loc['Total'] = df_output.sum(numeric_only=True)
df_output.at['Total', 'Company'] = 'Column Total'
print(df_output.to_string(index=False))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment