Last active
April 9, 2023 15:33
-
-
Save ncalm/aab7c59371d70e4668cf9d3b6738d338 to your computer and use it in GitHub Desktop.
Python solution to Excel BI PQ68 challenge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import calendar | |
| url_raw = 'https://onedrive.live.com/view.aspx?resid=E11B26EEAACB7947!8228&ithint=file%2cxlsx&authkey=!AOypSEkMdUjdBN0' | |
| url = url_raw.replace('view.aspx','download') | |
| df = pd.read_excel(url, sheet_name = 'Sheet1') | |
| def combine_dicts(dicts): | |
| result = {} | |
| for d in dicts: | |
| result |= d | |
| return dict(sorted(result.items())) | |
| df_output = ( | |
| df.iloc[:, :4] | |
| .assign( | |
| BD=lambda x: x.apply(lambda row: pd.date_range(row['From'], row['To'], freq='B').date, axis=1), | |
| Months=lambda x: x.apply(lambda row: {date.month: sum(date.month == d.month for d in row['BD']) for date in row['BD']}, axis=1) | |
| ) | |
| .drop(['From', 'To', 'BD'], axis=1) | |
| .groupby('Company').agg({'Months': combine_dicts, 'Sales': 'sum'}) | |
| .reset_index().set_index(['Company', 'Sales'])['Months'].apply(pd.Series) | |
| .rename(columns=calendar.month_abbr.__getitem__).reset_index() | |
| .assign(**{'Row Total': lambda x: x.loc[:, 'Jan':'Dec'].sum(axis=1)}) | |
| ) | |
| df_output.loc['Total'] = df_output.sum(numeric_only=True) | |
| df_output.at['Total', 'Company'] = 'Column Total' | |
| print(df_output.to_string(index=False)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import calendar | |
| url_raw = 'https://onedrive.live.com/view.aspx?resid=E11B26EEAACB7947!8228&ithint=file%2cxlsx&authkey=!AOypSEkMdUjdBN0' | |
| url = url_raw.replace('view.aspx','download') | |
| df = pd.read_excel(url, sheet_name = 'Sheet1') | |
| def combine_dicts(dicts): | |
| result = {} | |
| for d in dicts: | |
| result |= d | |
| return dict(sorted(result.items())) | |
| df_output = ( | |
| df.iloc[:, :4] | |
| # first create a list of business days between From and To | |
| # then create a dictionary where the key is the month number and the value is the count of business days in that month | |
| .assign( | |
| BD=lambda x: x.apply(lambda row: pd.date_range(row['From'], row['To'], freq='B').date, axis=1), | |
| Months=lambda x: x.apply(lambda row: {date.month: sum(date.month == d.month for d in row['BD']) for date in row['BD']}, axis=1) | |
| ) | |
| # don't need these columns any more | |
| .drop(['From', 'To', 'BD'], axis=1) | |
| # combine the dictionaries on each company row and sum the sales for each company | |
| .groupby('Company').agg({'Months': combine_dicts, 'Sales': 'sum'}) | |
| # pd.Series here converts each dictionary into a Series where the key becomes the index of the series | |
| # and the values of the dictionary become the values of the series | |
| # pd.Series is applied to the column of dictionaries to create one series per unique key | |
| # these are then combined into data frame columns | |
| .reset_index().set_index(['Company', 'Sales'])['Months'].apply(pd.Series) | |
| # convert the numeric month column headers to the three letter abbr. of the month (e.g. 1=Jan) | |
| .rename(columns=calendar.month_abbr.__getitem__).reset_index() | |
| # create the row total column by summing the month columns | |
| .assign(**{'Row Total': lambda x: x.loc[:, 'Jan':'Dec'].sum(axis=1)}) | |
| ) | |
| # create the column total row | |
| df_output.loc['Total'] = df_output.sum(numeric_only=True) | |
| df_output.at['Total', 'Company'] = 'Column Total' | |
| print(df_output.to_string(index=False)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment