Created
April 8, 2023 15:24
-
-
Save ncalm/12da3e494ad33b13fa351e74ef8541cd to your computer and use it in GitHub Desktop.
Excel BI PQ67 in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| url = 'https://onedrive.live.com/download?resid=E11B26EEAACB7947!8221&ithint=file%2cxlsx&authkey=!AB0T-fSTNVawR3g' | |
| df = pd.read_excel(url, sheet_name = 'Sheet1') | |
| # fills down a column in a dataframe with the first non-blank value above | |
| # used for House and Name, so defined as a function here | |
| fill_down = lambda column : lambda df: df[column].ffill() | |
| df_pivot = (df[['Data1','Data2']] | |
| .pivot(columns='Data1', values='Data2') | |
| .rename(columns={'House No.': 'House'}) | |
| .assign(House=fill_down('House')) | |
| # Drops rows where the only non-blank column is House | |
| .dropna(subset=['Gender', 'Age', 'Profession', 'Name'], how='all') | |
| .assign(Name=fill_down('Name')) | |
| # first() uses the first non-blank value in each of the non-group index columns | |
| # effectively collapses the gender, age and profession into a single row | |
| .groupby(['House','Name'], as_index=False).first() | |
| .reindex(columns=['House', 'Name', 'Gender', 'Age', 'Profession'])) | |
| print(df_pivot.to_string(index=False)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment