ncalm · April 8, 2023 15:24
diff --git a/excelbi-pq67.py b/excelbi-pq67.py
 import pandas as pd

 url = 'https://onedrive.live.com/download?resid=E11B26EEAACB7947!8221&ithint=file%2cxlsx&authkey=!AB0T-fSTNVawR3g'
 df = pd.read_excel(url, sheet_name = 'Sheet1')

 # fills down a column in a dataframe with the first non-blank value above
 # used for House and Name, so defined as a function here
 fill_down = lambda column : lambda df: df[column].ffill()

 df_pivot = (df[['Data1','Data2']]
            .pivot(columns='Data1', values='Data2')
            .rename(columns={'House No.': 'House'}) 
            .assign(House=fill_down('House'))

            # Drops rows where the only non-blank column is House
            .dropna(subset=['Gender', 'Age', 'Profession', 'Name'], how='all')
            .assign(Name=fill_down('Name'))

            # first() uses the first non-blank value in each of the non-group index columns
            # effectively collapses the gender, age and profession into a single row
            .groupby(['House','Name'], as_index=False).first()
            .reindex(columns=['House', 'Name', 'Gender', 'Age', 'Profession']))
 print(df_pivot.to_string(index=False))
	import pandas as pd

	url = 'https://onedrive.live.com/download?resid=E11B26EEAACB7947!8221&ithint=file%2cxlsx&authkey=!AB0T-fSTNVawR3g'
	df = pd.read_excel(url, sheet_name = 'Sheet1')

	# fills down a column in a dataframe with the first non-blank value above
	# used for House and Name, so defined as a function here
	fill_down = lambda column : lambda df: df[column].ffill()

	df_pivot = (df[['Data1','Data2']]
	.pivot(columns='Data1', values='Data2')
	.rename(columns={'House No.': 'House'})
	.assign(House=fill_down('House'))

	# Drops rows where the only non-blank column is House
	.dropna(subset=['Gender', 'Age', 'Profession', 'Name'], how='all')
	.assign(Name=fill_down('Name'))

	# first() uses the first non-blank value in each of the non-group index columns
	# effectively collapses the gender, age and profession into a single row
	.groupby(['House','Name'], as_index=False).first()
	.reindex(columns=['House', 'Name', 'Gender', 'Age', 'Profession']))
	print(df_pivot.to_string(index=False))
No results found