Created
June 23, 2022 05:06
-
-
Save sjtalkar/fdf38dbb198701997d630390ea9653b6 to your computer and use it in GitHub Desktop.
Use List.Generate to loop through columns in a table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let | |
Source = Pdf.Tables(File.Contents("G:\My Drive\BBSI\Paychex 2022-01-28 Payroll_Journal_Wed_Jan_26_14_02_00_EST_2022.pdf"), [Implementation="1.3"]), | |
this_page = Source{[Id="Page015"]}[Data], | |
//There is consistently two header and two footer rows that need to be removed | |
remove_botttom_three = Table.RemoveLastN(this_page,3), | |
remove_wccode = Table.ReplaceValue( remove_botttom_three, each [Column1], each if Text.StartsWith([Column1],"(") then null else [Column1] , Replacer.ReplaceValue,{"Column1"} ), | |
remove_title_and_company_name = Table.Skip(remove_wccode,2), | |
replace_with_blanks_for_totals = Table.ReplaceValue(remove_title_and_company_name,null,"",Replacer.ReplaceValue,{"Column1", "Column4", "Column5", "Column6"}), | |
//Remove Rows pattern | |
identify_companytotal_rows_to_remove = Table.AddColumn(replace_with_blanks_for_totals, "ROWS_TO_REMOVE", each if Text.StartsWith([Column1], "COMPANY") then "COMPANY" else null), | |
fill_company_totals_down = Table.FillDown(identify_companytotal_rows_to_remove,{"ROWS_TO_REMOVE"}), | |
remove_company_total_row = Table.SelectRows(fill_company_totals_down, each [ROWS_TO_REMOVE] = null), | |
remove_comptot_rows_meta = Table.RemoveColumns(remove_company_total_row,{"ROWS_TO_REMOVE"}), | |
identify_tot_rows_to_remove = Table.AddColumn(remove_comptot_rows_meta, "ROWS_TO_REMOVE", each if Text.Contains([Column4], "EMPLOYEE") | |
or Text.Contains([Column5], "EMPLOYEE") | |
or Text.Contains([Column6], "EMPLOYEE") | |
then "REMOVE ROW" | |
else "KEEP ROW"), | |
remove_employee_total_row = Table.SelectRows(identify_tot_rows_to_remove, each ([ROWS_TO_REMOVE] = "KEEP ROW")), | |
remove_tot_rows_meta = Table.RemoveColumns(remove_employee_total_row,{"ROWS_TO_REMOVE"}), | |
replace_ID_header = Table.ReplaceValue(remove_tot_rows_meta,"ID","",Replacer.ReplaceText,{"Column1"}), | |
recognize_company_end = Table.ReplaceValue(replace_ID_header,"TOTAL"," END_COMPANY ",Replacer.ReplaceText,{"Column1"}), | |
recognize_start_company = Table.ReplaceValue(recognize_company_end,"***","START_COMPANY ",Replacer.ReplaceText,{"Column1"}), | |
replace_nulls_with_blanks = Table.ReplaceValue(recognize_start_company,null,"",Replacer.ReplaceValue,{"Column1", "Column3", "Column18", "Column4"}), | |
identify_row_to_keep = Table.AddColumn(replace_nulls_with_blanks, "ROWS_TO_REMOVE", each if | |
Text.Contains([Column1], "**") | |
or Text.Contains([Column1], "TOTAL") | |
or Text.Contains([Column4], "EMPLOYEE") | |
or Text.Contains([Column3], "TOTAL") | |
or Text.Contains([Column1], "Person") | |
or Text.Contains([Column1], "Transaction") | |
or Text.Contains([Column1], "Run Date") | |
or Text.Contains([Column18], "LIABILITY") | |
then "REMOVE ROW" | |
else "KEEP ROW"), | |
keep_identified_rows = Table.SelectRows(identify_row_to_keep, each ([ROWS_TO_REMOVE] = "KEEP ROW")), | |
remove_keep_rows_meta = Table.RemoveColumns(keep_identified_rows,{"ROWS_TO_REMOVE"}), | |
add_index_for_self_merge_1 = Table.AddIndexColumn(remove_keep_rows_meta, "Index", 0, 1, Int64.Type), | |
add_index_for_self_merge_2 = Table.AddIndexColumn(add_index_for_self_merge_1, "Index.1", 1, 1, Int64.Type), | |
self_merge = Table.NestedJoin(add_index_for_self_merge_2, {"Index.1"}, add_index_for_self_merge_2, {"Index"}, "add_index_for_self_merge_2", JoinKind.LeftOuter), | |
extract_employee_id = Table.ExpandTableColumn(self_merge, "add_index_for_self_merge_2", {"Column1"}, {"add_index_for_self_merge_2.Column1"}), | |
create_empname_column = Table.AddColumn(extract_employee_id, "EMPLOYEE_NAME", each Text.Select([Column1], {"A".."Z", "a".."z", ",", "-"})), | |
identify_id_column = Table.RenameColumns(create_empname_column,{{"add_index_for_self_merge_2.Column1", "IDENTIFY_ID"}}), | |
remove_id_remnants = Table.ReplaceValue( identify_id_column, each [EMPLOYEE_NAME], each if Text.Length([EMPLOYEE_NAME]) <= 1 then null else [EMPLOYEE_NAME], Replacer.ReplaceValue, {"EMPLOYEE_NAME"}), | |
prepare_for_fill_down = Table.ReplaceValue(remove_id_remnants,"",null,Replacer.ReplaceValue,{"EMPLOYEE_NAME", "IDENTIFY_ID"}), | |
fill_values_down = Table.FillDown(prepare_for_fill_down,{"EMPLOYEE_NAME", "IDENTIFY_ID"}), | |
// Remove rows relating to company total information | |
identify_company_rows = Table.AddColumn(fill_values_down, "ROWS_TO_REMOVE", each if Text.Contains([EMPLOYEE_NAME], "COMPANY") then "REMOVE ROW" else "KEEP ROW"), | |
remove_company_totals = Table.SelectRows(identify_company_rows, each ([ROWS_TO_REMOVE] = "KEEP ROW")), | |
remove_company_meta = Table.RemoveColumns(remove_company_totals,{"ROWS_TO_REMOVE"}), | |
create_empid_column = Table.AddColumn(remove_company_meta, "ID", each if [EMPLOYEE_NAME] = "EMPLOYEENAME" or Text.Contains([IDENTIFY_ID], "COMPANY") or Text.Contains([IDENTIFY_ID], ",") then null | |
else [IDENTIFY_ID]), | |
replace_empid_blanks_with_null = Table.ReplaceValue(create_empid_column,"",null,Replacer.ReplaceValue,{"ID"}), | |
fill_empid_down = Table.FillDown(replace_empid_blanks_with_null,{"ID"}), | |
merge_name_id = Table.CombineColumns(fill_empid_down,{"EMPLOYEE_NAME", "ID"},Combiner.CombineTextByDelimiter("|", QuoteStyle.None),"EMPLOYEE_NAME_ID"), | |
remove_meta_columns = Table.RemoveColumns(merge_name_id,{"Index", "Index.1", "IDENTIFY_ID", "Column1"}), | |
//Move EMPLOYEE_NAME_ID to the beginning | |
with_empname_table_column_names = Table.ColumnNames(remove_meta_columns), | |
reorder_empname_list = List.Combine({{"EMPLOYEE_NAME_ID"},List.FirstN(with_empname_table_column_names,List.Count(with_empname_table_column_names)-1)}), | |
reordered_withemp_table = Table.ReorderColumns(remove_meta_columns,reorder_empname_list), | |
//Transpose the table | |
transpose_table = Table.Transpose(remove_meta_columns), | |
clean_columns = Table.ReplaceValue(transpose_table,null,"",Replacer.ReplaceValue,{"Column4", "Column1", "Column5", "Column6", "Column7", "Column8", "Column9" }), | |
clean_columns_column_names = Table.ColumnNames(clean_columns), | |
all_withholdings = Table.Column(WITHHOLDINGS, "WITHHOLDINGS"), | |
all_deductions = Table.Column(DEDUCTIONS, "DEDUCTIONS"), | |
all_net_pay_allocations = Table.Column(NET_PAY_ALLOCATIONS, "NET_PAY_ALLOCATIONS"), | |
withholdings_col_list = List.Generate( | |
() => [counter = 0, x = clean_columns_column_names, // x is an increasing list | |
y = all_withholdings , result="False"] , // y returns date | |
each [counter] < List.Count([x]), // for a 7 number sequence | |
each [counter=[counter]+1 , | |
x= clean_columns_column_names, | |
y=all_withholdings, | |
result= if List.ContainsAny(Table.Column(clean_columns, [x]{counter}), y) then "True" else "False" | |
], | |
each [result] ), | |
pos_of_withholding = if List.PositionOf( withholdings_col_list, "True") > 0 then Table.ColumnNames(clean_columns){List.PositionOf( withholdings_col_list, "True")} else "No Section", | |
deductions_col_list = List.Generate( | |
() => [counter = 0, x = clean_columns_column_names, // x is an increasing list | |
y = all_deductions , result="False"] , // y returns date | |
each [counter] < List.Count([x]), // for a 7 number sequence | |
each [counter=[counter]+1 , | |
x= clean_columns_column_names, | |
y=all_deductions, | |
result= if List.ContainsAny(Table.Column(clean_columns, [x]{counter}), y) then "True" else "False" | |
], | |
each [result] ), | |
pos_of_deductions = if List.PositionOf( deductions_col_list, "True") > 0 then Table.ColumnNames(clean_columns){List.PositionOf( deductions_col_list, "True")} else "No Section", | |
net_pay_allocations_col_list = List.Generate( | |
() => [counter = 0, x = clean_columns_column_names, // x is an increasing list | |
y = all_net_pay_allocations , result="False"] , // y returns date | |
each [counter] < List.Count([x]), // for a 7 number sequence | |
each [counter=[counter]+1 , | |
x= clean_columns_column_names, | |
y=all_net_pay_allocations, | |
result= if List.ContainsAny(Table.Column(clean_columns, [x]{counter}), y) then "True" else "False" | |
], | |
each [result] ), | |
pos_of_net_pay_allocations = if List.PositionOf( net_pay_allocations_col_list, "True") > 0 then Table.ColumnNames(clean_columns){List.PositionOf( net_pay_allocations_col_list, "True")} else "No Section", | |
// Identy a section by its possible subsections. This is required since a page can contain partial info for an individual and will not always start with Social Security | |
//Some pages are read with a lot of empty columns and so check further out | |
identify_sections = Table.AddColumn(clean_columns, "SET_SECTIONS", each | |
if pos_of_withholding <> "No Section" and List.AnyTrue( | |
List.Transform( | |
all_withholdings, | |
(listItem) => Text.Contains( | |
Record.Field(_, pos_of_withholding), | |
listItem, | |
Comparer.OrdinalIgnoreCase | |
) | |
) | |
) | |
then "WITHHOLDINGS" | |
else if pos_of_deductions <> "No Section" and List.AnyTrue( | |
List.Transform( | |
all_deductions, | |
(listItem) => Text.Contains( | |
Record.Field(_, pos_of_deductions), | |
listItem, | |
Comparer.OrdinalIgnoreCase | |
) | |
) | |
) | |
then "DEDUCTIONS" | |
else if pos_of_net_pay_allocations <> "No Section" and List.AnyTrue( | |
List.Transform( | |
all_net_pay_allocations, | |
(listItem) => Text.Contains( | |
Record.Field(_, pos_of_net_pay_allocations), | |
listItem, | |
Comparer.OrdinalIgnoreCase | |
) | |
) | |
) | |
then "NET PAY ALLOCATIONS" | |
else if Text.Contains([Column1], "EMPLOYEENAME") | |
then "EMPLOYEE_NAME_ID" | |
else null), | |
fill_section_down = Table.FillDown(identify_sections,{"SET_SECTIONS"}), | |
replace_hours_earnings = Table.ReplaceValue(fill_section_down,null,"HOURS_EARNINGS",Replacer.ReplaceValue,{"SET_SECTIONS"}), | |
replace_null_with_blanks = Table.ReplaceValue(replace_hours_earnings,null,"",Replacer.ReplaceValue,{"Column1","Column2"}), | |
//Identify columns that need to merge together with or without a pipe (for header:value) | |
identify_common_cols = Table.AddColumn(replace_null_with_blanks, "COMMON_SECTIONS", each if Text.Contains([Column2], "HOURS") and Text.Contains([SET_SECTIONS], "HOURS_EARNINGS") then "HOURS" | |
else if Text.Contains([Column2], "EARNINGS") and Text.Contains([SET_SECTIONS], "HOURS_EARNINGS") then "EARNINGS" | |
else if Text.Contains([Column2], "REIMB & OTHER") and Text.Contains([SET_SECTIONS], "HOURS_EARNINGS") then "REIMB & OTHER" | |
else if Text.Contains([Column2], "ALLOCATIONS") and Text.Contains([SET_SECTIONS], "NET PAY ALLOCATIONS") then "NET PAY ALLOCATIONS" | |
else [SET_SECTIONS]), | |
#"Replaced Value2" = Table.ReplaceValue(identify_common_cols,"HOURS_EARNINGS",null,Replacer.ReplaceValue,{"COMMON_SECTIONS"}), | |
fill_down = Table.FillDown(#"Replaced Value2",{"COMMON_SECTIONS"}), | |
merge_cols = Table.CombineColumns(fill_down,{"COMMON_SECTIONS", "Column3"},Combiner.CombineTextByDelimiter("", QuoteStyle.None),"MERGE_COMMON_SECTIONS"), | |
remove_meta = Table.RemoveColumns(merge_cols,{"Column1", "SET_SECTIONS", "Column2"}), | |
replaced_with_blank = Table.ReplaceValue(remove_meta,"",null,Replacer.ReplaceValue,{"MERGE_COMMON_SECTIONS"}), | |
filled_section_down = Table.FillDown(replaced_with_blank,{"MERGE_COMMON_SECTIONS"}), | |
//identify headers | |
identify_sections_headers = Table.AddColumn(filled_section_down, "SET_SECTION_HEADER", | |
each if Text.Contains([MERGE_COMMON_SECTIONS], "WITHHOLDINGS") and pos_of_withholding <> "No Section" and | |
List.AnyTrue( | |
List.Transform( | |
all_withholdings, | |
(listItem) => Text.Contains( | |
Record.Field(_, pos_of_withholding), | |
listItem, | |
Comparer.OrdinalIgnoreCase | |
) | |
) | |
) | |
then "WITHHOLDINGS HEADER" | |
else if Text.Contains([MERGE_COMMON_SECTIONS], "DEDUCTIONS") and pos_of_deductions <> "No Section" and | |
List.AnyTrue( | |
List.Transform( | |
all_deductions, | |
(listItem) => Text.Contains( | |
Record.Field(_, pos_of_deductions), | |
listItem, | |
Comparer.OrdinalIgnoreCase | |
) | |
) | |
) | |
then "DEDUCTIONS HEADER" | |
else if Text.Contains([MERGE_COMMON_SECTIONS], "ALLOCATION") and pos_of_net_pay_allocations <> "No Section" and | |
List.AnyTrue( | |
List.Transform( | |
all_net_pay_allocations, | |
(listItem) => Text.Contains( | |
Record.Field(_, pos_of_net_pay_allocations), | |
listItem, | |
Comparer.OrdinalIgnoreCase | |
) | |
) | |
) | |
then "NET PAY ALLOCATIONS HEADER" | |
else [MERGE_COMMON_SECTIONS]), | |
curr_table_column_names = Table.ColumnNames(identify_sections_headers), | |
reordered_list = List.Combine({{"SET_SECTION_HEADER"},List.FirstN(curr_table_column_names,List.Count(curr_table_column_names)-1)}), | |
reordered_table = Table.ReorderColumns(identify_sections_headers,reordered_list), | |
#"Removed Columns" = Table.RemoveColumns(reordered_table,{"MERGE_COMMON_SECTIONS"}), | |
rename_extra_top_row = Table.Skip(#"Removed Columns",1), | |
fill_desc_up = Table.FillUp(rename_extra_top_row,{"SET_SECTION_HEADER"}), | |
table_reflects_repeating_subsections = Table.Transpose(fill_desc_up), | |
#"Filtered Rows" = Table.SelectRows(table_reflects_repeating_subsections, each ([Column6] <> "OTAL") and ([Column19] <> "EmployerLiabilities")), | |
promote_headers = Table.PromoteHeaders(#"Filtered Rows", [PromoteAllScalars=true]), | |
//Determine Columns to merge | |
table_column_names = Table.ColumnNames(promote_headers), | |
last_element = List.Last(table_column_names,1), | |
rename_columns_1 = Table.RenameColumns(promote_headers,{{last_element, "EMPLOYEE_NAME_ID"}}), | |
renamed_column_names = Table.ColumnNames(rename_columns_1), | |
desc_list = List.FindText(renamed_column_names, "DESC"), | |
rate_list = List.FindText(renamed_column_names, "RATE"), | |
hours_list = List.FindText(renamed_column_names, "HOURS"), | |
earnings_list = List.FindText(renamed_column_names, "EARNINGS"), | |
reimb_list = List.FindText(renamed_column_names, "REIMB"), | |
//Separate the column in sections where there is a header column followed by a value column | |
all_withholding_list = List.FindText(renamed_column_names, "WITHHOLDINGS"), | |
withholding_header_list = List.FindText(renamed_column_names, "WITHHOLDINGS HEADER"), | |
withholding_list = List.Difference(all_withholding_list, withholding_header_list), | |
deductions_header_list = List.FindText(renamed_column_names, "DEDUCTIONS HEADER"), | |
all_deductions_list = List.FindText(renamed_column_names, "DEDUCTIONS"), | |
deductions_list = List.Difference(all_deductions_list, deductions_header_list), | |
all_netallocation = List.FindText(renamed_column_names, "NET PAY"), | |
netallocation_header_list = List.FindText(renamed_column_names, "NET PAY ALLOCATIONS HEADER"), | |
netallocation_list = List.Difference(all_netallocation, netallocation_header_list), | |
merge_desc = Table.CombineColumns(promote_headers,desc_list,Combiner.CombineTextByDelimiter("", QuoteStyle.None),"DESCRIPTIONS"), | |
merge_rate = Table.CombineColumns(merge_desc,rate_list,Combiner.CombineTextByDelimiter("", QuoteStyle.None),"RATES"), | |
merge_hours = Table.CombineColumns(merge_rate,hours_list,Combiner.CombineTextByDelimiter("", QuoteStyle.None),"HOURS"), | |
merge_earnings = Table.CombineColumns(merge_hours,earnings_list,Combiner.CombineTextByDelimiter("", QuoteStyle.None),"EARNINGS"), | |
merge_reimb = Table.CombineColumns(merge_earnings,reimb_list,Combiner.CombineTextByDelimiter("", QuoteStyle.None),"REIMB & OTHER"), | |
merge_withholding = Table.CombineColumns(merge_reimb, withholding_list, Combiner.CombineTextByDelimiter("", QuoteStyle.None),"WITHHOLDINGS"), | |
new_withholdings = List.Combine({withholding_header_list, {"WITHHOLDINGS"}}), | |
merge_withholding_header = Table.CombineColumns(merge_withholding,new_withholdings, Combiner.CombineTextByDelimiter("|", QuoteStyle.None),"WITHHOLDINGS"), | |
merge_deductions = Table.CombineColumns(merge_withholding_header,deductions_list,Combiner.CombineTextByDelimiter("", QuoteStyle.None),"DEDUCTIONS"), | |
new_deductions = List.Combine({deductions_header_list, {"DEDUCTIONS"}}), | |
merge_deductions_header = Table.CombineColumns(merge_deductions,new_deductions, Combiner.CombineTextByDelimiter("|", QuoteStyle.None),"DEDUCTIONS"), | |
merge_netallocation = Table.CombineColumns(merge_deductions_header,netallocation_list,Combiner.CombineTextByDelimiter("", QuoteStyle.None),"NET_ALLOCATIONS"), | |
new_allocations = List.Combine({netallocation_header_list, {"NET_ALLOCATIONS"}}), | |
merge_allocations_header = Table.CombineColumns(merge_netallocation,new_allocations, Combiner.CombineTextByDelimiter("|", QuoteStyle.None),"NET_ALLOCATIONS"), | |
merge_hours_earnings = Table.CombineColumns(merge_allocations_header,{"DESCRIPTIONS", "RATES", "HOURS", "EARNINGS", "REIMB & OTHER"},Combiner.CombineTextByDelimiter("|", QuoteStyle.None),"HOURS & EARNINGS"), | |
unpivot_other_columns = Table.UnpivotOtherColumns(merge_hours_earnings, {last_element}, "Attribute", "Value"), | |
split_columns_by_pipe = Table.SplitColumn(unpivot_other_columns, last_element, Splitter.SplitTextByDelimiter("|", QuoteStyle.None), {"EMPLOYEE_NAME_ID.1", "EMPLOYEE_NAME_ID.2"}), | |
split_columns_by_pipe1 = Table.SplitColumn(split_columns_by_pipe, "Value", Splitter.SplitTextByDelimiter("|", QuoteStyle.None), {"Value.1", "Value.2", "Value.3", "Value.4", "Value.5"}), | |
#"Renamed Columns" = Table.RenameColumns(split_columns_by_pipe1,{{"Value.1", "Description"}, {"Value.2", "Rate"}, {"Value.3", "Hours"}, {"Value.4", "Earnings"}}), | |
#"Sorted Rows" = Table.Sort(#"Renamed Columns",{{"EMPLOYEE_NAME_ID.1", Order.Ascending}, {"Attribute", Order.Ascending}}), | |
remove_blank_rows = Table.SelectRows(#"Sorted Rows", each ([Description] <> "")), | |
set_final_column_headers = Table.RenameColumns(remove_blank_rows,{{"Attribute", "SECTION"}, {"Description", "SUBSECTION"}, {"Rate", "SUBSECTION VALUE/RATE"}, {"EMPLOYEE_NAME_ID.1", "EMPLOYEE_NAME"}, {"EMPLOYEE_NAME_ID.2", "EMPLOYEE_ID"}, {"Hours", "HOURS"}, {"Earnings", "EARNINGS"}}), | |
replace_space_in_values = Table.ReplaceValue(set_final_column_headers," ","",Replacer.ReplaceText,{"SUBSECTION VALUE/RATE", "HOURS", "EARNINGS"}), | |
rename_final_columns = Table.RenameColumns(replace_space_in_values,{{"Value.5", "REIMBURSEMENT"}}), | |
merge_last_two = Table.CombineColumns(rename_final_columns,{"EARNINGS", "REIMBURSEMENT"},Combiner.CombineTextByDelimiter("", QuoteStyle.None),"EARNINGS & REIMBURSEMENT") | |
in | |
merge_last_two |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment