Step 1: Export the file to csv format.
Step 2: Remove the empty rows from the file by running below command:
grep -iv ",,,,," file_name.csv > data_clean.csv
Step 3: Run below script
#!/usr/bin/env bash
echo "" > data_cleaned.csv
counter=0
cat data_clean.csv | while read line
do
gstin=$(echo "$line" | awk -F"," '{print $1}')
company=$(echo "$line" | awk -F"," '{print $2}')
values=$(echo "$line" | awk -F"," '{for (i=3; i<NF; i++) printf $i","; print $NF}')
if [[ -z "$gstin" ]]
then
echo "$prevgstin","$prevcompany","$values" >> data_cleaned.csv
else
prevgstin=${gstin}
prevcompany=${company}
echo "$line" >> data_cleaned.csv
fi
((counter=counter+1))
if [[ $((counter%50)) == 0 ]]
then
echo $counter
fi
done
Step 4: Grab the uniq ID (GSTIN)
cat data_cleaned.csv | awk -F"," '{print $1}' | sort | uniq > GSTIN.txt
Step 5: Run the below script to get the final data
#!/usr/bin/env bash
echo "" > final.csv
counter=0
cat GSTIN.txt | while read gstid
do
company_name=$(cat data_cleaned.csv | grep -i "$gstid" | head -1 | awk -F, '{print $2}')
invoice_value=$(grep -i "$gstid" data_cleaned.csv | awk -F, '{sum+=$3;} END{print sum;}')
taxable_value=$(grep -i "$gstid" data_cleaned.csv | awk -F, '{sum+=$4;} END{print sum;}')
igst=$(grep -i "$gstid" data_cleaned.csv | awk -F, '{sum+=$5;} END{print sum;}')
cgst=$(grep -i "$gstid" data_cleaned.csv | awk -F, '{sum+=$6;} END{print sum;}')
sgst=$(grep -i "$gstid" data_cleaned.csv | awk -F, '{sum+=$7;} END{print sum;}')
cess=$(grep -i "$gstid" data_cleaned.csv | awk -F, '{sum+=$8;} END{print sum;}')
echo "$gstid","$company_name","$invoice_value","$taxable_value","$igst","$cgst","$sgst","$cess" >> final.csv
((counter=counter+1))
if [[ $((counter%50)) == 0 ]]
then
echo $counter
fi
done