Created
August 15, 2020 11:34
-
-
Save aylmerbritto/657b31bb4c3390cbd110770b78fa101d to your computer and use it in GitHub Desktop.
So in yknot we took this initiative of distributing T-shirts to our college peeps. We collected information of people who were interested in the beginning to choose their affordable price range, colour and sizes. And in the second information we collected their payment details. At the end of the day we had to spend a lot of time to manually pic…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def findDuplicates(df,key): | |
duplicates = df[df.duplicated([key], keep=False)] | |
print(duplicates) | |
return duplicates | |
def notPaidList(wholeSheet,paidSheet, Key): | |
#print("=================================") | |
whSheetMerged= wholeSheet.merge(paidSheet, on=Key, how="left", indicator=True) | |
final = whSheetMerged[whSheetMerged["_merge"] == "left_only"].drop(columns=["_merge"]) | |
#print("=================================") | |
print("%d people yet to pay \n\n \n" %(len(final)-1)) | |
#print("=================================") | |
return final | |
def findCulprits(wholeSheet,paidSheet): | |
#print("=================================") | |
whSheetMerged= paidSheet.merge(wholeSheet, on=['Mobile Number', 'Email Address'], how="left", indicator=True) | |
final = whSheetMerged[whSheetMerged["_merge"] == "both"].drop(columns=["_merge"]) | |
print("same phone number and mail address") | |
print("=================================") | |
print("%d people have paid" %(len(final)-1)) | |
notPaidList(wholeSheet,paidSheet,['Mobile Number', 'Email Address']) | |
final = final.drop(columns=["Name_y"]) | |
final = final.rename(columns = {'Name_x':'Name'}) | |
#print("=================================") | |
whSheetMerged= paidSheet.merge(wholeSheet, on=['Email Address'], how="left", indicator=True) | |
correctMail = whSheetMerged[whSheetMerged["_merge"] == "both"].drop(columns=["_merge"]) | |
#print(final) | |
print("same mail address and different phone number") | |
print("=============================================") | |
print("%d people have paid with same mail address and different phone number" %(len(correctMail)-1)) | |
notPaidList(wholeSheet,paidSheet,['Email Address']) | |
tempMail = correctMail.loc[correctMail["Mobile Number_x"] != correctMail["Mobile Number_y"]] | |
tempMail = tempMail.drop(columns=["Mobile Number_y","Name_y"]) | |
tempMail = tempMail.rename(columns = {'Name_x':'Name','Mobile Number_x':'Mobile Number'}) | |
#print("=================================") | |
whSheetMerged= paidSheet.merge(wholeSheet, on=['Mobile Number'], how="left", indicator=True) | |
correctPhone = whSheetMerged[whSheetMerged["_merge"] == "both"].drop(columns=["_merge"]) | |
#print(final) | |
print("with same phone number and different mail address") | |
print("==================================================") | |
print("%d people have paid with same phone number and different mail address" %(len(correctPhone)-1)) | |
notPaidList(wholeSheet,paidSheet,['Mobile Number']) | |
tempPhone = correctPhone.loc[correctPhone["Email Address_x"] != correctPhone["Email Address_y"]] | |
tempPhone = tempPhone.drop(columns=["Email Address_y","Name_y"]) | |
tempPhone = tempPhone.rename(columns = {'Name_x':'Name','Email Address_x':'Email Address'}) | |
final = final.append(tempMail) | |
final = final.append(tempPhone) | |
mergePaid = paidSheet.merge(final, how="left", indicator=True) | |
dumbestCulprits = mergePaid.loc[mergePaid["_merge"] == "left_only"].drop(columns=["_merge"]) | |
#final = final.append(dumbestCulprits) | |
print(dumbestCulprits) | |
final['Mobile Number'] = [str(int(x))[-10:] for x in final['Mobile Number']] | |
final.to_csv('paidAndShirtSize.csv') | |
print(len(final),len(paidSheet)) | |
def findSize(): | |
final = pd.read_csv('paidAndShirtSize.csv') | |
sizeData = final.pivot_table(index=['Shirt size'], aggfunc='size') | |
print(sizeData) | |
print("=================================") | |
sheet1 = 'sheets.csv' | |
sheet2 = 'paid.csv' | |
responses = pd.read_csv(sheet1) | |
responses = responses.dropna() | |
paidList = pd.read_csv(sheet2) | |
paidList = paidList.dropna() | |
#notPaid=notPaidList(responses,paidList,'Mobile Number') | |
#Removes Country codes | |
#notPaid['Mobile Number'] = [str(int(x))[-10:] for x in notPaid['Mobile Number']] | |
#notPaid.to_csv('notPaid.csv') | |
#findCulprits(responses,paidList) | |
findSize() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment