khannasarthak · December 18, 2018 19:10
diff --git a/marketDialScriptModular.py b/marketDialScriptModular.py
 import pandas as pd
 pd.options.display.float_format = '{:.2f}'.format # Setting float precision   

 # Reading CSV's and parsing dates
 sales = pd.read_csv('take_home_sales.csv', sep='|',index_col=False,parse_dates=['timestamp'])
 storeStatus = pd.read_csv('take_home_store_status.csv', sep='|',index_col=False)
 stores = pd.read_csv('take_home_stores.csv', sep='|',index_col=False)


 def cleanCSV():
    # Product ID cleaning: Removing IDs with not alpha numeric characters
    product_idCleaning = sales[sales.product_id.str.isalnum()] #filtering without alnum

    #Remove the closed stores from all stores
    toDeleteStores = stores[stores.tags == 'ZF'] # List of store to delete
    cleaneddf= product_idCleaning[~product_idCleaning['store_id'].isin(toDeleteStores['store_id'])] #Deleted stores
    return cleaneddf



 def sumCSV(cleaneddf):
    # Removing '-' from some rows in revenue column, assuming that the '-' at the end was an error. 
    # Summing the revenue after removing '-'
    cleaneddf = cleaneddf.copy() # To avoid the SettingWithCopyWarning issues
    cleaneddf['revenue']=cleaneddf['revenue'].astype(str).str.rstrip('-')
    # Converting revenue column to float to sum.
    cleaneddf.revenue = cleaneddf.revenue.astype(float)
    # Calculating total
    total = cleaneddf['revenue'].sum()
    print ('*** TOTAL SUM: ',total)


    # Store level total
    storeLevelSum = cleaneddf.groupby(['store_id'])['revenue'].sum().reset_index(name='store_sum') # grouping by store_id
    print ('*** STORE LEVEL SUMS (First 5 stores, to see all, refer generated csv.')
    print (storeLevelSum.head())
    print ('TOTAL STORE LEVEL SUM: ',storeLevelSum.store_sum.sum())
    # Writing store level sum to a CSV
    storeLevelSum.to_csv('storeLevelSum.csv',index = False)

    # Date Level Total
    dateLevelSum = (cleaneddf.reset_index().set_index("timestamp").groupby([pd.Grouper(freq='W-THU',closed='left',label='left')])["revenue"].sum().reset_index())
    # Reference 1:  https://stackoverflow.com/questions/46562401/group-python-pandas-dataframe-per-weeks-starting-on-monday
    #               http://pandas.pydata.org/pandas-docs/stable/timeseries.html#anchored-offsets
    # label: includes the week on the start, left includes the starting week, right includes the ending week
    # closed: whether to include given day or not. left includes, right doesnt

    dateLevelSum['timestamp']= dateLevelSum['timestamp'].dt.strftime('%m/%d/%Y') # Converint to date format as asked
    print ('*** DATE LEVEL SUMS')
    print (dateLevelSum)
    print ('TOTAL DATE LEVEL SUM: ',dateLevelSum.revenue.sum())
    # Writing date level sum to a CSV
    dateLevelSum.to_csv('dateLevelSum.csv',index = False)



 sumCSV(cleanCSV())
	import pandas as pd
	pd.options.display.float_format = '{:.2f}'.format # Setting float precision

	# Reading CSV's and parsing dates
	sales = pd.read_csv('take_home_sales.csv', sep='\|',index_col=False,parse_dates=['timestamp'])
	storeStatus = pd.read_csv('take_home_store_status.csv', sep='\|',index_col=False)
	stores = pd.read_csv('take_home_stores.csv', sep='\|',index_col=False)


	def cleanCSV():
	# Product ID cleaning: Removing IDs with not alpha numeric characters
	product_idCleaning = sales[sales.product_id.str.isalnum()] #filtering without alnum

	#Remove the closed stores from all stores
	toDeleteStores = stores[stores.tags == 'ZF'] # List of store to delete
	cleaneddf= product_idCleaning[~product_idCleaning['store_id'].isin(toDeleteStores['store_id'])] #Deleted stores
	return cleaneddf



	def sumCSV(cleaneddf):
	# Removing '-' from some rows in revenue column, assuming that the '-' at the end was an error.
	# Summing the revenue after removing '-'
	cleaneddf = cleaneddf.copy() # To avoid the SettingWithCopyWarning issues
	cleaneddf['revenue']=cleaneddf['revenue'].astype(str).str.rstrip('-')
	# Converting revenue column to float to sum.
	cleaneddf.revenue = cleaneddf.revenue.astype(float)
	# Calculating total
	total = cleaneddf['revenue'].sum()
	print ('*** TOTAL SUM: ',total)


	# Store level total
	storeLevelSum = cleaneddf.groupby(['store_id'])['revenue'].sum().reset_index(name='store_sum') # grouping by store_id
	print ('*** STORE LEVEL SUMS (First 5 stores, to see all, refer generated csv.')
	print (storeLevelSum.head())
	print ('TOTAL STORE LEVEL SUM: ',storeLevelSum.store_sum.sum())
	# Writing store level sum to a CSV
	storeLevelSum.to_csv('storeLevelSum.csv',index = False)

	# Date Level Total
	dateLevelSum = (cleaneddf.reset_index().set_index("timestamp").groupby([pd.Grouper(freq='W-THU',closed='left',label='left')])["revenue"].sum().reset_index())
	# Reference 1: https://stackoverflow.com/questions/46562401/group-python-pandas-dataframe-per-weeks-starting-on-monday
	# http://pandas.pydata.org/pandas-docs/stable/timeseries.html#anchored-offsets
	# label: includes the week on the start, left includes the starting week, right includes the ending week
	# closed: whether to include given day or not. left includes, right doesnt

	dateLevelSum['timestamp']= dateLevelSum['timestamp'].dt.strftime('%m/%d/%Y') # Converint to date format as asked
	print ('*** DATE LEVEL SUMS')
	print (dateLevelSum)
	print ('TOTAL DATE LEVEL SUM: ',dateLevelSum.revenue.sum())
	# Writing date level sum to a CSV
	dateLevelSum.to_csv('dateLevelSum.csv',index = False)



	sumCSV(cleanCSV())