Last active
December 18, 2018 19:10
-
-
Save khannasarthak/ab8ca4a21dec69da892cb5ff49477b20 to your computer and use it in GitHub Desktop.
scripts for market dial
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
pd.options.display.float_format = '{:.2f}'.format # Setting float precision | |
# Reading CSV's and parsing dates | |
sales = pd.read_csv('take_home_sales.csv', sep='|',index_col=False,parse_dates=['timestamp']) | |
storeStatus = pd.read_csv('take_home_store_status.csv', sep='|',index_col=False) | |
stores = pd.read_csv('take_home_stores.csv', sep='|',index_col=False) | |
def cleanCSV(): | |
# Product ID cleaning: Removing IDs with not alpha numeric characters | |
product_idCleaning = sales[sales.product_id.str.isalnum()] #filtering without alnum | |
#Remove the closed stores from all stores | |
toDeleteStores = stores[stores.tags == 'ZF'] # List of store to delete | |
cleaneddf= product_idCleaning[~product_idCleaning['store_id'].isin(toDeleteStores['store_id'])] #Deleted stores | |
return cleaneddf | |
def sumCSV(cleaneddf): | |
# Removing '-' from some rows in revenue column, assuming that the '-' at the end was an error. | |
# Summing the revenue after removing '-' | |
cleaneddf = cleaneddf.copy() # To avoid the SettingWithCopyWarning issues | |
cleaneddf['revenue']=cleaneddf['revenue'].astype(str).str.rstrip('-') | |
# Converting revenue column to float to sum. | |
cleaneddf.revenue = cleaneddf.revenue.astype(float) | |
# Calculating total | |
total = cleaneddf['revenue'].sum() | |
print ('*** TOTAL SUM: ',total) | |
# Store level total | |
storeLevelSum = cleaneddf.groupby(['store_id'])['revenue'].sum().reset_index(name='store_sum') # grouping by store_id | |
print ('*** STORE LEVEL SUMS (First 5 stores, to see all, refer generated csv.') | |
print (storeLevelSum.head()) | |
print ('TOTAL STORE LEVEL SUM: ',storeLevelSum.store_sum.sum()) | |
# Writing store level sum to a CSV | |
storeLevelSum.to_csv('storeLevelSum.csv',index = False) | |
# Date Level Total | |
dateLevelSum = (cleaneddf.reset_index().set_index("timestamp").groupby([pd.Grouper(freq='W-THU',closed='left',label='left')])["revenue"].sum().reset_index()) | |
# Reference 1: https://stackoverflow.com/questions/46562401/group-python-pandas-dataframe-per-weeks-starting-on-monday | |
# http://pandas.pydata.org/pandas-docs/stable/timeseries.html#anchored-offsets | |
# label: includes the week on the start, left includes the starting week, right includes the ending week | |
# closed: whether to include given day or not. left includes, right doesnt | |
dateLevelSum['timestamp']= dateLevelSum['timestamp'].dt.strftime('%m/%d/%Y') # Converint to date format as asked | |
print ('*** DATE LEVEL SUMS') | |
print (dateLevelSum) | |
print ('TOTAL DATE LEVEL SUM: ',dateLevelSum.revenue.sum()) | |
# Writing date level sum to a CSV | |
dateLevelSum.to_csv('dateLevelSum.csv',index = False) | |
sumCSV(cleanCSV()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment