Created
July 6, 2020 18:23
-
-
Save broschke/44241aa58f695eba5c4fe1d020b76f0e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import datetime | |
# Collect the 14 dates the data set covers: today and the six days before it,
# each paired with the day 364 days earlier (exactly 52 weeks, so the weekday
# is the same in both years).
date_list = []
date = datetime.date.today()
for i in range(7):
    date_list += [
        date - datetime.timedelta(days=i),
        date - datetime.timedelta(days=364 + i),
    ]
# key the dates by the column name they will carry
date_dict = {'date': date_list}
# create brand and chain scale dict for company data
# (the two lists are parallel: brand[i] belongs to chain_scale[i])
brand = {
    'brand': [
        'Brand_1', 'Brand_2', 'Brand_3', 'Brand_4', 'Brand_5',
        'Brand_6', 'Brand_7', 'Brand_8', 'Brand_9', 'Brand_10',
    ],
    'chain_scale': [
        'Upper Midscale', 'Midscale', 'Midscale', 'Economy', 'Midscale',
        'Economy', 'Upper Midscale', 'Upscale', 'Upscale', 'Economy',
    ],
}
# create market type dict
market_type = {
    'market_type': [
        'Resort', 'Airport', 'Urban', 'Interstate', 'Small Town', 'Suburban',
    ],
}
# create state and District of Columbia dict (50 states + DC)
state = {
    'state': [
        'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
        'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA',
        'MD', 'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE',
        'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI',
        'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY',
    ],
}
# create chain scale dict for industry data
# The labels use the same spelling as brand['chain_scale'] ('Upper Midscale',
# not 'Upper Middle') so industry rows and brand rows share one vocabulary in
# the chain_scale column.
chain_scale = {
    'chain_scale': ['Upscale', 'Upper Midscale', 'Midscale', 'Economy'],
}
def census(state):
    '''
    Looks up the census division a state code belongs to.
    Parameters:
    :param state: state abbreviation such as 'TX' (also 'DC'); matched exactly
    Returns:
    :returns: (str) division name, or None when the code is not in any division
    '''
    divisions = (
        ('East North Central', ('IL', 'IN', 'MI', 'OH', 'WI')),
        ('East South Central', ('AL', 'KY', 'MS', 'TN')),
        ('Middle Atlantic', ('NJ', 'NY', 'PA')),
        ('Mountain', ('AZ', 'CO', 'ID', 'MT', 'NM', 'NV', 'UT', 'WY')),
        ('New England', ('CT', 'MA', 'ME', 'NH', 'RI', 'VT')),
        ('Pacific', ('AK', 'CA', 'HI', 'OR', 'WA')),
        ('South Atlantic', ('DC', 'DE', 'FL', 'GA', 'MD', 'NC', 'SC', 'VA', 'WV')),
        ('West North Central', ('IA', 'KS', 'MN', 'MO', 'ND', 'NE', 'SD')),
        ('West South Central', ('AR', 'LA', 'OK', 'TX')),
    )
    for division, members in divisions:
        if state in members:
            return division
    # unknown codes fall through without a division
    return None
def merge_frames(df1, df2):
    '''
    Used to create a cartesian product of two dataframes.
    Every row of df1 is paired with every row of df2 by joining both frames
    on a constant helper column, which is removed again before returning.
    The helper column is added to copies, so df1 and df2 themselves are
    left unmodified.
    Parameters:
    :param df1: (Pandas df) left frame
    :param df2: (Pandas df) right frame
    Returns:
    :returns: (Pandas df) new frame with len(df1) * len(df2) rows
    '''
    # assign() returns a new frame, so the caller's frames never gain 'tmp'
    left = df1.assign(tmp='1')
    right = df2.assign(tmp='1')
    # every row shares the same key value, so the join yields all pairs
    df = left.merge(right, on='tmp')
    return df.drop('tmp', axis=1)
def random_data(df, rand_type, min, max):
    """
    Used to generate one random value per row of a dataframe.
    Parameters:
    :param df: (Pandas df) only its length is used, to size the result
    :param rand_type: (str) 'randint' draws with np.random.randint; any
        other value draws floats with np.random.uniform
    :param min: lower bound handed to the numpy generator
    :param max: upper bound handed to the numpy generator
    Returns:
    :returns: (numpy array) len(df) random values
    """
    n_rows = len(df)
    if rand_type != 'randint':
        return np.random.uniform(low=min, high=max, size=n_rows)
    return np.random.randint(min, max, size=n_rows)
# turn each lookup dict into a frame that can be crossed with the others
df_ind = pd.DataFrame({'brand': ['Industry']})
df_ind_cs = pd.DataFrame(chain_scale)
df_brand = pd.DataFrame(brand)
df_state = pd.DataFrame(state)
df_date = pd.DataFrame(date_dict)
df_market = pd.DataFrame(market_type)


def _cross_all(first, *others):
    '''Fold merge_frames left to right over the given frames.'''
    crossed = first
    for frame in others:
        crossed = merge_frames(crossed, frame)
    return crossed


def _fill_metrics(frame, revenue_cap):
    '''Add random revenue, supply, demand_multiplier and demand columns in place.'''
    frame['revenue'] = random_data(frame, 'randint', 5000, revenue_cap)
    frame['supply'] = random_data(frame, 'randint', 50, 100)
    frame['demand_multiplier'] = random_data(frame, 'uniform', 0.3, 1)
    # demand is a random share of supply, rounded to whole units
    frame['demand'] = (frame['supply'] * frame['demand_multiplier']).round(0)


# industry frame: the single 'Industry' brand x chain scale x date
df_industry = _cross_all(df_ind, df_ind_cs, df_date)
df_industry['source'] = 'industry'

# company frame: brand x state x date x market type
df_company = _cross_all(df_brand, df_state, df_date, df_market)
df_company['source'] = 'choice'

# compset frame: same dimensions as the company frame, tagged separately
df_compset = _cross_all(df_brand, df_state, df_date, df_market)
df_compset['source'] = 'compset'

# derive the census division from the state on the two state-level frames
df_company['census_division'] = df_company['state'].apply(census)
df_compset['census_division'] = df_compset['state'].apply(census)

# random revenue / supply / demand: company, then compset, then industry
# (industry revenue is drawn with a lower upper bound than the other two)
_fill_metrics(df_company, 10000)
_fill_metrics(df_compset, 10000)
_fill_metrics(df_industry, 8000)

# stack the three sources into one frame with a fresh index
df = pd.concat([df_company, df_industry, df_compset], axis=0, ignore_index=True)

# write the combined data set to the working directory
df.to_csv('df.csv', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment