Skip to content

Instantly share code, notes, and snippets.

@wolframalpha
Created February 1, 2018 12:48
Show Gist options
  • Select an option

  • Save wolframalpha/0e31a48c9a644cf4c366a4f02f6745f0 to your computer and use it in GitHub Desktop.

Select an option

Save wolframalpha/0e31a48c9a644cf4c366a4f02f6745f0 to your computer and use it in GitHub Desktop.
# Ordered list of 152 column names. Runs of numbered columns (category levels,
# category attributes, competitor prices, store competitor types) are
# generated instead of being spelled out one per line; the resulting order is
# the same as the hand-written list.
# NOTE(review): presumably this is the layout of the final joined dataset --
# confirm against the code that consumes `columns`.
# NOTE(review): 'tot_cummulative_complaints' (spelling) and
# 'avg__reg_price_comp_1' (double underscore) look like typos. They are kept
# byte-for-byte because other code may reference these exact names -- confirm
# before renaming.
columns = (
    [
        'store_id',
        'sale_type',
        'is_online_sale',
        'is_pick_up',
        'pick_up_store_id',
        'sku_id',
        'tot_unit_sold',
        'tot_promotion_price',
        'tot_reg_price',
        'tot_otd_price',
        'tot_unit_cost',
        'tot_shipping_cost',
        'tot_coupon_count',
        'tot_coupon_amount',
        'tot_views',
        'tot_opened',
        'tot_cart_abandonment',
        'avg_product_rating',
        'avg_seller_rating',
        'tot_cummulative_complaints',
        'tot_landing_page',
        'tot_inventory_count',
        'week',
        'week_number',
        'week_start_date',
        'month_abbr',
        'year',
        'is_holiday',
        'holiday_name',
    ]
    # cat_lvl_code_1 .. cat_lvl_code_20, then the matching descriptions.
    + ['cat_lvl_code_{}'.format(level) for level in range(1, 21)]
    + ['cat_lvl_desc_{}'.format(level) for level in range(1, 21)]
    + [
        'style_code',
        'style_desc',
        'sku_desc',
        'sku_upc',
        'sku_status',
        'sku_start_date',
        'sku_end_date',
    ]
    # cat_attribute_code_1 .. cat_attribute_code_10, then the descriptions.
    + ['cat_attribute_code_{}'.format(slot) for slot in range(1, 11)]
    + ['cat_attribute_desc_{}'.format(slot) for slot in range(1, 11)]
    + [
        'avg_otd_unit',
        'avg_promotion_price',
        'avg_cost_unit',
        'avg_profit_unit',
        'avg_reg_price',
        'tot_transaction_count',
        'avg_margin_rate',
        'comp_sku_id',
    ]
    # tot_<metric>_comp_<k>: the three metrics for competitor 1, then 2, then 3.
    + [
        'tot_{}_comp_{}'.format(metric, rival)
        for rival in range(1, 4)
        for metric in ('reg_price', 'promotion_price', 'shipping_cost')
    ]
    # The avg_* competitor columns are written out literally because the first
    # entry does not follow the naming pattern of the others.
    + [
        'avg__reg_price_comp_1',
        'avg_promotion_price_comp_1',
        'avg_shipping_cost_comp_1',
        'avg_reg_price_comp_2',
        'avg_promotion_price_comp_2',
        'avg_shipping_cost_comp_2',
        'avg_reg_price_comp_3',
        'avg_promotion_price_comp_3',
        'avg_shipping_cost_comp_3',
    ]
    + [
        'store_name',
        'store_type',
        'store_city',
        'store_region',
        'store_state',
        'store_country',
        'store_zip',
        'store_open_date',
        'store_size',
        'store_comp_dma_count',
    ]
    # store_comp_type_1 .. 10, then store_comp_type_count_1 .. 10.
    + ['store_comp_type_{}'.format(slot) for slot in range(1, 11)]
    + ['store_comp_type_count_{}'.format(slot) for slot in range(1, 11)]
)
from pyspark.sql.types import *
# Spark schema with 19 fields, all prefixed "cd". Each row of the table below
# is (field name, Spark data type, nullable) and is turned into one
# StructField, in the order listed.
# NOTE(review): the price/cost/amount fields are single-precision FloatType --
# confirm that precision is acceptable for monetary values.
transaction_schema = StructType([
    StructField(field_name, data_type, is_nullable)
    for field_name, data_type, is_nullable in (
        ("cdTimeStamp", TimestampType(), False),
        ("cdTransactionId", IntegerType(), True),
        ("cdStoreId", IntegerType(), False),
        ("cdSaleType", StringType(), False),
        ("cdIsOnlineSale", StringType(), False),
        ("cdStorePickUp", StringType(), True),
        ("cdPickUpStoreId", IntegerType(), True),
        ("cdSkuId", IntegerType(), False),
        ("cdUnitsSold", IntegerType(), False),
        ("cdRegularPrice", FloatType(), True),
        ("cdPromotionPrice", FloatType(), True),
        ("cdOTD", FloatType(), False),
        ("cdUnitCost", FloatType(), False),
        ("cdShippingCost", FloatType(), True),
        ("cdMarketingAssets", StringType(), True),
        ("cdCouponCount", IntegerType(), True),
        ("cdCouponAmount", FloatType(), True),
        ("cdCouponDescription", StringType(), True),
        ("cdCouponCode", IntegerType(), True),
    )
])
# Spark schema with 8 fields, all prefixed "cal". Nullability is passed by
# keyword so the non-nullable fields (calDate, calWeekNo, calYear) stand out.
# NOTE(review): the variable name is spelled "calender"; it is kept as-is
# because other code may import it under this name.
calender_schema = StructType(
    [
        StructField("calDate", DateType(), nullable=False),
        StructField("calMonthAbbr", StringType(), nullable=True),
        StructField("calWeekNo", IntegerType(), nullable=False),
        StructField("calYear", IntegerType(), nullable=False),
        StructField("calHolidayFlag", IntegerType(), nullable=True),
        StructField("calHolidayName", StringType(), nullable=True),
        StructField("calFiscalWeekNumber", IntegerType(), nullable=True),
        StructField("calFiscalYear", IntegerType(), nullable=True),
    ]
)
# Spark schema with 4 non-nullable fields: one date followed by three integer
# fields (InvSkuId, InvCount, InvStoreId), in that order.
inventory_schema = StructType(
    [StructField("InvDate", DateType(), False)]
    + [
        StructField(int_field, IntegerType(), False)
        for int_field in ("InvSkuId", "InvCount", "InvStoreId")
    ]
)
# Spark schema with 68 fields, all prefixed "cat": 8 style/SKU fields, then
# 10 attribute codes, 10 attribute descriptions, 20 level codes and 20 level
# descriptions. Only catSkuId is non-nullable. Field order is unchanged from
# the hand-written version; the numbered runs are generated.
hierarchy_schema = StructType(
    [
        StructField('catStyleCode', IntegerType(), True),
        StructField('catStyleDesc', StringType(), True),
        StructField('catSkuId', IntegerType(), False),
        StructField('catSkuDesc', StringType(), True),
        # LongType instead of IntegerType: IntegerType is a 32-bit signed
        # integer (max 2,147,483,647), which cannot hold a 12-digit UPC-A
        # code. Widening keeps every value that already fit.
        # NOTE(review): a numeric type still drops leading zeros; switch to
        # StringType if UPCs must round-trip exactly -- confirm with consumers.
        StructField('catSkuUPC', LongType(), True),
        StructField('catSkuStatus', StringType(), True),
        StructField('catSkuStartDate', DateType(), True),
        StructField('catSkuEndDate', DateType(), True),
    ]
    # catAttributeCode1 .. catAttributeCode10 (integer), then the descriptions.
    + [
        StructField('catAttributeCode{}'.format(slot), IntegerType(), True)
        for slot in range(1, 11)
    ]
    + [
        StructField('catAttributeDesc{}'.format(slot), StringType(), True)
        for slot in range(1, 11)
    ]
    # catLevel1 .. catLevel20 (integer), then catDescLevel1 .. catDescLevel20.
    + [
        StructField('catLevel{}'.format(level), IntegerType(), True)
        for level in range(1, 21)
    ]
    + [
        StructField('catDescLevel{}'.format(level), StringType(), True)
        for level in range(1, 21)
    ]
)
# Spark schema with 12 fields: three key fields followed by regular price,
# promoted price and shipping cost (all nullable FloatType) for competitors
# 1, 2 and 3, in that order.
competitor_schema = StructType(
    [
        StructField('compSkuId', IntegerType(), True),
        StructField('compDate', DateType(), False),
        # LongType instead of IntegerType: IntegerType is a 32-bit signed
        # integer (max 2,147,483,647), which cannot hold a 12-digit UPC-A
        # code, and this field is non-nullable so an unparseable value cannot
        # fall back to null. Widening keeps every value that already fit.
        # NOTE(review): a numeric type still drops leading zeros; switch to
        # StringType if UPCs must round-trip exactly -- confirm with consumers.
        StructField('compSkuUPC', LongType(), False),
    ]
    # comp<k>_RegularPrice, comp<k>_PromotedPrice, comp<k>_ShippingCost for k = 1..3.
    + [
        StructField('comp{}_{}'.format(rival, metric), FloatType(), True)
        for rival in range(1, 4)
        for metric in ('RegularPrice', 'PromotedPrice', 'ShippingCost')
    ]
)
# Spark schema with 54 fields, all prefixed "Store". Only StoreId and
# StoreCompDMACount are non-nullable. The numbered runs (level codes/names,
# competitor types/counts) are generated; field order matches the
# hand-written list.
store_schema = StructType(
    [StructField('StoreId', IntegerType(), False)]
    # Free-text descriptive fields, all nullable strings.
    + [
        StructField(text_field, StringType(), True)
        for text_field in (
            'StoreName',
            'StoreType',
            'StorePhone',
            'StoreAddress',
            'StoreCity',
            'StoreRegion',
            'StoreState',
            'StoreCountry',
            'StoreZip',
        )
    ]
    + [
        StructField('StoreOpenDate', DateType(), True),
        StructField('StoreManager', StringType(), True),
    ]
    # StoreLevelCode1 .. 10 (integer), then StoreLevelName1 .. 10 (string).
    + [
        StructField('StoreLevelCode{}'.format(level), IntegerType(), True)
        for level in range(1, 11)
    ]
    + [
        StructField('StoreLevelName{}'.format(level), StringType(), True)
        for level in range(1, 11)
    ]
    + [
        StructField('StoreSize', IntegerType(), True),
        StructField('StoreCompDMACount', IntegerType(), False),
    ]
    # StoreCompType1 .. 10 (string), then StoreCompTypeCount1 .. 10 (integer).
    + [
        StructField('StoreCompType{}'.format(slot), StringType(), True)
        for slot in range(1, 11)
    ]
    + [
        StructField('StoreCompTypeCount{}'.format(slot), IntegerType(), True)
        for slot in range(1, 11)
    ]
)
# Spark schema with 9 fields, all prefixed "oi". Each row of the table below
# is (field name, Spark data type, nullable); oiDate and oiSkuId are the only
# non-nullable fields.
online_schema = StructType([
    StructField(field_name, data_type, is_nullable)
    for field_name, data_type, is_nullable in (
        ('oiDate', DateType(), False),
        ('oiSkuId', IntegerType(), False),
        ('oiViews', IntegerType(), True),
        ('oiOpened', IntegerType(), True),
        ('oiCartAbandonment', IntegerType(), True),
        ('oiProductRating', FloatType(), True),
        ('oiSellerRating', FloatType(), True),
        ('oiCumulativeComplaints', IntegerType(), True),
        ('oiLandingPage', IntegerType(), True),
    )
])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment