jcbozonier · May 22, 2017 12:48
diff --git a/foo.py b/foo.py
 # These are the "true" slopes and intercepts that I made up.
 set_parameters = (
    (('no degree', 'CA', 'Riverside County'), (5000, 1250)),
    (('no degree', 'IL', 'Cook County'), (6500, 1150)),
    (('no degree', 'IL', 'Lake County'), (7000, 1350)),
    
    (('degree', 'CA', 'Riverside County'), (6000, 1250)),
    (('degree', 'IL', 'Cook County'), (7500, 1150)),
    (('degree', 'IL', 'Lake County'), (8000, 1350))
 )

 # Go through each definition above and generate a fake time series
 rows = []
 for set_parameter, N in zip(set_parameters, data_points):
  key, parameters = set_parameter
  population_rows = [{
      'degree': key[0],
      'state': key[1],
      'county': key[2],
      'month_index': i,
      # Create time series data and add some noise to make it realistic
      'salary': i*parameters[0] + parameters[1] + np.random.normal(loc=0, scale=500) 
  } for i in range(N)]
  rows += population_rows

 salary_df = pd.DataFrame(rows)
	# These are the "true" slopes and intercepts that I made up.
	set_parameters = (
	(('no degree', 'CA', 'Riverside County'), (5000, 1250)),
	(('no degree', 'IL', 'Cook County'), (6500, 1150)),
	(('no degree', 'IL', 'Lake County'), (7000, 1350)),

	(('degree', 'CA', 'Riverside County'), (6000, 1250)),
	(('degree', 'IL', 'Cook County'), (7500, 1150)),
	(('degree', 'IL', 'Lake County'), (8000, 1350))
	)

	# Go through each definition above and generate a fake time series
	rows = []
	for set_parameter, N in zip(set_parameters, data_points):
	key, parameters = set_parameter
	population_rows = [{
	'degree': key[0],
	'state': key[1],
	'county': key[2],
	'month_index': i,
	# Create time series data and add some noise to make it realistic
	'salary': i*parameters[0] + parameters[1] + np.random.normal(loc=0, scale=500)
	} for i in range(N)]
	rows += population_rows

	salary_df = pd.DataFrame(rows)
No results found