soodoku · December 27, 2024 01:13 · soodoku · Dec 26, 2024
diff --git a/demand_supply.R b/demand_supply.R
 # Set random seed for reproducibility
 set.seed(123)

 # Time t parameters
 n_novels_x_t <- 500    # 50% of 1000 novels are type x at time t
 n_novels_y_t <- 500    
 k <- 30               # 30% more likely to read type x
 total_readings <- 800 # Fixed total reading capacity

 # Time t simulation
 prob_x_t <- (1 + k/100)/(2 + k/100)
 prob_y_t <- 1/(2 + k/100)

 # Simulate readings at time t
 readings_t <- numeric(total_readings)
 for(i in 1:total_readings) {
  if(runif(1) < prob_x_t) {
    readings_t[i] <- 1  # Read type x
  } else {
    readings_t[i] <- 0  # Read type y
  }
 }

 # Time t regression
 data_t <- data.frame(
  reading = readings_t,
  type = factor(ifelse(readings_t == 1, "x", "y"))
 )
 model_t <- lm(reading ~ type, data = data_t)

 # Time t+1 parameters
 n_novels_x_t1 <- 700   # 70% of 1000 novels at t+1
 n_novels_y_t1 <- 300

 # Calculate new reading probabilities
 # Due to fixed capacity, probability is affected by supply
 scaling_factor <- (n_novels_x_t/n_novels_x_t1)  # Adjust for increased supply
 prob_x_t1 <- prob_x_t * scaling_factor
 prob_y_t1 <- 1 - prob_x_t1

 # Simulate readings at t+1
 readings_t1 <- numeric(total_readings)
 for(i in 1:total_readings) {
  if(runif(1) < prob_x_t1) {
    readings_t1[i] <- 1
  } else {
    readings_t1[i] <- 0
  }
 }

 # Time t+1 regression
 data_t1 <- data.frame(
  reading = readings_t1,
  type = factor(ifelse(readings_t1 == 1, "x", "y"))
 )
 model_t1 <- lm(reading ~ type, data = data_t1)

 # Results for time t
 cat("Time t results (50-50 split):\n")
 print(summary(model_t))
 prop_x_t <- mean(readings_t)
 prop_y_t <- 1 - prop_x_t
 k_recovered_t <- (prop_x_t/prop_y_t - 1) * 100
 cat("\nRecovered k at time t:", k_recovered_t, "%\n")

 # Results for time t+1
 cat("\nTime t+1 results (70-30 split):\n")
 print(summary(model_t1))
 prop_x_t1 <- mean(readings_t1)
 prop_y_t1 <- 1 - prop_x_t1
 k_recovered_t1 <- (prop_x_t1/prop_y_t1 - 1) * 100
 cat("\nRecovered k at time t+1:", k_recovered_t1, "%\n")
diff --git a/demand_supply_2.R b/demand_supply_2.R
 # Set random seed for reproducibility
 set.seed(123)

 # Parameters
 k <- 30               # 30% more likely to read type x
 total_readings <- 800 # Fixed total reading capacity

 # Time t: 50-50 split
 n_novels_x_t <- 500    
 n_novels_y_t <- 500    

 # Base probability of reading x (incorporating preference k)
 prob_x_t <- (1 + k/100)/(2 + k/100)
 prob_y_t <- 1/(2 + k/100)

 # Simulate readings at time t
 readings_t <- numeric(total_readings)
 for(i in 1:total_readings) {
  if(runif(1) < prob_x_t) {
    readings_t[i] <- 1  # Read type x
  } else {
    readings_t[i] <- 0  # Read type y
  }
 }

 # Time t+1: 70-30 split
 n_novels_x_t1 <- 700   
 n_novels_y_t1 <- 300

 # Adjust probabilities for supply change
 prob_x_t1 <- prob_x_t * (n_novels_x_t/n_novels_x_t1)  # Adjust down due to oversupply
 prob_y_t1 <- 1 - prob_x_t1

 # Simulate readings at t+1
 readings_t1 <- numeric(total_readings)
 for(i in 1:total_readings) {
  if(runif(1) < prob_x_t1) {
    readings_t1[i] <- 1
  } else {
    readings_t1[i] <- 0
  }
 }

 # Create data frames for regressions
 data_t <- data.frame(
  read_x = readings_t,
  period = "t",
  type = factor(ifelse(readings_t == 1, "x", "y"))
 )

 data_t1 <- data.frame(
  read_x = readings_t1,
  period = "t+1",
  type = factor(ifelse(readings_t1 == 1, "x", "y"))
 )

 # Combine data for both periods
 all_data <- rbind(data_t, data_t1)

 # Create period-type interaction for regression
 all_data$period_t1 <- ifelse(all_data$period == "t+1", 1, 0)
 all_data$type_x <- ifelse(all_data$type == "x", 1, 0)
 all_data$period_type <- all_data$period_t1 * all_data$type_x

 # Run regression
 model <- lm(read_x ~ type_x + period_t1 + period_type, data = all_data)

 # Print results
 cat("Regression Results:\n")
 print(summary(model))

 # Calculate and display proportions
 prop_x_t <- mean(readings_t)
 prop_y_t <- 1 - prop_x_t
 ratio_t <- prop_x_t/prop_y_t

 prop_x_t1 <- mean(readings_t1)
 prop_y_t1 <- 1 - prop_x_t1
 ratio_t1 <- prop_x_t1/prop_y_t1

 cat("\nTime t (50-50 split):\n")
 cat("Proportion reading x:", prop_x_t, "\n")
 cat("Proportion reading y:", prop_y_t, "\n")
 cat("Ratio x/y:", ratio_t, "\n")

 cat("\nTime t+1 (70-30 split):\n")
 cat("Proportion reading x:", prop_x_t1, "\n")
 cat("Proportion reading y:", prop_y_t1, "\n")
 cat("Ratio x/y:", ratio_t1, "\n")
diff --git a/demand_supply_observational_bias.md b/demand_supply_observational_bias.md
	# Set random seed for reproducibility
	set.seed(123)

	# Time t parameters
	n_novels_x_t <- 500 # 50% of 1000 novels are type x at time t
	n_novels_y_t <- 500
	k <- 30 # 30% more likely to read type x
	total_readings <- 800 # Fixed total reading capacity

	# Time t simulation
	prob_x_t <- (1 + k/100)/(2 + k/100)
	prob_y_t <- 1/(2 + k/100)

	# Simulate readings at time t
	readings_t <- numeric(total_readings)
	for(i in 1:total_readings) {
	if(runif(1) < prob_x_t) {
	readings_t[i] <- 1 # Read type x
	} else {
	readings_t[i] <- 0 # Read type y
	}
	}

	# Time t regression
	data_t <- data.frame(
	reading = readings_t,
	type = factor(ifelse(readings_t == 1, "x", "y"))
	)
	model_t <- lm(reading ~ type, data = data_t)

	# Time t+1 parameters
	n_novels_x_t1 <- 700 # 70% of 1000 novels at t+1
	n_novels_y_t1 <- 300

	# Calculate new reading probabilities
	# Due to fixed capacity, probability is affected by supply
	scaling_factor <- (n_novels_x_t/n_novels_x_t1) # Adjust for increased supply
	prob_x_t1 <- prob_x_t * scaling_factor
	prob_y_t1 <- 1 - prob_x_t1

	# Simulate readings at t+1
	readings_t1 <- numeric(total_readings)
	for(i in 1:total_readings) {
	if(runif(1) < prob_x_t1) {
	readings_t1[i] <- 1
	} else {
	readings_t1[i] <- 0
	}
	}

	# Time t+1 regression
	data_t1 <- data.frame(
	reading = readings_t1,
	type = factor(ifelse(readings_t1 == 1, "x", "y"))
	)
	model_t1 <- lm(reading ~ type, data = data_t1)

	# Results for time t
	cat("Time t results (50-50 split):\n")
	print(summary(model_t))
	prop_x_t <- mean(readings_t)
	prop_y_t <- 1 - prop_x_t
	k_recovered_t <- (prop_x_t/prop_y_t - 1) * 100
	cat("\nRecovered k at time t:", k_recovered_t, "%\n")

	# Results for time t+1
	cat("\nTime t+1 results (70-30 split):\n")
	print(summary(model_t1))
	prop_x_t1 <- mean(readings_t1)
	prop_y_t1 <- 1 - prop_x_t1
	k_recovered_t1 <- (prop_x_t1/prop_y_t1 - 1) * 100
	cat("\nRecovered k at time t+1:", k_recovered_t1, "%\n")
	# Set random seed for reproducibility
	set.seed(123)

	# Parameters
	k <- 30 # 30% more likely to read type x
	total_readings <- 800 # Fixed total reading capacity

	# Time t: 50-50 split
	n_novels_x_t <- 500
	n_novels_y_t <- 500

	# Base probability of reading x (incorporating preference k)
	prob_x_t <- (1 + k/100)/(2 + k/100)
	prob_y_t <- 1/(2 + k/100)

	# Simulate readings at time t
	readings_t <- numeric(total_readings)
	for(i in 1:total_readings) {
	if(runif(1) < prob_x_t) {
	readings_t[i] <- 1 # Read type x
	} else {
	readings_t[i] <- 0 # Read type y
	}
	}

	# Time t+1: 70-30 split
	n_novels_x_t1 <- 700
	n_novels_y_t1 <- 300

	# Adjust probabilities for supply change
	prob_x_t1 <- prob_x_t * (n_novels_x_t/n_novels_x_t1) # Adjust down due to oversupply
	prob_y_t1 <- 1 - prob_x_t1

	# Simulate readings at t+1
	readings_t1 <- numeric(total_readings)
	for(i in 1:total_readings) {
	if(runif(1) < prob_x_t1) {
	readings_t1[i] <- 1
	} else {
	readings_t1[i] <- 0
	}
	}

	# Create data frames for regressions
	data_t <- data.frame(
	read_x = readings_t,
	period = "t",
	type = factor(ifelse(readings_t == 1, "x", "y"))
	)

	data_t1 <- data.frame(
	read_x = readings_t1,
	period = "t+1",
	type = factor(ifelse(readings_t1 == 1, "x", "y"))
	)

	# Combine data for both periods
	all_data <- rbind(data_t, data_t1)

	# Create period-type interaction for regression
	all_data$period_t1 <- ifelse(all_data$period == "t+1", 1, 0)
	all_data$type_x <- ifelse(all_data$type == "x", 1, 0)
	all_data$period_type <- all_data$period_t1 * all_data$type_x

	# Run regression
	model <- lm(read_x ~ type_x + period_t1 + period_type, data = all_data)

	# Print results
	cat("Regression Results:\n")
	print(summary(model))

	# Calculate and display proportions
	prop_x_t <- mean(readings_t)
	prop_y_t <- 1 - prop_x_t
	ratio_t <- prop_x_t/prop_y_t

	prop_x_t1 <- mean(readings_t1)
	prop_y_t1 <- 1 - prop_x_t1
	ratio_t1 <- prop_x_t1/prop_y_t1

	cat("\nTime t (50-50 split):\n")
	cat("Proportion reading x:", prop_x_t, "\n")
	cat("Proportion reading y:", prop_y_t, "\n")
	cat("Ratio x/y:", ratio_t, "\n")

	cat("\nTime t+1 (70-30 split):\n")
	cat("Proportion reading x:", prop_x_t1, "\n")
	cat("Proportion reading y:", prop_y_t1, "\n")
	cat("Ratio x/y:", ratio_t1, "\n")