yonran · January 4, 2025 00:35
diff --git a/ab1893-compare-sfplanning-maxdensity.sql b/ab1893-compare-sfplanning-maxdensity.sql
 -- Compares the HCD-approved site inventory spreadsheet
 -- with the current site inventory spreadsheet as of 2025-01-02.
 -- See https://x.com/yonran/status/1875018049130246550

 -- Prerequisites:
 -- * Install DuckDB
 -- * Download old AppendixB4.xlsx from
 --   https://web.archive.org/web/20230606031119/https://sfplanning.org/project/housing-element-update-2022
 --   direct link: https://web.archive.org/web/20230606031119/https://sfplanning.s3.amazonaws.com/archives/sfhousingelement.org/files/AppendixB4.xlsx
 -- * Download new Appendix_B4.xslx from
 --   https://sfplanning.org/project/housing-element-update-2022
 --   direct link: https://sfplanning.org/sites/default/files/documents/housing-for-all/Appendix_B4.xlsx

 INSTALL spatial;
 LOAD spatial;

 CREATE TABLE orig_table_b AS
 -- Can’t use st_read HEADERS=FORCE because that only supports one header row,
 -- but this table has 2 header rows so we need to manually skip the header and rename the columns
 WITH row_numbered AS (
 SELECT *, ROW_NUMBER() OVER () as rn 
 FROM st_read('AppendixB4.xlsx', layer='Table B -Submitted-Dec-22', open_options=['HEADERS=DISABLE'])
 ),
 with_column_names as (SELECT 
  -- exclude jurisdiction; it's always San Francisco
  -- "Field1" as "JURISDICT",
  "Field2" as "ADDRESS", 
  "Field3" as "ZIP5",
  "Field4" as "mapblklot",
  NULLIF(NULLIF("Field5", ''), 'n/a')::FLOAT as "M1_VLI",
  NULLIF(NULLIF("Field6", ''), 'n/a')::FLOAT as "M1_LI",
  NULLIF(NULLIF("Field7", ''), 'n/a')::FLOAT as "M1_M",
  NULLIF(NULLIF("Field8", ''), 'n/a')::FLOAT as "M1_AM",
  NULLIF(NULLIF("Field9", ''), 'n/a')::FLOAT as "M2_VLI",
  NULLIF(NULLIF("Field10", ''), 'n/a')::FLOAT as "M2_LI",
  NULLIF(NULLIF("Field11", ''), 'n/a')::FLOAT as "M2_M",
  NULLIF(NULLIF("Field12", ''), 'n/a')::FLOAT as "M2_AM",
  NULLIF(NULLIF("Field13", ''), 'n/a')::FLOAT as "M3_VLI",
  NULLIF(NULLIF("Field14", ''), 'n/a')::FLOAT as "M3_LI",
  NULLIF(NULLIF("Field15", ''), 'n/a')::FLOAT as "M3_M",
  NULLIF(NULLIF("Field16", ''), 'n/a')::FLOAT as "M3_AM",
  "Field17" as "SHORTFALL",
  NULLIF(NULLIF("Field18", ''), 'n/a')::FLOAT as "ACRES",
  "Field19" as "EX_GP_TYPE",
  "Field20" as "EX_ZONING",
  "Field21" as "M1_GP_TYPE",
  "Field22" as "M2_GP_TYPE", 
  "Field23" as "M3_GP_TYPE",
  "Field24" as "M1_ZONING",
  "Field25" as "M2_ZONING",
  "Field26" as "M3_ZONING",
  NULLIF(NULLIF("Field27", ''), 'n/a')::FLOAT as "MIN_DENS",
  NULLIF(NULLIF("Field28", ''), 'n/a')::FLOAT as "M1_MAXDENS",
  NULLIF(NULLIF("Field29", ''), 'n/a')::FLOAT as "M2_MAXDENS",
  NULLIF(NULLIF("Field30", ''), 'n/a')::FLOAT as "M3_MAXDENS",
  -- capacity is density * acres * development probability
  -- development probability is typically about 2% or 100% according to
  -- https://sfplanning.s3.amazonaws.com/archives/sfhousingelement.org/files/AppendixB.pdf#page=43
  NULLIF(NULLIF("Field31", ''), 'n/a')::FLOAT as "M1_CAP",
  NULLIF(NULLIF("Field32", ''), 'n/a')::FLOAT as "M2_CAP",
  NULLIF(NULLIF("Field33", ''), 'n/a')::FLOAT as "M3_CAP",
  "Field34" as "VACANT",
  "Field35" as "EX_USE",
  "Field36" as "INFRA",
  "Field37" as "SS_MAP1",
  "Field38" as "SS_MAP2",
  "Field39" as "SS_MAP3"
 FROM row_numbered
 WHERE rn > 2)
 SELECT *,
  FLOOR(M1_MAXDENS * ACRES)::INT AS m1_max_units,
  FLOOR(M2_MAXDENS * ACRES)::INT AS m2_max_units,
  FLOOR(M3_MAXDENS * ACRES)::INT AS m3_max_units,
  FLOOR(GREATEST(M1_MAXDENS, M2_MAXDENS, M3_MAXDENS) * ACRES)::INT AS max_units,
  REPLACE(REPLACE("mapblklot", ' ', ''), '-', '') as blklot,
 FROM with_column_names;

 CREATE TEMP VIEW new_table_b AS
 -- Can’t use st_read HEADERS=FORCE because that only supports one header row,
 -- but this table has 2 header rows so we need to manually skip the header and rename the columns
 WITH row_numbered AS (
  SELECT *, ROW_NUMBER() OVER () as rn 
  FROM st_read('Appendix_B4.xlsx', layer = 'Table B', open_options=['HEADERS=DISABLE'])
 ),
 with_column_names AS (SELECT 
 -- exclude Jurisdiction_Name; it's always SAN FRANCISCO
 --   "Field1" as "Jurisdiction_Name",
  "Field2" as "Site_Address",
  "Field3" as "ZIP_Code",
  "Field4" as "APN",
  NULLIF("Field5", '')::FLOAT as "Very_Low_Income",
  NULLIF("Field6", '')::FLOAT as "Low_Income",
  NULLIF("Field7", '')::FLOAT as "Moderate_Income",
  NULLIF("Field8", '')::FLOAT as "Above_Moderate_Income",
  "Field9" as "Type_of_Shortfall",
  -- parcel size in acres
  NULLIF("Field10", '')::FLOAT as "Parcel_Size",
  "Field11" as "Current_General_Plan",
  "Field12" as "Current_Zoning",
  "Field13" as "Proposed_GP_Designation",
  "Field14" as "Proposed_Zoning",
  -- min/max density per acre
  NULLIF("Field15", '')::FLOAT as "Min_Density",
  NULLIF("Field16", '')::FLOAT as "Max_Density",
  -- total_capacity is max_density * parcel_size * development probability
  -- which is mostly around 1-2% and 100% for some lots
  NULLIF("Field17", '')::FLOAT as "Total_Capacity",
  "Field18" as "Vacant_Nonvacant",
  "Field19" as "Existing_Uses",
  "Field20" as "Optional_Info1",
  "Field21" as "Optional_Info2",
  "Field22" as "Optional_Info3",
  "Field23" as "Optional_Info32",
 FROM row_numbered 
 WHERE rn > 2)  -- Skip first two rows
 SELECT *,
  FLOOR(MAX_DENSITY * Parcel_Size)::INT AS max_units,
  REPLACE(REPLACE("APN", ' ', ''), '-', '') as blklot,
 FROM with_column_names;


 SELECT
 orig_table_b.m1_max_units,
 orig_table_b.m2_max_units,
 orig_table_b.m3_max_units,
 new_table_b.max_units as new_max_units,
 FROM orig_table_b INNER JOIN new_table_b USING (blklot)
 WHERE GREATEST(m1_max_units, m2_max_units, m3_max_units) >= new_max_units + 1;

 -- result: 15497 rows out of about 53543 rows have fewer allowed units
 -- in the current version of the spreadsheet (which is basically just m3)
 -- than in the original that was transmitted to HCD.
	-- Compares the HCD-approved site inventory spreadsheet
	-- with the current site inventory spreadsheet as of 2025-01-02.
	-- See https://x.com/yonran/status/1875018049130246550

	-- Prerequisites:
	-- * Install DuckDB
	-- * Download old AppendixB4.xlsx from
	-- https://web.archive.org/web/20230606031119/https://sfplanning.org/project/housing-element-update-2022
	-- direct link: https://web.archive.org/web/20230606031119/https://sfplanning.s3.amazonaws.com/archives/sfhousingelement.org/files/AppendixB4.xlsx
	-- * Download new Appendix_B4.xslx from
	-- https://sfplanning.org/project/housing-element-update-2022
	-- direct link: https://sfplanning.org/sites/default/files/documents/housing-for-all/Appendix_B4.xlsx

	INSTALL spatial;
	LOAD spatial;

	CREATE TABLE orig_table_b AS
	-- Can’t use st_read HEADERS=FORCE because that only supports one header row,
	-- but this table has 2 header rows so we need to manually skip the header and rename the columns
	WITH row_numbered AS (
	SELECT *, ROW_NUMBER() OVER () as rn
	FROM st_read('AppendixB4.xlsx', layer='Table B -Submitted-Dec-22', open_options=['HEADERS=DISABLE'])
	),
	with_column_names as (SELECT
	-- exclude jurisdiction; it's always San Francisco
	-- "Field1" as "JURISDICT",
	"Field2" as "ADDRESS",
	"Field3" as "ZIP5",
	"Field4" as "mapblklot",
	NULLIF(NULLIF("Field5", ''), 'n/a')::FLOAT as "M1_VLI",
	NULLIF(NULLIF("Field6", ''), 'n/a')::FLOAT as "M1_LI",
	NULLIF(NULLIF("Field7", ''), 'n/a')::FLOAT as "M1_M",
	NULLIF(NULLIF("Field8", ''), 'n/a')::FLOAT as "M1_AM",
	NULLIF(NULLIF("Field9", ''), 'n/a')::FLOAT as "M2_VLI",
	NULLIF(NULLIF("Field10", ''), 'n/a')::FLOAT as "M2_LI",
	NULLIF(NULLIF("Field11", ''), 'n/a')::FLOAT as "M2_M",
	NULLIF(NULLIF("Field12", ''), 'n/a')::FLOAT as "M2_AM",
	NULLIF(NULLIF("Field13", ''), 'n/a')::FLOAT as "M3_VLI",
	NULLIF(NULLIF("Field14", ''), 'n/a')::FLOAT as "M3_LI",
	NULLIF(NULLIF("Field15", ''), 'n/a')::FLOAT as "M3_M",
	NULLIF(NULLIF("Field16", ''), 'n/a')::FLOAT as "M3_AM",
	"Field17" as "SHORTFALL",
	NULLIF(NULLIF("Field18", ''), 'n/a')::FLOAT as "ACRES",
	"Field19" as "EX_GP_TYPE",
	"Field20" as "EX_ZONING",
	"Field21" as "M1_GP_TYPE",
	"Field22" as "M2_GP_TYPE",
	"Field23" as "M3_GP_TYPE",
	"Field24" as "M1_ZONING",
	"Field25" as "M2_ZONING",
	"Field26" as "M3_ZONING",
	NULLIF(NULLIF("Field27", ''), 'n/a')::FLOAT as "MIN_DENS",
	NULLIF(NULLIF("Field28", ''), 'n/a')::FLOAT as "M1_MAXDENS",
	NULLIF(NULLIF("Field29", ''), 'n/a')::FLOAT as "M2_MAXDENS",
	NULLIF(NULLIF("Field30", ''), 'n/a')::FLOAT as "M3_MAXDENS",
	-- capacity is density * acres * development probability
	-- development probability is typically about 2% or 100% according to
	-- https://sfplanning.s3.amazonaws.com/archives/sfhousingelement.org/files/AppendixB.pdf#page=43
	NULLIF(NULLIF("Field31", ''), 'n/a')::FLOAT as "M1_CAP",
	NULLIF(NULLIF("Field32", ''), 'n/a')::FLOAT as "M2_CAP",
	NULLIF(NULLIF("Field33", ''), 'n/a')::FLOAT as "M3_CAP",
	"Field34" as "VACANT",
	"Field35" as "EX_USE",
	"Field36" as "INFRA",
	"Field37" as "SS_MAP1",
	"Field38" as "SS_MAP2",
	"Field39" as "SS_MAP3"
	FROM row_numbered
	WHERE rn > 2)
	SELECT *,
	FLOOR(M1_MAXDENS * ACRES)::INT AS m1_max_units,
	FLOOR(M2_MAXDENS * ACRES)::INT AS m2_max_units,
	FLOOR(M3_MAXDENS * ACRES)::INT AS m3_max_units,
	FLOOR(GREATEST(M1_MAXDENS, M2_MAXDENS, M3_MAXDENS) * ACRES)::INT AS max_units,
	REPLACE(REPLACE("mapblklot", ' ', ''), '-', '') as blklot,
	FROM with_column_names;

	CREATE TEMP VIEW new_table_b AS
	-- Can’t use st_read HEADERS=FORCE because that only supports one header row,
	-- but this table has 2 header rows so we need to manually skip the header and rename the columns
	WITH row_numbered AS (
	SELECT *, ROW_NUMBER() OVER () as rn
	FROM st_read('Appendix_B4.xlsx', layer = 'Table B', open_options=['HEADERS=DISABLE'])
	),
	with_column_names AS (SELECT
	-- exclude Jurisdiction_Name; it's always SAN FRANCISCO
	-- "Field1" as "Jurisdiction_Name",
	"Field2" as "Site_Address",
	"Field3" as "ZIP_Code",
	"Field4" as "APN",
	NULLIF("Field5", '')::FLOAT as "Very_Low_Income",
	NULLIF("Field6", '')::FLOAT as "Low_Income",
	NULLIF("Field7", '')::FLOAT as "Moderate_Income",
	NULLIF("Field8", '')::FLOAT as "Above_Moderate_Income",
	"Field9" as "Type_of_Shortfall",
	-- parcel size in acres
	NULLIF("Field10", '')::FLOAT as "Parcel_Size",
	"Field11" as "Current_General_Plan",
	"Field12" as "Current_Zoning",
	"Field13" as "Proposed_GP_Designation",
	"Field14" as "Proposed_Zoning",
	-- min/max density per acre
	NULLIF("Field15", '')::FLOAT as "Min_Density",
	NULLIF("Field16", '')::FLOAT as "Max_Density",
	-- total_capacity is max_density * parcel_size * development probability
	-- which is mostly around 1-2% and 100% for some lots
	NULLIF("Field17", '')::FLOAT as "Total_Capacity",
	"Field18" as "Vacant_Nonvacant",
	"Field19" as "Existing_Uses",
	"Field20" as "Optional_Info1",
	"Field21" as "Optional_Info2",
	"Field22" as "Optional_Info3",
	"Field23" as "Optional_Info32",
	FROM row_numbered
	WHERE rn > 2) -- Skip first two rows
	SELECT *,
	FLOOR(MAX_DENSITY * Parcel_Size)::INT AS max_units,
	REPLACE(REPLACE("APN", ' ', ''), '-', '') as blklot,
	FROM with_column_names;


	SELECT
	orig_table_b.m1_max_units,
	orig_table_b.m2_max_units,
	orig_table_b.m3_max_units,
	new_table_b.max_units as new_max_units,
	FROM orig_table_b INNER JOIN new_table_b USING (blklot)
	WHERE GREATEST(m1_max_units, m2_max_units, m3_max_units) >= new_max_units + 1;

	-- result: 15497 rows out of about 53543 rows have fewer allowed units
	-- in the current version of the spreadsheet (which is basically just m3)
	-- than in the original that was transmitted to HCD.