Skip to content

Instantly share code, notes, and snippets.

@yonran
Last active January 4, 2025 00:35
Show Gist options
  • Save yonran/5df4e5d296e6eb43cb170337646782ba to your computer and use it in GitHub Desktop.
Save yonran/5df4e5d296e6eb43cb170337646782ba to your computer and use it in GitHub Desktop.
Studying San Francisco’s 2022 Housing Element Site Inventory
-- Compares the HCD-approved site inventory spreadsheet
-- with the current site inventory spreadsheet as of 2025-01-02.
-- See https://x.com/yonran/status/1875018049130246550
-- Prerequisites:
-- * Install DuckDB
-- * Download old AppendixB4.xlsx from
-- https://web.archive.org/web/20230606031119/https://sfplanning.org/project/housing-element-update-2022
-- direct link: https://web.archive.org/web/20230606031119/https://sfplanning.s3.amazonaws.com/archives/sfhousingelement.org/files/AppendixB4.xlsx
-- * Download new Appendix_B4.xslx from
-- https://sfplanning.org/project/housing-element-update-2022
-- direct link: https://sfplanning.org/sites/default/files/documents/housing-for-all/Appendix_B4.xlsx
INSTALL spatial;
LOAD spatial;
CREATE TABLE orig_table_b AS
-- Can’t use st_read HEADERS=FORCE because that only supports one header row,
-- but this table has 2 header rows so we need to manually skip the header and rename the columns
WITH row_numbered AS (
SELECT *, ROW_NUMBER() OVER () as rn
FROM st_read('AppendixB4.xlsx', layer='Table B -Submitted-Dec-22', open_options=['HEADERS=DISABLE'])
),
with_column_names as (SELECT
-- exclude jurisdiction; it's always San Francisco
-- "Field1" as "JURISDICT",
"Field2" as "ADDRESS",
"Field3" as "ZIP5",
"Field4" as "mapblklot",
NULLIF(NULLIF("Field5", ''), 'n/a')::FLOAT as "M1_VLI",
NULLIF(NULLIF("Field6", ''), 'n/a')::FLOAT as "M1_LI",
NULLIF(NULLIF("Field7", ''), 'n/a')::FLOAT as "M1_M",
NULLIF(NULLIF("Field8", ''), 'n/a')::FLOAT as "M1_AM",
NULLIF(NULLIF("Field9", ''), 'n/a')::FLOAT as "M2_VLI",
NULLIF(NULLIF("Field10", ''), 'n/a')::FLOAT as "M2_LI",
NULLIF(NULLIF("Field11", ''), 'n/a')::FLOAT as "M2_M",
NULLIF(NULLIF("Field12", ''), 'n/a')::FLOAT as "M2_AM",
NULLIF(NULLIF("Field13", ''), 'n/a')::FLOAT as "M3_VLI",
NULLIF(NULLIF("Field14", ''), 'n/a')::FLOAT as "M3_LI",
NULLIF(NULLIF("Field15", ''), 'n/a')::FLOAT as "M3_M",
NULLIF(NULLIF("Field16", ''), 'n/a')::FLOAT as "M3_AM",
"Field17" as "SHORTFALL",
NULLIF(NULLIF("Field18", ''), 'n/a')::FLOAT as "ACRES",
"Field19" as "EX_GP_TYPE",
"Field20" as "EX_ZONING",
"Field21" as "M1_GP_TYPE",
"Field22" as "M2_GP_TYPE",
"Field23" as "M3_GP_TYPE",
"Field24" as "M1_ZONING",
"Field25" as "M2_ZONING",
"Field26" as "M3_ZONING",
NULLIF(NULLIF("Field27", ''), 'n/a')::FLOAT as "MIN_DENS",
NULLIF(NULLIF("Field28", ''), 'n/a')::FLOAT as "M1_MAXDENS",
NULLIF(NULLIF("Field29", ''), 'n/a')::FLOAT as "M2_MAXDENS",
NULLIF(NULLIF("Field30", ''), 'n/a')::FLOAT as "M3_MAXDENS",
-- capacity is density * acres * development probability
-- development probability is typically about 2% or 100% according to
-- https://sfplanning.s3.amazonaws.com/archives/sfhousingelement.org/files/AppendixB.pdf#page=43
NULLIF(NULLIF("Field31", ''), 'n/a')::FLOAT as "M1_CAP",
NULLIF(NULLIF("Field32", ''), 'n/a')::FLOAT as "M2_CAP",
NULLIF(NULLIF("Field33", ''), 'n/a')::FLOAT as "M3_CAP",
"Field34" as "VACANT",
"Field35" as "EX_USE",
"Field36" as "INFRA",
"Field37" as "SS_MAP1",
"Field38" as "SS_MAP2",
"Field39" as "SS_MAP3"
FROM row_numbered
WHERE rn > 2)
SELECT *,
FLOOR(M1_MAXDENS * ACRES)::INT AS m1_max_units,
FLOOR(M2_MAXDENS * ACRES)::INT AS m2_max_units,
FLOOR(M3_MAXDENS * ACRES)::INT AS m3_max_units,
FLOOR(GREATEST(M1_MAXDENS, M2_MAXDENS, M3_MAXDENS) * ACRES)::INT AS max_units,
REPLACE(REPLACE("mapblklot", ' ', ''), '-', '') as blklot,
FROM with_column_names;
CREATE TEMP VIEW new_table_b AS
-- Can’t use st_read HEADERS=FORCE because that only supports one header row,
-- but this table has 2 header rows so we need to manually skip the header and rename the columns
WITH row_numbered AS (
SELECT *, ROW_NUMBER() OVER () as rn
FROM st_read('Appendix_B4.xlsx', layer = 'Table B', open_options=['HEADERS=DISABLE'])
),
with_column_names AS (SELECT
-- exclude Jurisdiction_Name; it's always SAN FRANCISCO
-- "Field1" as "Jurisdiction_Name",
"Field2" as "Site_Address",
"Field3" as "ZIP_Code",
"Field4" as "APN",
NULLIF("Field5", '')::FLOAT as "Very_Low_Income",
NULLIF("Field6", '')::FLOAT as "Low_Income",
NULLIF("Field7", '')::FLOAT as "Moderate_Income",
NULLIF("Field8", '')::FLOAT as "Above_Moderate_Income",
"Field9" as "Type_of_Shortfall",
-- parcel size in acres
NULLIF("Field10", '')::FLOAT as "Parcel_Size",
"Field11" as "Current_General_Plan",
"Field12" as "Current_Zoning",
"Field13" as "Proposed_GP_Designation",
"Field14" as "Proposed_Zoning",
-- min/max density per acre
NULLIF("Field15", '')::FLOAT as "Min_Density",
NULLIF("Field16", '')::FLOAT as "Max_Density",
-- total_capacity is max_density * parcel_size * development probability
-- which is mostly around 1-2% and 100% for some lots
NULLIF("Field17", '')::FLOAT as "Total_Capacity",
"Field18" as "Vacant_Nonvacant",
"Field19" as "Existing_Uses",
"Field20" as "Optional_Info1",
"Field21" as "Optional_Info2",
"Field22" as "Optional_Info3",
"Field23" as "Optional_Info32",
FROM row_numbered
WHERE rn > 2) -- Skip first two rows
SELECT *,
FLOOR(MAX_DENSITY * Parcel_Size)::INT AS max_units,
REPLACE(REPLACE("APN", ' ', ''), '-', '') as blklot,
FROM with_column_names;
SELECT
orig_table_b.m1_max_units,
orig_table_b.m2_max_units,
orig_table_b.m3_max_units,
new_table_b.max_units as new_max_units,
FROM orig_table_b INNER JOIN new_table_b USING (blklot)
WHERE GREATEST(m1_max_units, m2_max_units, m3_max_units) >= new_max_units + 1;
-- result: 15497 rows out of about 53543 rows have fewer allowed units
-- in the current version of the spreadsheet (which is basically just m3)
-- than in the original that was transmitted to HCD.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment