Last active
August 29, 2015 14:16
-
-
Save clhenrick/6bc3d28ea67250b22a2a to your computer and use it in GitHub Desktop.
find distinct addresses from dhcr rent stabilized list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- PostgreSQL | |
-- combine data for all years into one table | |
-- to find distinct addresses & BBL numbers | |
create table dhcr_all ( | |
zip integer, | |
bldgno1 text, | |
street_name1 text, | |
street_suffix1 text, | |
bldgno2 text, | |
street_name2 text, | |
street_suffix2 text, | |
bldgno3 text, | |
street_name3 text, | |
street_suffix3 text, | |
status1 text, | |
status2 text, | |
status3 text, | |
block text, | |
lot integer, | |
boro_code integer | |
); | |
insert into dhcr_all ( | |
zip, | |
bldgno1, | |
street_name1, | |
street_suffix1, | |
bldgno2, | |
street_name2, | |
street_suffix2, | |
bldgno3, | |
street_name3, | |
street_suffix3, | |
status1, | |
status2, | |
status3, | |
block, | |
lot, | |
boro_code | |
) | |
SELECT * FROM dhcr2009tmp; | |
insert into dhcr_all ( | |
zip, | |
bldgno1, | |
street_name1, | |
street_suffix1, | |
bldgno2, | |
street_name2, | |
street_suffix2, | |
bldgno3, | |
street_name3, | |
street_suffix3, | |
status1, | |
status2, | |
status3, | |
block, | |
lot, | |
boro_code | |
) | |
SELECT * FROM dhcr2011; | |
insert into dhcr_all ( | |
zip, | |
bldgno1, | |
street_name1, | |
street_suffix1, | |
bldgno2, | |
street_name2, | |
street_suffix2, | |
bldgno3, | |
street_name3, | |
street_suffix3, | |
status1, | |
status2, | |
status3, | |
block, | |
lot, | |
boro_code | |
) | |
SELECT * FROM dhcr2012; | |
-- 2013 doesn't have a bldg03, street3, stsufx3 columns | |
insert into dhcr_all ( | |
zip, | |
bldgno1, | |
street_name1, | |
street_suffix1, | |
bldgno2, | |
street_name2, | |
street_suffix2, | |
status1, | |
status2, | |
status3, | |
block, | |
lot, | |
boro_code | |
) | |
SELECT | |
"ZIP", | |
"BLDGNO1", | |
"STREET1", | |
"STSUFX1", | |
"BLDGNO2", | |
"STREET2", | |
"STSUFX2", | |
"STATUS1", | |
"STATUS2", | |
"STATUS3", | |
"BLOCK", | |
"LOT", | |
"BORO_CODE" | |
FROM dhcr2013; | |
-- should return about 170,000 rows | |
select count(*) from dhcr_all; | |
-- split bldgno1 column into two separate numbers for geocoding with NYC Geoclient API | |
alter table dhcr_all add column bldgno1a text; | |
alter table dhcr_all add column bldgno1b text; | |
update dhcr_all set bldgno1a = split_part(bldgno1, 'TO', 1); | |
update dhcr_all set bldgno1b = split_part(bldgno1, 'TO', 2; | |
-- find all distinct addresses | |
select bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip | |
from dhcr_all | |
group by bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip | |
-- returns a measley 50,000 rows, DHCR's data is obviously not complete! | |
select count(*) from ( | |
select bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip | |
from dhcr_all | |
group by bldgno1, bldgno1a, bldgno1b, street_name1, street_suffix1, boro_code, zip | |
) as distinct_addresses; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment