Last active
November 19, 2024 18:15
-
-
Save ranchodeluxe/b33ec007134816b9f26a76a5bf98afc5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from postal.expand import expand_address | |
import sys | |
import xxhash | |
import math | |
import pprint | |
import pandas as pd | |
from sqlalchemy import create_engine, text | |
from datetime import datetime, timedelta | |
DATABASE_URI = "postgresql://kaiizenadmin:[email protected]/kaiizen" | |
engine = create_engine(DATABASE_URI) | |
def meters_to_degrees(meters, latitude): | |
meters_per_degree_lat = 111320 | |
latitude_degrees = meters / meters_per_degree_lat | |
meters_per_degree_lon = 111320 * math.cos(math.radians(latitude)) | |
longitude_degrees = meters / meters_per_degree_lon | |
return { | |
"latitude_degrees": latitude_degrees, | |
"longitude_degrees": longitude_degrees, | |
} | |
def match_address_expansion(address): | |
expansions = expand_address(address) | |
expansion_hashes = [xxhash.xxh64(ex).intdigest() for ex in expansions] | |
query = """ | |
SELECT * | |
FROM properties_address_expansions | |
WHERE expansion_hash = ANY(%(expansion_hashes)s) | |
""" | |
matches = pd.read_sql(query, engine, params={"expansion_hashes": expansion_hashes}) | |
return matches | |
def get_subject_properties_raw(address_expansions): | |
""" | |
:param address_expansions: | |
:return: | |
""" | |
if address_expansions.loc[0].dwellsy_pk is None: | |
query = """ | |
SELECT | |
DISTINCT(pae.cherre_pk), | |
ps.pk AS cherre_pk, | |
ps.addr AS properties_addr, | |
ps.addr_city AS properties_city, | |
ps.addr_state AS properties_state, | |
ps.addr_zip AS properties_zip, | |
ps.last_sale_price | |
FROM | |
properties_address_expansions pae | |
LEFT JOIN | |
properties_simple ps ON pae.cherre_pk = ps.pk | |
WHERE | |
pae.cherre_pk = %(cherre_pk)s | |
""" | |
comps = pd.read_sql( | |
query, | |
engine, | |
params={"cherre_pk": int(address_expansions.loc[0].cherre_pk)}, | |
) | |
else: | |
query = """ | |
SELECT | |
DISTINCT(pae.cherre_pk), | |
ps.pk AS cherre_pk, | |
ps.addr AS properties_addr, | |
ps.addr_city AS properties_city, | |
ps.addr_state AS properties_state, | |
ps.addr_zip AS properties_zip, | |
ps.last_sale_price, | |
ls.pk AS dwellsy_pk, | |
# ls.addr AS listings_addr, | |
# ls.addr_city AS listings_city, | |
# ls.addr_state AS listings_state, | |
# ls.addr_zip AS listings_zip, | |
ls.listing_amount, | |
ls.listing_deposit | |
FROM | |
properties_address_expansions pae | |
LEFT JOIN | |
properties_simple ps ON pae.cherre_pk = ps.pk | |
LEFT JOIN | |
listings_simple ls ON pae.dwellsy_pk = ls.pk | |
WHERE | |
pae.dwellsy_pk = %(dwellsy_pk)s AND | |
pae.cherre_pk = %(cherre_pk)s | |
""" | |
comps = pd.read_sql( | |
query, | |
engine, | |
params={ | |
"dwellsy_pk": int(address_expansions.loc[0].dwellsy_pk), | |
"cherre_pk": int(address_expansions.loc[0].cherre_pk), | |
}, | |
) | |
return comps | |
def find_comparables(dw_id: int, get_rental_rates=False): | |
""" """ | |
with engine.connect() as connection: | |
# 1. get subject property from PropDB | |
subject_query = text( | |
""" | |
SELECT | |
pk, | |
last_sale_price, | |
last_sale_date, | |
bed_count, | |
bath_count, | |
year_built, | |
sqft_building, | |
addr_city, | |
msa_name, | |
ST_X(coords) AS longitude, | |
ST_Y(coords) AS latitude | |
FROM properties_simple | |
WHERE pk = :dw_id | |
""" | |
) | |
subject_property = pd.read_sql( | |
subject_query, connection, params={"dw_id": str(dw_id)} | |
).iloc[0] | |
print(f"[ SUBJECT PROPERTY ]") | |
print( | |
subject_property[ | |
[ | |
"last_sale_price", | |
"last_sale_date", | |
"bed_count", | |
"bath_count", | |
"year_built", | |
"sqft_building", | |
] | |
] | |
) | |
# 2. filters set up etc | |
is_rural = is_rural_area( | |
subject_property["addr_city"], subject_property["msa_name"] | |
) | |
proximity_distance_meters = 5 * 1609.34 if is_rural else 1 * 1609.34 | |
proximity_distance_degrees = meters_to_degrees( | |
proximity_distance_meters, subject_property.latitude | |
) | |
min_sale_price = subject_property["last_sale_price"] * 0.75 | |
max_sale_price = subject_property["last_sale_price"] * 1.25 | |
min_sale_date = datetime.now() - timedelta(days=360) | |
min_beds = (subject_property["bed_count"] or 1) - 1 | |
max_beds = (subject_property["bed_count"] or 1) + 1 | |
min_baths = (subject_property["bath_count"] or 1) - 1 | |
max_baths = (subject_property["bath_count"] or 1) + 1 | |
min_year_built = (subject_property["year_built"] or 1700) - 5 | |
max_year_built = (subject_property["year_built"] or 1700) + 5 | |
min_sqft = (subject_property["sqft_building"] or 0) - 300 | |
max_sqft = (subject_property["sqft_building"] or 0) + 300 | |
# 3. appply filters to find comparables in PropDB | |
# | |
# TODO: what we really want here to use all the above | |
# filters but we get zero comps with building sqft | |
# | |
# WHERE | |
# ps.last_sale_price BETWEEN :min_sale_price AND :max_sale_price | |
# AND ps.last_sale_date >= :min_sale_date | |
# AND ps.bed_count BETWEEN :min_beds AND :max_beds | |
# AND ps.bath_count BETWEEN :min_baths AND :max_baths | |
# AND ps.year_built BETWEEN :min_year_built AND :max_year_built | |
# AND ps.sqft_building BETWEEN :min_sqft AND :max_sqft | |
# | |
comparables_query = text( | |
""" | |
WITH filtered_properties AS ( | |
SELECT * | |
FROM properties_simple | |
WHERE ST_DWithin( | |
coords, | |
ST_SetSRID(ST_MakePoint(:longitude, :latitude), 4326), | |
:proximity_distance_degrees | |
) | |
) | |
SELECT | |
ps.pk, | |
ps.addr, | |
ps.addr_city, | |
ps.addr_state, | |
ps.addr_zip, | |
ps.last_sale_price, | |
ps.last_sale_date, | |
ps.bed_count, | |
ps.bath_count, | |
ps.sqft_building, | |
ps.year_built, | |
ST_X(ps.coords) AS "longitude", | |
ST_Y(ps.coords) AS "latitude", | |
ST_DistanceSphere( | |
ps.coords, | |
ST_SetSRID(ST_MakePoint(:longitude, :latitude), 4326) | |
) / 1609.34 AS "distance_miles" | |
FROM filtered_properties ps | |
WHERE | |
ps.last_sale_price IS NOT NULL | |
AND ps.last_sale_date IS NOT NULL | |
AND ps.last_sale_price BETWEEN :min_sale_price AND :max_sale_price | |
AND ps.last_sale_date >= :min_sale_date | |
AND ps.bed_count BETWEEN :min_beds AND :max_beds | |
AND ps.bath_count BETWEEN :min_baths AND :max_baths | |
AND ps.year_built BETWEEN :min_year_built AND :max_year_built | |
AND ps.sqft_building BETWEEN :min_sqft AND :max_sqft | |
ORDER BY ps.pk, "distance_miles" | |
LIMIT 50; | |
""" | |
) | |
params = { | |
"longitude": subject_property["longitude"], | |
"latitude": subject_property["latitude"], | |
"min_sale_price": min_sale_price, | |
"max_sale_price": max_sale_price, | |
"min_sale_date": min_sale_date, | |
"min_beds": min_beds, | |
"max_beds": max_beds, | |
"min_baths": min_baths, | |
"max_baths": max_baths, | |
"min_year_built": min_year_built, | |
"max_year_built": max_year_built, | |
"min_sqft": min_sqft, | |
"max_sqft": max_sqft, | |
"proximity_distance_degrees": proximity_distance_degrees[ | |
"latitude_degrees" | |
], | |
} | |
# pprint.pprint(params) | |
comparables = pd.read_sql(comparables_query, connection, params=params) | |
cherre_ids = comparables["pk"].tolist() | |
if not cherre_ids: | |
# raise ValueError("No comparables found for subject property") | |
return comparables | |
# 5. if there are comparables and we care about rental rates then return them | |
if not get_rental_rates: | |
return comparables | |
if get_rental_rates: | |
listings_query = text( | |
""" | |
SELECT DISTINCT ON (pae.cherre_pk) | |
pae.cherre_pk AS "cherreId", | |
ls.pk AS "dwellsyId", | |
ls.listing_amount AS "listingAmount", | |
ls.listing_deposit AS "listingDeposit" | |
FROM properties_address_expansions pae | |
INNER JOIN listings_simple ls ON pae.dwellsy_pk = ls.pk | |
WHERE pae.cherre_pk = ANY(:cherre_ids) | |
""" | |
) | |
listings = pd.read_sql( | |
listings_query, connection, params={"cherre_ids": cherre_ids} | |
) | |
# 6. Merge comparables with listings | |
merged_data = pd.merge(comparables, listings, how="left", on="cherreId") | |
return merged_data.to_dict(orient="records") | |
def is_rural_area(city_name: str, msa_name: str) -> bool: | |
""" | |
Determines if a given city is considered rural. | |
:param city_name: The name of the city. | |
:param msa_name: The name of the Metropolitan Statistical Area (MSA). | |
:return: True if rural, False otherwise. | |
""" | |
return msa_name is None | |
if __name__ == "__main__": | |
all_address = """4942 W Novak Way Laveen AZ 85339 | |
1466 E Avenida Fresca Casa Grande AZ 85122 | |
1609 Carter Rd. Decatur GA 30032 | |
14875 Arrowhead Dr Yukon OK 73099 | |
3430 Rocky Springs Ct Marietta GA 30062 | |
2485 Hillside ave. Decatur GA 30032 | |
1427 Beaverton Ave Cincinnati OH 45237 | |
950 Sugar Meadow Dr Sugar Hill GA 30518 | |
5114 Crestway Dr, La Porte TX 77571 | |
10415 Gloria St Gibsonton FL 33534 | |
2859 Alexandria Dr SW Atlanta GA 30331""".split( | |
"\n" | |
) | |
for addr in all_address: | |
print(f"############################") | |
print(f"[ ADDRESS ]: {addr.lstrip()}") | |
expansions = match_address_expansion(addr) | |
# print(expansions) | |
if expansions.empty: | |
continue | |
comps = find_comparables(expansions.loc[0].cherre_pk) | |
if comps.empty: | |
print(f"[ COMPS ]: 0") | |
continue | |
print(f"[ COMPS ]: {len(comps)}") | |
print( | |
comps[ | |
[ | |
"last_sale_price", | |
"last_sale_date", | |
"bed_count", | |
"bath_count", | |
"year_built", | |
"sqft_building", | |
"distance_miles", | |
] | |
] | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment