Created
September 10, 2016 19:06
-
-
Save bastula/89eba37a000b73c7015cf72c9ed1afd3 to your computer and use it in GitHub Desktop.
Script to plot City of Chicago building permits on a map using Luigi.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import luigi | |
import requests | |
import pandas as pd | |
class DownloadTask(luigi.Task):
    """Download the building-permit CSV from the Chicago data portal.

    The response body is written to ``data/permits.csv``; Luigi treats the
    task as complete once that file exists.

    This is a regular ``luigi.Task`` rather than an ``ExternalTask`` because
    it produces its own output in ``run``.
    """

    # CSV export URL of the permit dataset on data.cityofchicago.org.
    URL = "https://data.cityofchicago.org/api/views/ydr8-5enu/rows.csv?accessType=DOWNLOAD"

    # (connect, read) timeouts in seconds, so a stalled connection fails the
    # task instead of hanging the worker indefinitely.
    TIMEOUT = (10, 120)

    def run(self):
        """Fetch the CSV and store it in the output target.

        Raises:
            requests.HTTPError: if the portal answers with a 4xx/5xx status.
        """
        response = requests.get(self.URL, timeout=self.TIMEOUT)
        # Fail before opening the target: otherwise an error page would be
        # saved as permits.csv and the task would count as done.
        response.raise_for_status()
        with self.output().open('w') as out_file:
            out_file.write(response.text)

    def output(self):
        """Return the local target that holds the downloaded CSV."""
        return luigi.LocalTarget("data/permits.csv")
#class ParseTable(luigi.ExternalTask): | |
# def requires(self): | |
# return DownloadTask() | |
# | |
# def run(self): | |
# df = pd.read_csv(self.input()) | |
# | |
# | |
# def output(self): | |
# return luigi.LocalTarget("data/dataframe") | |
# | |
class GenerateHTML(luigi.Task):
    """Plot the downloaded permits as clustered markers on a folium map.

    Reads the CSV produced by ``DownloadTask`` and writes the rendered map
    to ``data/out.html``.
    """

    # Only the first MAX_MARKERS rows of the CSV are considered for plotting.
    MAX_MARKERS = 100000

    def requires(self):
        """Depend on the permit CSV download."""
        return DownloadTask()

    def run(self):
        """Build the marker map and write it to the output target."""
        # Imported here so folium is only needed when the map is built.
        import folium

        # Close the input handle as soon as the frame is loaded.
        with self.input().open('r') as in_file:
            df = pd.read_csv(in_file)

        # Keep the original row window, then drop rows without coordinates:
        # a marker cannot be placed at a NaN location.
        coords = df.iloc[:self.MAX_MARKERS][["LATITUDE", "LONGITUDE"]].dropna()

        mapp = folium.Map(location=[41.87, -87.62], zoom_start=10)

        # Prefer the top-level MarkerCluster the script was written against;
        # fall back to folium.plugins for releases that only ship it there.
        cluster_cls = getattr(folium, "MarkerCluster", None)
        if cluster_cls is None:
            from folium.plugins import MarkerCluster as cluster_cls
        marker_cluster = cluster_cls().add_to(mapp)

        latitudes = coords["LATITUDE"].tolist()
        longitudes = coords["LONGITUDE"].tolist()
        for lat, lon in zip(latitudes, longitudes):
            folium.Marker([lat, lon]).add_to(marker_cluster)

        # Write the rendered HTML ourselves so the target is always closed
        # (and thereby finalised) by the context manager.
        with self.output().open('w') as out_file:
            out_file.write(mapp.get_root().render())

    def output(self):
        """Return the local target that holds the generated HTML map."""
        return luigi.LocalTarget('data/out.html')
if __name__ == '__main__':
    # Command-line style arguments: run GenerateHTML (and its dependency)
    # with two workers, using the local scheduler.
    cli_args = ['GenerateHTML', '--workers', '2', '--local-scheduler']
    luigi.run(cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script was developed by @adelaneh and me at PyData Chicago 2016 as part of an exercise in @hunterowens's talk about Luigi & Data Pipelines. Hunter's talk and project repo can be found here: https://github.com/hunterowens/data-pipelines