Created
December 2, 2023 22:34
-
-
Save CSutter5/2b3786196f05ca16be504fd1c074f905 to your computer and use it in GitHub Desktop.
Iowa State Environmental Mesonet Data downloader
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --- Imports (stdlib first, then third-party) ---
import argparse
import math
import os
import queue
import threading
import time

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

# NOTE(review): the original set `pd.low_memory = False`, but pandas has no
# module-level `low_memory` option — that line was a silent no-op. The real
# setting is passed as `low_memory=False` to each `pd.read_csv` call below.

"""
updater.py
This file will get all of the data and process the data
example usage:
python updater.py --network WI_ASOS --startingYear 2023 --startingMonth 11 --startingDay 1 --endingYear 2023 --endingMonth 12 --endingDay 1 --processThreads 2 --downloadThreads 2
URL:
https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station={station}&data=all&year1={StartingYear}&month1={StartingMonth}&day1={StartingDay}&year2={EndingYear}&month2={EndingMonth}&day2={EndingDay}&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=empty&trace=empty&direct=no&report_type=3&report_type=4
"""

# Command-line interface: network/date-range selection plus thread counts.
parser = argparse.ArgumentParser()
parser.add_argument("--network", type=str, default="WI_ASOS", help="The network to get the data from")
parser.add_argument("--stations", type=str, default=None, help="The stations to get the data from")
parser.add_argument("--startingYear", type=int, default=1940, help="The starting year to get the data from")
parser.add_argument("--startingMonth", type=int, default=1, help="The starting month to get the data from")
parser.add_argument("--startingDay", type=int, default=1, help="The starting day to get the data from")
parser.add_argument("--endingYear", type=int, default=2030, help="The ending year to get the data from")
parser.add_argument("--endingMonth", type=int, default=1, help="The ending month to get the data from")
parser.add_argument("--endingDay", type=int, default=1, help="The ending day to get the data from")
parser.add_argument("--downloadThreads", type=int, default=2, help="The amount of threads to use to download the data")
parser.add_argument("--processThreads", type=int, default=2, help="The amount of threads to use to process the data")

# Module-level state shared by the worker threads; populated in __main__.
network = None        # network identifier, e.g. "WI_ASOS"
stations = None       # iterable of station IDs to fetch
startingYear = None
startingMonth = None
startingDay = None
endingYear = None
endingMonth = None
endingDay = None
dataToGet = queue.Queue()      # stations waiting to be downloaded
dataToProcess = queue.Queue()  # stations downloaded, waiting to be processed
downloadDone = 0               # progress counters (cosmetic; not lock-protected)
downloadBar = None             # tqdm bars, created in __main__
processDone = 0
processBar = None
# Get Data downloads the data from the website and saves it to a file
def getData():
    """Download-worker thread body.

    Pulls station IDs off ``dataToGet``, fetches each station's ASOS data
    from the Iowa Environmental Mesonet CGI service, writes the raw CSV to
    ``data/download/{station}.csv``, then hands the station to
    ``dataToProcess``. Returns when the download queue is exhausted.
    """
    global dataToGet, downloadDone, downloadBar
    while True:
        # get_nowait + Empty fixes the empty()/get() race in the original:
        # with several workers, a queue observed as non-empty can be drained
        # by a sibling thread before get() runs, blocking this thread forever.
        try:
            station = dataToGet.get_nowait()
        except queue.Empty:
            return
        # NOTE(review): the module docstring advertises tz=Etc/UTC but this
        # request uses America/Chicago — confirm which timezone is intended.
        r = requests.get(
            f"https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station={station}&data=all&year1={startingYear}&month1={startingMonth}&day1={startingDay}&year2={endingYear}&month2={endingMonth}&day2={endingDay}&tz=America%2FChicago&format=onlycomma&latlon=no&elev=no&missing=empty&trace=empty&direct=no&report_type=3",
            timeout=300,  # avoid a hung thread if the service stalls
        )
        # Context manager guarantees the handle is closed (the original
        # open(...).write(...) relied on refcounting to close the file).
        with open(f"data/download/{station}.csv", "w") as f:
            f.write(r.content.decode())
        # Progress bookkeeping. downloadDone is not lock-protected; it only
        # feeds the tqdm postfix, so a lost update is cosmetic.
        downloadDone += 1
        downloadBar.update()
        dataToProcess.put(station)
        dataToGet.task_done()
# Process Data will process the data and turn it into a usable format
def processData():
    """Process-worker thread body.

    For each downloaded station CSV: renames ``valid`` -> ``time``, saves a
    raw copy to ``data/{network}/{station}/WeatherData.csv``, adds cyclical
    time encodings and calendar columns, encodes wind direction as a sine,
    and saves the result to ``TrimmedWeatherData.csv``. Keeps polling while
    downloads may still produce work.
    """
    global dataToProcess, dataToGet, processDone, processBar
    # While there is (or may still be) data to process. A timed get avoids
    # blocking forever when another worker takes the last queued item
    # between the empty() check and the get() call.
    while not dataToProcess.empty() or not dataToGet.empty():
        try:
            station = dataToProcess.get(timeout=1)
        except queue.Empty:
            continue
        out_dir = f"data/{network}/{station}/"
        # Bug fix: create the output directory BEFORE any write. The
        # original wrote WeatherData.csv first and only created the
        # directory afterwards, crashing on a fresh run.
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        # Read the downloaded data
        df = pd.read_csv(f"data/download/{station}.csv", low_memory=False)
        df = df.rename(columns={"valid": "time"})
        # Keep an untouched copy of the raw observations.
        df.to_csv(f"{out_dir}WeatherData.csv")
        # Index by observation time so the datetime accessors below work.
        df.set_index('time', inplace=True)
        df.index = pd.to_datetime(df.index)
        # Cyclical (sin/cos) encodings of month/day/hour — vectorized with
        # numpy instead of per-element .map(lambda ...); numerically identical.
        df['month_sin'] = np.sin(df.index.month * (2. * np.pi / 12))
        df['month_cos'] = np.cos(df.index.month * (2. * np.pi / 12))
        df['day_sin'] = np.sin(df.index.day * (2. * np.pi / 31))
        df['day_cos'] = np.cos(df.index.day * (2. * np.pi / 31))
        df['hour_sin'] = np.sin(df.index.hour * (2. * np.pi / 24))
        df['hour_cos'] = np.cos(df.index.hour * (2. * np.pi / 24))
        # Plain calendar columns.
        df['year'] = df.index.year
        df['month'] = df.index.month
        df['day'] = df.index.day
        df['hour'] = df.index.hour
        df['dayofyear'] = df.index.dayofyear
        # Wind direction (degrees) as a sine; missing values map to 0,
        # matching the original's `0 if isnan else sin(...)` (sin(0) == 0).
        df['drct_sin'] = np.sin(df['drct'].fillna(0) * (2. * np.pi / 360))
        # Bug fix: the original re-read the WeatherData.csv it had just
        # written above and concatenated it onto `df`, duplicating every
        # row (raw + processed) in the trimmed output. That self-concat is
        # removed; the processed frame alone is saved.
        df.to_csv(f"{out_dir}TrimmedWeatherData.csv")
        # Progress bookkeeping (counter is cosmetic, see getData).
        processDone += 1
        processBar.update()
        dataToProcess.task_done()
def getStations(network):
    """Download the station roster for *network* from the IEM site.

    Caches the CSV at ``data/{network}.csv`` and returns the station IDs
    (the ``stid`` column) as a numpy array.
    """
    # https://mesonet.agron.iastate.edu/sites/networks.php?network={network}&format=csv&nohtml=on
    r = requests.get(
        f"https://mesonet.agron.iastate.edu/sites/networks.php?network={network}&format=csv&nohtml=on",
        timeout=60,  # don't hang startup on a stalled request
    )
    # with-block ensures the handle is flushed/closed before the read below.
    with open(f"data/{network}.csv", "w") as f:
        f.write(r.content.decode())
    df = pd.read_csv(f"data/{network}.csv", low_memory=False)
    return df['stid'].values
if __name__ == "__main__":
    # Create the download directory if it doesn't exist.
    if not os.path.exists("data/download/"):
        os.makedirs("data/download/")
    args = parser.parse_args()
    network = args.network
    # Bug fix: the original parsed --stations and then unconditionally
    # overwrote it with the full network roster. Honor an explicit
    # --stations list; fall back to the roster otherwise.
    if args.stations is not None:
        stations = args.stations.split(",")
    else:
        stations = getStations(args.network)
    # Publish the date range for the download workers.
    startingYear = args.startingYear
    startingMonth = args.startingMonth
    startingDay = args.startingDay
    endingYear = args.endingYear
    endingMonth = args.endingMonth
    endingDay = args.endingDay
    downloadBar = tqdm(total=len(stations), desc="Amount of Stations Downloaded", postfix=downloadDone)
    processBar = tqdm(total=len(stations), desc="Amount of Stations Processed", postfix=processDone)
    # Queue every station and pre-create its output directory.
    for station in stations:
        # Bug fix: the original ran makedirs on "data/download/{station}.csv",
        # creating a DIRECTORY named "<station>.csv" that then made the
        # download's open(..., "w") fail. Create the per-station output
        # directory (needed by processData) instead.
        os.makedirs(f"data/{network}/{station}/", exist_ok=True)
        dataToGet.put(station)
    # Start the download workers.
    dataGetterThreads = []
    for _ in range(args.downloadThreads):
        t = threading.Thread(target=getData, daemon=True)
        t.start()
        dataGetterThreads.append(t)
    # Start the processing workers.
    dataProcessorThreads = []
    for _ in range(args.processThreads):
        t = threading.Thread(target=processData, daemon=True)
        t.start()
        dataProcessorThreads.append(t)
    # Wait for all of the threads to finish.
    for t in dataGetterThreads:
        t.join()
    for t in dataProcessorThreads:
        t.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment