Skip to content

Instantly share code, notes, and snippets.

@afcotroneo
Created December 13, 2021 18:08
Show Gist options
  • Select an option

  • Save afcotroneo/4fe531bd50240c96d919ebc38c1da5ad to your computer and use it in GitHub Desktop.

Select an option

Save afcotroneo/4fe531bd50240c96d919ebc38c1da5ad to your computer and use it in GitHub Desktop.
import gc
# Column names applied to each yearly CSV (passed to read_csv via `names=`).
cols = ["id", "dt", "element_", "value_", "m_flag", "q_flag", "s_flag", "obs_time"]

# Element codes to keep: only the snow rows are loaded into the database.
# Defined once here because it does not change between chunks.
strings = ['SNOW', 'SNWD']

# One CSV per year, 1900 through 2021 inclusive.
for i in range(1900, 2022):
    # NOTE(review): `connect` and `pd` are not defined in this snippet; they
    # are expected to be imported earlier in the file/notebook.
    con = connect(user="l", password="!", host="", port=0, dbname="")
    try:
        url = "https://noaa-ghcn-pds.s3.amazonaws.com/csv/" + str(i) + ".csv"

        # Stream the file in 1,000,000-row chunks instead of reading a whole
        # year into memory at once.
        data = pd.read_csv(
            url,
            names=cols,
            parse_dates=["dt"],
            dtype={
                'id': 'str',
                'element_': 'str',
                'value_': 'int32',
                'm_flag': 'str',
                'q_flag': 'str',
                's_flag': 'str',
                'obs_time': 'object',
            },
            iterator=True,
            chunksize=1000000,
        )

        for df in data:
            # Keep only the snow rows and the four columns that are loaded.
            # A single .loc selection avoids chained indexing.
            output = df.element_.isin(strings)
            df_snow = df.loc[output, ['id', 'dt', 'element_', 'value_']]
            print(df_snow.head(2))

            # Best-effort load: a failed chunk is reported and the run
            # continues with the next chunk.
            try:
                con.load_table_columnar("df_snow", df_snow)
                print('inserted ' + str(df_snow.shape[0]) + ' rows')
            except Exception as e:
                print(f"Fail to insert data {e}")

            # Drop the raw chunk before the next one is read to keep peak
            # memory down.
            del df
            gc.collect()
    finally:
        # Release this year's connection; previously every iteration opened
        # a new connection and none were ever closed.
        # NOTE(review): assumes the object returned by connect() exposes
        # close() — confirm against the database driver in use.
        con.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment