Last active
May 21, 2022 03:36
-
-
Save davidshumway/23c098cbd52bdf0a84c8530b3353f647 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
When everything is finished, compress each generated .n3 file, e.g. `gzip -k -f buildingObs.n3`.
'''
import random
import time

import numpy as np
import pandas as pd
# Namespace declarations written at the top of every generated .n3 file.
# Every prefix used by the output templates in this script (`sosa`, `xsd`,
# `ex`, and `geo` for the WKT geometries) has to be declared here, otherwise
# the resulting file contains unbound prefixes and will not parse.
prefix = '''
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix sosa: <http://www.w3.org/ns/sosa/>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix geo: <http://www.opengis.net/ont/geosparql#>
prefix ex: <http://www.example3.com/>
'''
'''
Weather station observations
'''
# Reference instant ("now", in epoch seconds); every observation date is
# counted backwards from it in whole days.
n = time.time()
# 10 years
# 300 stations * 15 obs/day * 10 years * 365 days/year
size = 300*15*10*365
df_weather_obs = pd.DataFrame({
    # 300 stations. 0-299, then repeats.
    'station': np.arange(size) % 300,
    'result': np.random.randint(0, 10, size, dtype=np.uint32),
    # Every 300 rows, increment by 1 until 14. Then repeat.
    'property': (np.arange(size) // 300) % 15,
    # Each day contains 300 stations * 15 parameters per station per day.
    # Convert to seconds per day, then subtract from today.
    'time': n - ((np.arange(size) // (300*15)) * 24*60*60),
})
df_weather_obs['time'] = df_weather_obs['time'].astype('datetime64[s]')
df_weather_obs['year'] = df_weather_obs['time'].dt.year
df_weather_obs['month'] = df_weather_obs['time'].dt.month
df_weather_obs['day'] = df_weather_obs['time'].dt.day
# Keep the original row number as the observation id before sampling
# shuffles the rows.
df_weather_obs['idx'] = df_weather_obs.index
# Assume that the stations occasionally don't report one or more parameters on some days.
df_weather_obs = df_weather_obs.sample(frac=9999/10000)
# Write to n3 file
fn = 'weatherObs.n3'
# Reorder cols so they line up with the placeholders in the template below.
df_weather_obs = df_weather_obs[['idx','station','year','month','day','property','result']]
with open(fn, 'w') as f:
    f.write(prefix)
    # One template expansion per row. Month and day are zero-padded because
    # xsd:dateTime requires the YYYY-MM-DD lexical form ("2022-5-1" is invalid).
    np.savetxt(f, df_weather_obs.to_numpy(), fmt='''
ex:weatherObservation-%s a sosa:Observation ;
sosa:hasFeatureOfInterest ex:weatherStation-%s ;
sosa:resultTime "%d-%02d-%02dT00:00:00"^^xsd:dateTime ;
sosa:hasProperty "%s"^^xsd:string ;
sosa:hasSimpleResult "%s"^^xsd:double ;
ex:observationType "weather"^^xsd:string .
''')
'''
Building observations
'''
# Reference instant ("now", in epoch seconds); every observation date is
# counted backwards from it in whole days.
n = time.time()
# 600 buildings * 1 obs/day * 365 days/year * 10 years
size = 600*1*365*10
df_building_obs = pd.DataFrame({
    # 600 buildings. 0-599, then repeats.
    'building': np.arange(size) % 600,
    # Each block of 600 consecutive rows is one day; step back one day per block.
    'time': n - ((np.arange(size) // 600) * 24*60*60),
    'result': np.random.randint(0, 10, size, dtype=np.uint32),
})
df_building_obs['time'] = df_building_obs['time'].astype('datetime64[s]')
df_building_obs['year'] = df_building_obs['time'].dt.year
df_building_obs['month'] = df_building_obs['time'].dt.month
df_building_obs['day'] = df_building_obs['time'].dt.day
# Keep the original row number as the observation id before sampling
# shuffles the rows.
df_building_obs['idx'] = df_building_obs.index
# 2190000 rows means every building has 1 obs per day but we only assume
# 50 obs per year per building.
df_building_obs = df_building_obs.sample(frac=50/365)
# Write to n3 file
fn = 'buildingObs.n3'
# Reorder cols so they line up with the placeholders in the template below.
df_building_obs = df_building_obs[['idx','building','year','month','day','result']]
with open(fn, 'w') as f:
    f.write(prefix)
    # One template expansion per row. Month and day are zero-padded because
    # xsd:dateTime requires the YYYY-MM-DD lexical form ("2022-5-1" is invalid).
    np.savetxt(f, df_building_obs.to_numpy(), fmt='''
ex:buildingObservation-%s a sosa:Observation ;
sosa:hasFeatureOfInterest ex:building-%s ;
sosa:resultTime "%d-%02d-%02dT00:00:00"^^xsd:dateTime ;
sosa:hasSimpleResult "%s"^^xsd:double ;
ex:observationType "building"^^xsd:string .
''')
'''
Distance + FOI
'''
from geopy import distance

# Features of interest: 300 weather stations and 600 buildings, each placed
# at a random latitude/longitude.
size = 300
df1 = pd.DataFrame({
    'station': np.arange(size).astype(int),
    'lat': np.random.uniform(-90, 90, size),
    'lon': np.random.uniform(-180, 180, size),
})
size = 600
df2 = pd.DataFrame({
    'building': np.arange(size).astype(int),
    'lat': np.random.uniform(-90, 90, size),
    'lon': np.random.uniform(-180, 180, size),
})

# Every (station, building) pair: the building id cycles fastest, the station
# id advances once per 600 rows.
_pair_ids = np.arange(600*300)
dist = pd.DataFrame({
    'building': (_pair_ids % 600).astype(int),
    'station': (_pair_ids // 600).astype(int),
})

# Look the coordinates up once per feature rather than once per pair.
_station_points = list(zip(df1['lat'], df1['lon']))
_building_points = list(zip(df2['lat'], df2['lon']))
dist['km'] = [
    distance.distance(_station_points[s], _building_points[b]).km
    for s, b in zip(dist['station'], dist['building'])
]
# Distances are stored as whole kilometres (fraction truncated).
dist['km'] = dist['km'].astype(int)
# Column order expected by the distance template:
# station, km, building, building, km, station.
dist = dist.iloc[:, [1, 2, 0, 0, 2, 1]]
# Write both feature-of-interest tables (stations first, then buildings)
# into a single N3 file.
fn = 'foi.n3'
# Placeholders are filled per row with: id, lat, lon.
_station_template = '''
ex:weatherStation-%s a sosa:Sensor ;
ex:geoType "weather"^^xsd:string ;
geo:asWKT "<http://www.opengis.net/def/crs/EPSG/0/4326> POINT(%s %s)"^^geo:wktLiteral .
'''
_building_template = '''
ex:building-%s a sosa:Sensor ;
ex:geoType "building"^^xsd:string ;
geo:asWKT "<http://www.opengis.net/def/crs/EPSG/0/4326> POINT(%s %s)"^^geo:wktLiteral .
'''
with open(fn, 'w') as f:
    f.write(prefix)
    for _frame, _template in ((df1, _station_template), (df2, _building_template)):
        # Values are converted to strings up front so each %s prints them as-is.
        np.savetxt(f, _frame.to_numpy(dtype=str), fmt=_template)
# Write the station/building distances to file. Each row yields two
# statements, one per direction of the relationship.
fn = 'dist.n3'
# Placeholders, in order: station, km, building, building, km, station.
_distance_template = '''
ex:weatherStation-%s ex:hasDistanceToBuilding [
ex:distance "%s"^^xsd:decimal ;
ex:building ex:building-%s ] .
ex:building-%s ex:hasDistanceToStation [
ex:distance "%s"^^xsd:decimal ;
ex:weatherStation ex:weatherStation-%s ] .
'''
_distance_rows = dist.to_numpy(dtype=str)
with open(fn, 'w') as f:
    f.write(prefix)
    np.savetxt(f, _distance_rows, fmt=_distance_template)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment