Skip to content

Instantly share code, notes, and snippets.

@oskarryn
Created May 22, 2021 15:39
Show Gist options
  • Save oskarryn/3b3cb145b4027ec11711e6f23c9e1429 to your computer and use it in GitHub Desktop.
from scipy.stats import skewnorm
import numpy as np
from pyspark.sql import SparkSession
# Obtain (or reuse) the process-wide SparkSession used to build the DataFrame below.
spark = SparkSession.builder.getOrCreate()
def generate_cycle_randomly(unit_id, cycle, model_variant, label):
    """Draw one synthetic sensor row for a single cycle of one unit.

    Temperature is a left-skewed draw around 50 plus Gaussian noise;
    pressure is uniform on [900, 1200) plus Gaussian noise. Both are
    rounded to 2 decimals. Returns a 6-tuple:
    (unit_id, cycle, model_variant, temp, pressure, label).
    """
    # NOTE: the RNG calls are kept in the original order/form so the
    # random stream (and thus generated data) is unchanged.
    skewed_base = skewnorm.rvs(-8, size=1).item()
    temp_reading = 50 + skewed_base + np.random.normal(0, 5)
    pressure_reading = np.random.uniform(900, 1200) + np.random.normal(0, 50)
    return (
        unit_id,
        cycle,
        model_variant,
        round(temp_reading, 2),
        round(pressure_reading, 2),
        label,
    )
def generate_cycles(unit_id, model_variant, init_rul):
    """Generate the full run-to-failure history for one unit.

    Produces `init_rul` rows via generate_cycle_randomly, with `cycle`
    counting up 0..init_rul-1 and the `label` (remaining useful life)
    counting down init_rul..1.

    The original version iterated `enumerate(range(rul))` with an unused
    loop variable and a manual `rul -= 1` counter; the label is simply
    `init_rul - cycle`, computed directly here (identical output).
    """
    rows = []
    for cycle in range(init_rul):
        # Remaining useful life at this cycle: init_rul on cycle 0, 1 on the last.
        rows.append(
            generate_cycle_randomly(
                unit_id=unit_id,
                cycle=cycle,
                model_variant=model_variant,
                label=init_rul - cycle,
            )
        )
    return rows
# Build a small two-unit demo dataset and load it into a Spark DataFrame.
data = []
for uid, variant, rul in ((0, 'A', 45), (1, 'B', 40)):
    data.extend(generate_cycles(unit_id=uid, model_variant=variant, init_rul=rul))
df = spark.createDataFrame(
    data,
    schema=["unit_id", "cycle", "model_variant", "temp", "pressure", "label"],
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment