Skip to content

Instantly share code, notes, and snippets.

@ashemag
Created November 17, 2021 23:06
Show Gist options
  • Save ashemag/6c05d9d91571eac735f2cb58c04ed6cb to your computer and use it in GitHub Desktop.
Save ashemag/6c05d9d91571eac735f2cb58c04ed6cb to your computer and use it in GitHub Desktop.
import argparse
import os

# TensorFlow's native logging is controlled by this variable. It is assigned
# before the TensorFlow import so that messages emitted while the library
# loads are filtered as well, not only those logged afterwards.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np  # noqa: E402
import tensorflow as tf  # noqa: E402

from build.gcp_utils import download_from_gcp, write_to_gcp  # noqa: E402
def process_data(filename):
    """Download the named file from the ``data`` folder and return it.

    Args:
        filename: Name of the file, forwarded unchanged to
            ``download_from_gcp``.

    Returns:
        Whatever ``download_from_gcp`` returns (presumably a DataFrame, given
        the ``df`` naming -- confirm against ``build.gcp_utils``). No
        processing is applied yet; this is a placeholder hook.
    """
    print("Downloading from gcp...", flush=True)
    df = download_from_gcp(folder="data", filename=filename)
    # processing steps here!
    return df
def process_features(df):
    """Return ``df`` unchanged; placeholder hook for feature processing.

    Args:
        df: The data to derive features from.

    Returns:
        The same object that was passed in (no copy is made).
    """
    # process features here!
    return df
def data_handler(args):
    """Run download -> process -> upload, then record the result path.

    Args:
        args: Parsed command-line namespace. Reads ``args.experiment_name``
            (used as the destination folder for the upload) and
            ``args.data_filename`` (passed to ``process_data``). Both must be
            strings; the first log line concatenates them.

    Side effects:
        Uploads the processed data via ``write_to_gcp`` and writes the result
        path string to ``/result_path.txt``.
    """
    print(args.experiment_name + "\n" + args.data_filename, flush=True)

    print("Processing data...", flush=True)
    df = process_data(args.data_filename)

    print("Processing features...", flush=True)
    df = process_features(df)

    print("Writing to gcp...", flush=True)
    write_to_gcp(df, folder=args.experiment_name, filename="data", csv=False)
    print("Finished writing to gcp...", flush=True)

    # Plain literal: the previous f-string contained no placeholders.
    # NOTE(review): this path is fixed and does not include experiment_name,
    # although the upload above targets folder=args.experiment_name -- confirm
    # that downstream consumers expect this value.
    result_path = "my_result_path/data.parquet"
    print("Writing result path, ", result_path, flush=True)

    # NOTE(review): absolute path at the filesystem root; presumably collected
    # by whatever launches this script -- confirm before changing.
    with open("/result_path.txt", "w", encoding="utf-8") as f:
        f.write(result_path)
if __name__ == "__main__":
    # Command-line entry point: parse the two arguments and run the handler.
    parser = argparse.ArgumentParser(
        description="Download a data file, process it, and upload the result."
    )
    # Both spellings (underscore and hyphen) are accepted for each flag so the
    # two options are consistent; the original spellings keep working.
    # Both are required: data_handler concatenates them as strings, so a
    # missing value previously surfaced as a TypeError instead of a usage error.
    parser.add_argument(
        "--data_filename",
        "--data-filename",
        dest="data_filename",
        type=str,
        required=True,
        help="Name of the file to download from the 'data' folder.",
    )
    parser.add_argument(
        "--experiment-name",
        "--experiment_name",
        dest="experiment_name",
        type=str,
        required=True,
        help="Experiment name; used as the destination folder for the upload.",
    )
    args = parser.parse_args()
    print("Args here", args, flush=True)
    data_handler(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment