Skip to content

Instantly share code, notes, and snippets.

@fsndzomga
Created February 28, 2024 19:28
Show Gist options
  • Save fsndzomga/7929d4e88b14a09da5dbd1fa5c2144af to your computer and use it in GitHub Desktop.
Save fsndzomga/7929d4e88b14a09da5dbd1fa5c2144af to your computer and use it in GitHub Desktop.
import streamlit as st
import pandas as pd
from data_generation import generate_dataset, generate_parts_of_dataset
# Sidebar: a large-font heading followed by the use-case selector.
# The heading is injected as raw HTML/CSS through Markdown so its font size
# can be increased.
_BIG_FONT_CSS = """
<style>
.big-font {
font-size:20px !important;
}
</style>
"""
_USE_CASES = ('Synthetic Data Generation', 'Targeted Data Generation')

sidebar = st.sidebar
sidebar.markdown(_BIG_FONT_CSS, unsafe_allow_html=True)
sidebar.markdown('<p class="big-font">Select a Use Case</p>', unsafe_allow_html=True)

# The selectbox's own label is hidden; the styled heading above shows the
# same text instead.
option = sidebar.selectbox(
    'Select a Use Case',
    _USE_CASES,
    label_visibility="hidden",
)
# Main panel: render the UI for whichever use case is selected in the sidebar.
if option == 'Synthetic Data Generation':
    # Synthetic Data Generation UI: build a dataset from column names and a row count.
    st.header("Synthetic Data Generation")
    columns = st.text_input("Enter column names separated by comma")
    rows = st.number_input("Enter the number of rows", min_value=1, value=10)
    if st.button("Generate"):
        # Drop blank entries so input such as "a,,b" or "," never passes
        # empty column names on to generate_dataset.
        columns_list = [name.strip() for name in columns.split(",") if name.strip()]
        if columns_list:
            df = generate_dataset(columns_list, rows)
            st.dataframe(df)
        else:
            st.error("Please enter at least one column name.")
elif option == 'Targeted Data Generation':
    # Targeted Data Generation UI: upload a dataset, preview it, then fill it.
    st.header("Targeted Data Generation")
    # Accept every format the reader below handles; previously only CSV could
    # be uploaded, which made the .xlsx branch unreachable.
    # NOTE(review): pd.read_excel needs an Excel engine (e.g. openpyxl) to be
    # installed for .xlsx files -- confirm it is in the deployment environment.
    uploaded_file = st.file_uploader("Upload a file", type=['csv', 'xlsx'])
    if uploaded_file is not None:
        # Compare the extension case-insensitively so "DATA.CSV" is recognised.
        file_name = uploaded_file.name.lower()
        if file_name.endswith('.csv'):
            df = pd.read_csv(uploaded_file)
        elif file_name.endswith('.xlsx'):
            df = pd.read_excel(uploaded_file)
        else:
            # Without this branch df would be unbound and the preview below
            # would fail with a NameError.
            df = None
            st.error("Unsupported file type. Please upload a .csv or .xlsx file.")

        if df is not None:
            st.write("Old dataset:")
            st.dataframe(df)
            if st.button("Fill Data"):
                # Delegate the actual filling to the data_generation helper.
                output_df = generate_parts_of_dataset(df)
                st.write("Data filled successfully")
                st.write("New dataset:")
                st.dataframe(output_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment