Created
January 27, 2025 11:40
-
-
Save avriiil/e14baad70ab6bc999c961f88c167a7fd to your computer and use it in GitHub Desktop.
Generate butterfly data in CSV
We can make this file beautiful and searchable if this error is corrected: Illegal quoting in line 6.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This gist generates a specified number of CSV files.
# Each CSV file contains a random number of rows of butterfly data.
import os | |
import random | |
import pandas as pd | |
# Destination directory for the generated CSV files.
# Placeholder value: replace it with a real path before running the script.
output_dir = "your/output/directory"
# Step 1: Generate butterfly species data
def generate_csv_files(output_dir, num_files=25, *, min_rows=1000, max_rows=10000):
    """Write ``num_files`` CSV files of random butterfly records into ``output_dir``.

    Files are named ``butterflies_<index>.csv`` with indexes starting at 1.
    Each file gets a random row count drawn from ``[min_rows, max_rows]`` and
    every row has the columns ``species``, ``native_habitat``, ``age_days``
    and ``main_color``.

    Args:
        output_dir: Directory to write into; created (with parents) if missing.
        num_files: Number of CSV files to generate.
        min_rows: Smallest number of rows a file may contain (inclusive).
        max_rows: Largest number of rows a file may contain (inclusive).

    Returns:
        The paths of the files that were written, in generation order. The
        list is empty when the output directory could not be created.

    Raises:
        ValueError: If ``min_rows`` is negative or greater than ``max_rows``.
    """
    # Validate before touching the filesystem so a bad range has no side effects.
    if min_rows < 0 or min_rows > max_rows:
        raise ValueError(
            f"Invalid row range: min_rows={min_rows}, max_rows={max_rows}"
        )

    # Ensure output directory exists. exist_ok=True avoids the check-then-create
    # race of a separate os.path.exists() test.
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Failed to create output directory: {e}")
        return []
    print(f"Output directory: {os.path.abspath(output_dir)}")

    # Species details
    species_data = [
        {"species": "Papilio machaon", "native_habitat": "Europe"},
        {"species": "Danaus plexippus", "native_habitat": "North America"},
        {"species": "Pieris rapae", "native_habitat": "Asia"},
        {"species": "Vanessa atalanta", "native_habitat": "Europe"},
        {"species": "Morpho menelaus", "native_habitat": "South America"},
        {"species": "Heliconius charithonia", "native_habitat": "Central America"},
    ]
    colors = ["black", "orange", "white", "blue", "yellow"]

    # Generate CSV files
    written_paths = []
    for file_index in range(1, num_files + 1):
        num_rows = random.randint(min_rows, max_rows)
        # Unpacking the chosen species dict first keeps the column order
        # species, native_habitat, age_days, main_color.
        rows = [
            {
                **random.choice(species_data),
                "age_days": random.randint(1, 111),
                "main_color": random.choice(colors),
            }
            for _ in range(num_rows)
        ]

        # Save file
        file_path = os.path.join(output_dir, f"butterflies_{file_index}.csv")
        if not rows:
            # Only reachable when the caller allows zero-row files.
            print(f"No data generated for file {file_index}. Skipping...")
            continue
        try:
            pd.DataFrame(rows).to_csv(file_path, index=False)
        except OSError as e:
            # Best effort: report the failure and carry on with the next file.
            print(f"Failed to write {file_path}: {e}")
        else:
            print(f"Generated {file_path} with {num_rows} rows.")
            written_paths.append(file_path)
    return written_paths
# Run the generator, writing the CSV files into `output_dir`.
generate_csv_files(output_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment