Skip to content

Instantly share code, notes, and snippets.

@avriiil
Created January 27, 2025 11:40
Show Gist options
  • Save avriiil/e14baad70ab6bc999c961f88c167a7fd to your computer and use it in GitHub Desktop.
Save avriiil/e14baad70ab6bc999c961f88c167a7fd to your computer and use it in GitHub Desktop.
Generate butterfly data in CSV
We can make this file beautiful and searchable if this error is corrected: Illegal quoting in line 6.
# This gist generates a specified number of CSV files
# Each CSV file contains a varying number of rows about butterflies
import os
import random
import pandas as pd
# Placeholder destination directory for the generated CSV files; it is passed
# to generate_csv_files() at the bottom of the script. Replace with a real path.
output_dir = "your/output/directory"
# Generate CSV files of randomly sampled butterfly records
def _random_butterfly_row(rng, species_data, colors):
    """Build one butterfly record, drawing every random field from *rng*."""
    butterfly = rng.choice(species_data)
    return {
        "species": butterfly["species"],
        "native_habitat": butterfly["native_habitat"],
        "age_days": rng.randint(1, 111),
        "main_color": rng.choice(colors),
    }


def generate_csv_files(
    output_dir,
    num_files=25,
    *,
    min_rows=1000,
    max_rows=10000,
    seed=None,
):
    """Write ``num_files`` CSV files of random butterfly records to ``output_dir``.

    Files are named ``butterflies_<i>.csv`` with ``i`` starting at 1. Each file
    holds a random number of rows (between ``min_rows`` and ``max_rows``,
    inclusive) with the columns ``species``, ``native_habitat``, ``age_days``
    and ``main_color``.

    Failures to create the directory or to write a file are reported on stdout
    rather than raised, so one bad file does not abort the whole run.

    Args:
        output_dir: Directory to write into; created (with parents) if missing.
        num_files: Number of CSV files to generate.
        min_rows: Smallest number of rows a file may contain.
        max_rows: Largest number of rows a file may contain.
        seed: Optional seed for reproducible output. When ``None`` the shared
            ``random`` module state is used.

    Raises:
        ValueError: If ``min_rows`` is negative or greater than ``max_rows``.
    """
    if min_rows < 0 or max_rows < min_rows:
        raise ValueError(
            f"Invalid row bounds: min_rows={min_rows}, max_rows={max_rows}"
        )

    # exist_ok=True avoids the check-then-create race of os.path.exists().
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Failed to create output directory: {e}")
        return
    print(f"Output directory: {os.path.abspath(output_dir)}")

    # Species details
    species_data = [
        {"species": "Papilio machaon", "native_habitat": "Europe"},
        {"species": "Danaus plexippus", "native_habitat": "North America"},
        {"species": "Pieris rapae", "native_habitat": "Asia"},
        {"species": "Vanessa atalanta", "native_habitat": "Europe"},
        {"species": "Morpho menelaus", "native_habitat": "South America"},
        {"species": "Heliconius charithonia", "native_habitat": "Central America"},
    ]
    colors = ["black", "orange", "white", "blue", "yellow"]

    # Fall back to the module-level generator when no seed is given so that a
    # caller's own random.seed(...) keeps governing the output.
    rng = random if seed is None else random.Random(seed)

    for file_index in range(1, num_files + 1):
        num_rows = rng.randint(min_rows, max_rows)
        rows = [
            _random_butterfly_row(rng, species_data, colors)
            for _ in range(num_rows)
        ]

        # Only reachable when min_rows is 0; skip instead of writing a
        # header-less empty file.
        if not rows:
            print(f"No data generated for file {file_index}. Skipping...")
            continue

        file_path = os.path.join(output_dir, f"butterflies_{file_index}.csv")
        try:
            pd.DataFrame(rows).to_csv(file_path, index=False)
        except OSError as e:
            print(f"Failed to write {file_path}: {e}")
        else:
            print(f"Generated {file_path} with {num_rows} rows.")
# Run the generator only when executed as a script, so that importing this
# module does not write files as a side effect.
if __name__ == "__main__":
    generate_csv_files(output_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment