Skip to content

Instantly share code, notes, and snippets.

@pmacMaps
Created February 11, 2025 15:09
Show Gist options
  • Save pmacMaps/2c54d8e28552ed3097279981bbfea2b4 to your computer and use it in GitHub Desktop.
Save pmacMaps/2c54d8e28552ed3097279981bbfea2b4 to your computer and use it in GitHub Desktop.
Randomly remove records from a CSV file
# the majority of the script was created by ChatGPT responding to the prompt "is there a python function that can randomly remove records from a CSV file?"
import csv
import random
import sys
def remove_records(input_file, output_file, remove_fraction=0.1):
    """
    Randomly removes a fraction of records from a CSV file.

    The first row of the input is treated as a header and is always written
    back unchanged. The surviving data rows are written in the same relative
    order they had in the input file.

    Errors (missing file, invalid fraction, ...) are not raised to the caller;
    they are reported with ``print`` and the function returns ``None``.

    :param input_file: Path to the input CSV file
    :param output_file: Path to the output CSV file after removal
    :param remove_fraction: Fraction of records to remove, between 0 and 1
        inclusive (default is 10%). The number of rows removed is
        ``int(row_count * remove_fraction)``, i.e. truncated toward zero.
    """
    try:
        # Validate up front so the user gets a clear message instead of the
        # opaque "Sample larger than population or is negative" error.
        if not 0 <= remove_fraction <= 1:
            raise ValueError(
                f"remove_fraction must be between 0 and 1, got {remove_fraction!r}"
            )

        with open(input_file, 'r', newline='', encoding='utf-8') as infile:
            records = list(csv.reader(infile))

        # A file with only a header (or nothing at all) has no data to remove.
        if len(records) <= 1:
            print("Not enough records to process.")
            return

        header, rows = records[0], records[1:]  # Separate header from data
        num_to_remove = int(len(rows) * remove_fraction)

        # Sample the *indices* to drop rather than the rows to keep:
        # random.sample returns items in random order, which would shuffle
        # the output. Filtering by index preserves the original row order.
        dropped = set(random.sample(range(len(rows)), num_to_remove))
        rows_to_keep = [row for idx, row in enumerate(rows) if idx not in dropped]

        with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(header)  # Write header back
            writer.writerows(rows_to_keep)  # Write remaining rows

        print(f"Removed {num_to_remove} records. Output saved to {output_file}")
    except Exception as e:
        # Best-effort reporting: print where in this function the error
        # occurred and what it was, instead of propagating the exception.
        tb = sys.exc_info()[2]
        print(f'error at Line {tb.tb_lineno}')
        print(f'error: {e}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment