Created
February 11, 2025 15:09
-
-
Save pmacMaps/2c54d8e28552ed3097279981bbfea2b4 to your computer and use it in GitHub Desktop.
Randomly remove records from a CSV file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# the majority of the script was created by ChatGPT responding to the prompt "is there a python function that can randomly remove records from a CSV file?" | |
import csv | |
import random | |
import sys | |
def remove_records(input_file, output_file, remove_fraction=0.1):
    """
    Randomly remove a fraction of the data records from a CSV file.

    The first row is treated as a header and is always written back
    unchanged. The surviving records keep their original relative order;
    only which records are dropped is random.

    Errors (missing file, bad encoding, malformed CSV, ...) are reported on
    stdout rather than raised, so the function always returns None.

    :param input_file: Path to the input CSV file
    :param output_file: Path to the output CSV file after removal
    :param remove_fraction: Fraction of records to remove, between 0 and 1
        inclusive (default is 10%). The count is truncated toward zero.
    """
    try:
        # Reject fractions that cannot be honoured before touching any file,
        # instead of letting random.sample fail with an opaque message.
        if not 0 <= remove_fraction <= 1:
            print(f"error: remove_fraction must be between 0 and 1, got {remove_fraction}")
            return

        with open(input_file, 'r', newline='', encoding='utf-8') as infile:
            reader = csv.reader(infile)
            header = next(reader, None)  # None when the file is empty
            rows = list(reader)

        if header is None or not rows:
            print("Not enough records to process.")
            return

        num_to_remove = int(len(rows) * remove_fraction)

        # Sample the *indices* to drop, not the rows to keep: random.sample
        # returns its selection in random order, which would shuffle the
        # output file as a side effect.
        dropped = set(random.sample(range(len(rows)), num_to_remove))
        rows_to_keep = [row for index, row in enumerate(rows) if index not in dropped]

        with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
            writer = csv.writer(outfile)
            writer.writerow(header)  # Write header back
            writer.writerows(rows_to_keep)  # Write remaining rows

        print(f"Removed {num_to_remove} records. Output saved to {output_file}")
    except Exception as e:
        # Best-effort utility: report the failure instead of propagating it.
        # The traceback's first entry is this function's frame, so tb_lineno
        # is the line in this function where the error surfaced.
        tb = e.__traceback__
        print(f'error at Line {tb.tb_lineno}')
        print(f'error: {e}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment