Skip to content

Instantly share code, notes, and snippets.

@alexwilson
Created June 24, 2025 07:16
Show Gist options
  • Save alexwilson/70275842900fcea04593a24aa8dad588 to your computer and use it in GitHub Desktop.
Save alexwilson/70275842900fcea04593a24aa8dad588 to your computer and use it in GitHub Desktop.
Fix malformed CSVs
import csv
import sys
def escape_field(field):
    """Return ``field`` rendered as a single CSV token.

    A field containing a double quote, a comma, a line feed or a carriage
    return is wrapped in double quotes, with every embedded double quote
    doubled. Any other field is returned unchanged.
    """
    must_quote = False
    for special in ('"', ',', '\n', '\r'):
        if special in field:
            must_quote = True
            break

    # Guard clause: plain fields pass straight through.
    if not must_quote:
        return field

    doubled = field.replace('"', '""')
    return '"' + doubled + '"'
def fix_row(fields, expected_columns):
    """Coerce one comma-split row to ``expected_columns`` raw field values.

    The returned strings are deliberately *not* CSV-escaped. ``main`` passes
    the result to ``csv.writer.writerow``, which already quotes any field
    containing a comma, a quote or a newline; escaping here as well would
    make such fields come out double-quoted (``a,b`` -> ``\"\"\"a,b\"\"\"``).

    Args:
        fields: Field strings produced by splitting one input line on commas.
        expected_columns: Number of columns in the header row (at least 1).

    Returns:
        A new list with ``expected_columns`` entries:

        * exact width  -> a copy of ``fields``;
        * too many     -> the surplus leading fields are re-joined with
          commas into the first column, the remaining fields are kept as-is
          (for a two-column file this is "merge everything but the last");
        * too few      -> ``fields`` padded on the right with empty strings.
    """
    surplus = len(fields) - expected_columns

    if surplus > 0:
        # Heuristic: an unquoted comma inside the first column is assumed to
        # have caused the extra splits, so the trailing columns are trusted
        # and the first ``surplus + 1`` pieces are glued back together.
        merged = ','.join(fields[:surplus + 1])
        return [merged, *fields[surplus + 1:]]

    # Exact width (surplus == 0) or short row (surplus < 0): copy and pad.
    return list(fields) + [''] * (-surplus)
def main():
    """Command-line entry point: normalise a malformed CSV file.

    Usage: ``script INPUT_CSV [OUTPUT_CSV]``

    Reads the file named by ``sys.argv[1]``, treats its first line as the
    header, and passes every non-blank data line through ``fix_row`` so that
    it matches the header's column count. Rows are written with
    ``csv.writer`` to ``sys.argv[2]`` when given, otherwise to standard
    output.

    Lines are split on bare commas; quoting in the input is not interpreted.
    An empty input file produces empty output.

    Raises:
        SystemExit: If no input path was supplied on the command line.
        OSError: If the input or output file cannot be opened.
    """
    if len(sys.argv) < 2:
        # Exit with a usage message rather than an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} INPUT_CSV [OUTPUT_CSV]")

    input_path = sys.argv[1]
    output_path = sys.argv[2] if len(sys.argv) > 2 else None

    with open(input_path, newline='', encoding='utf-8') as infile:
        lines = infile.readlines()

    outfile = (
        open(output_path, 'w', newline='', encoding='utf-8')
        if output_path
        else sys.stdout
    )
    try:
        if not lines:
            # Empty input: there is no header to derive a column count from.
            return

        writer = csv.writer(outfile)
        header = lines[0].strip().split(',')
        expected_columns = len(header)
        writer.writerow(header)

        for line in lines[1:]:
            stripped = line.strip()
            if not stripped:
                continue
            fields = [f.strip() for f in stripped.split(',')]
            writer.writerow(fix_row(fields, expected_columns))
    finally:
        # Close only a file we opened ourselves; using sys.stdout as a
        # context manager would close the interpreter's stdout on exit.
        if outfile is not sys.stdout:
            outfile.close()
# Run the CSV fixer only when this file is executed as a script, so that
# importing it (e.g. to reuse the helpers) has no side effects.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment