Skip to content

Instantly share code, notes, and snippets.

@yeiichi
Created July 26, 2025 12:50
Show Gist options
  • Select an option

  • Save yeiichi/84b7eb95b4eabdf143741c3be78a3a74 to your computer and use it in GitHub Desktop.

Select an option

Save yeiichi/84b7eb95b4eabdf143741c3be78a3a74 to your computer and use it in GitHub Desktop.
Normalize missing-like values to Python None for PostgreSQL or general use
#!/usr/bin/env python3
import pandas as pd
import numpy as np
def normalize_missing(
    df: pd.DataFrame,
    targets=None,
    columns=None,
    output_dtype="object"
) -> pd.DataFrame:
    """
    Normalize missing-like values to Python None for PostgreSQL or general use.

    Parameters:
        df : pd.DataFrame
            The input DataFrame. It is copied; the caller's frame is not
            modified.
        targets : dict, list, tuple, set, frozenset or None
            Values to replace with None.
            If None, uses common placeholders ('', ' ', 'N/A', 'NA', 'n/a',
            'na', '--', 'null', 'NULL' and np.nan).
            A list/tuple/set/frozenset is turned into a {value: None} mapping.
            Anything else (e.g. a ready-made dict) is handed to
            DataFrame.replace unchanged.
        columns : list, pd.Index, pd.Series, np.ndarray, label or None
            If given, only apply the replacement to these columns.
            None or an empty selection applies it to the whole frame.
        output_dtype : str or None
            If 'object', every column of the result is cast to object dtype
            after the replacement. Any other value skips that cast.

    Returns:
        pd.DataFrame
            A new DataFrame with normalized missing values.
    """
    # Build the {value: None} replacement mapping.
    if targets is None:
        mapping = dict.fromkeys(
            ['', ' ', 'N/A', 'NA', 'n/a', 'na', '--', 'null', 'NULL', np.nan]
        )
    elif isinstance(targets, (list, tuple, set, frozenset)):
        mapping = dict.fromkeys(targets)
    else:
        mapping = targets

    # Decide whether a column subset was requested. Array-likes have no
    # unambiguous truth value (``if index:`` raises ValueError), so they are
    # checked by length and converted to a plain list of labels.
    if columns is None:
        restrict = False
    elif isinstance(columns, (pd.Index, pd.Series, np.ndarray)):
        restrict = len(columns) > 0
        columns = list(columns)
    else:
        restrict = bool(columns)

    # Work on a copy so the input frame stays untouched.
    df_out = df.copy()

    if restrict:
        df_out[columns] = df_out[columns].replace(mapping)
    else:
        df_out = df_out.replace(mapping)

    # Cast last, so the object dtype is applied to the already-replaced data.
    if output_dtype == "object":
        df_out = df_out.astype(object)
    return df_out
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment