Skip to content

Instantly share code, notes, and snippets.

@Per48edjes
Last active June 21, 2020 04:21
Show Gist options
  • Save Per48edjes/f4e9a337afd4801dc453c644678f4c81 to your computer and use it in GitHub Desktop.
Save Per48edjes/f4e9a337afd4801dc453c644678f4c81 to your computer and use it in GitHub Desktop.
Dupe-check logging
import datetime
from typing import Optional
# Filename function for logging
def dt_filename(
    filename: str,
    extension: str,
    path: str = "",
    date: Optional[str] = None,
) -> str:
    """Build a path of the form ``<path>/<filename>_<date><extension>``.

    Args:
        filename: Base name of the file, without its extension.
        extension: Suffix appended verbatim, so it must carry its own
            leading dot (e.g. ``".csv"``).
        path: Directory prefix, joined to the file name with ``/``. When
            empty, no prefix and no leading slash are added.
        date: Stamp appended to ``filename`` after an underscore. When
            omitted, ``str(datetime.datetime.now())`` is taken at call time.

    Returns:
        The assembled path string.
    """
    if date is None:
        # Resolve the stamp per call. A default written in the signature is
        # evaluated only once, when the function is defined, so every later
        # call would reuse the same stamp and produce the same file name.
        # NOTE(review): str(datetime.now()) contains spaces and colons, which
        # are not valid in Windows file names - confirm the target platform.
        date = str(datetime.datetime.now())

    stamped_name = "_".join([filename, date])

    # Only prepend the directory when one was given; joining an empty path
    # would yield a leading "/" and point at the filesystem root.
    if path:
        stamped_name = "/".join([path, stamped_name])

    return stamped_name + extension
# Example usage: export duplicated rows to CSV files named with a datetime
# suffix, one file per duplicate check.
# NOTE(review): `df` is defined elsewhere and is presumably a pandas
# DataFrame with a 'client_id' column - confirm against the caller.
# First mask: duplicates across whole rows; second: duplicates on 'client_id'.
dupes_idx = [
    df.duplicated(keep=False),
    df['client_id'].duplicated(keep=False),
]
dupes_filenames = ['row_dupes', 'client_dupes']

# Pair each mask with the label used for its message and output file.
for idx, filename in zip(dupes_idx, dupes_filenames):
    if idx.any():
        # At least one row is flagged: write the flagged rows under
        # 'outputs', then show the label and the rows themselves.
        output_path = dt_filename(filename, '.csv', 'outputs')
        df[idx].to_csv(output_path)
        print(filename)
        # NOTE(review): `display` is presumably the IPython/Jupyter helper;
        # it is not defined in this snippet.
        display(df[idx])
    else:
        print(f"No duplicates for {filename}!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment