Last active
June 21, 2020 04:21
-
-
Save Per48edjes/f4e9a337afd4801dc453c644678f4c81 to your computer and use it in GitHub Desktop.
Dupe-check logging
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
# Filename function for logging | |
def dt_filename(
    filename: str,
    extension: str,
    path: str = "",
    date: "str | None" = None,
) -> str:
    """Build a file path of the form ``<path>/<filename>_<date><extension>``.

    Args:
        filename: Base name of the file, without extension.
        extension: Suffix appended verbatim (include the leading dot,
            e.g. ``".csv"``).
        path: Directory to prefix, joined with ``/``. When empty, no
            directory (and no leading slash) is added, so the result is
            relative to the current working directory.
        date: Stamp appended to ``filename`` after an underscore. When
            omitted, the current local time at *call* time is used.

    Returns:
        The assembled path as a string.
    """
    # Resolve the timestamp per call. A ``datetime.now()`` default in the
    # signature would be evaluated only once, when the function is defined,
    # so every later call would silently reuse the same stale stamp.
    if date is None:
        # NOTE: str(datetime) looks like "2020-06-21 04:21:00.123456"; it
        # contains a space and colons, which some filesystems reject.
        date = str(datetime.datetime.now())

    stamped = "_".join([filename, date]) + extension

    # Only prepend the directory when one was given; joining an empty path
    # would yield "/name..." and point at the filesystem root.
    if not path:
        return stamped
    return "/".join([path, stamped])
# Write dupes to output files suffixed by datetime | |
# THIS IS AN EXAMPLE | |
# Output name for each duplicate check, in the same order as the masks below.
dupes_filenames = [
    'row_dupes',
    'client_dupes',
]
# One boolean mask per check; keep=False flags every member of a duplicate
# group rather than only the repeats.
dupes_idx = [
    df.duplicated(keep=False),
    df['client_id'].duplicated(keep=False),
]

for idx, filename in zip(dupes_idx, dupes_filenames):
    if idx.any():
        # Duplicates found: dump them to a timestamped CSV under 'outputs'
        # and show them inline.
        dupe_rows = df[idx]
        dupe_rows.to_csv(dt_filename(filename, '.csv', 'outputs'))
        print(filename)
        display(dupe_rows)
    else:
        print(f"No duplicates for {filename}!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment