Created
July 24, 2024 11:35
-
-
Save camilosampedro/7b64155d23d5f6ac8799fa9b0f3e7c28 to your computer and use it in GitHub Desktop.
Bitwarden JSON export cleanup script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from datetime import datetime, timezone
# Path of the Bitwarden JSON export to read, and of the cleaned file to write.
input_file = 'bitwarden_export_original.json'
output_file = 'output.json'

# These keys are ignored when deciding whether two entries are the same:
# - "id" is unique per entry, so it always differs.
# - "revisionDate" and "creationDate" tend to be slightly apart between
#   otherwise identical entries.
columns_to_ignore = ["id", "revisionDate", "creationDate"]
def _revision_date(item):
    """Return the item's ``revisionDate`` as an aware datetime, or None.

    None is returned when the key is missing or its value cannot be parsed,
    so that callers can treat such entries as "older than anything dated".
    """
    try:
        parsed = datetime.fromisoformat(item["revisionDate"].replace("Z", "+00:00"))
    except (KeyError, AttributeError, TypeError, ValueError):
        return None
    if parsed.tzinfo is None:
        # Naive and aware datetimes cannot be ordered against each other;
        # treat a timestamp without an offset as UTC so comparison is safe.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


# Function to remove duplicate entries in Bitwarden JSON export file
def remove_duplicates(json_obj, ignore_keys=None):
    """Drop duplicate entries from ``json_obj["items"]`` in place.

    Two entries are duplicates when they are equal after removing the
    ignored keys. Among duplicates the entry with the latest
    ``revisionDate`` is kept; an entry whose date is missing or unparseable
    never replaces one already kept. Surviving entries stay in the order in
    which their first occurrence appeared.

    Args:
        json_obj: Parsed export; a dict whose "items" value is a list of
            dicts holding JSON-serializable values.
        ignore_keys: Top-level keys to leave out of the comparison.
            Defaults to the module-level ``columns_to_ignore``.

    Returns:
        The same ``json_obj``, with "items" replaced by the unique entries.

    Raises:
        KeyError: If ``json_obj`` has no "items" key.
        TypeError: If an entry holds a value ``json.dumps`` cannot serialize.
    """
    if ignore_keys is None:
        ignore_keys = columns_to_ignore
    ignored = frozenset(ignore_keys)

    # Fingerprint -> entry currently kept for that fingerprint.
    seen = {}
    for item in json_obj["items"]:
        # sort_keys makes the fingerprint independent of key order (also in
        # nested objects), which str(dict) is not.
        fingerprint = json.dumps(
            {k: v for k, v in item.items() if k not in ignored},
            sort_keys=True,
        )
        if fingerprint not in seen:
            # First time we see this entry.
            seen[fingerprint] = item
            continue

        # Duplicate: keep whichever has the later revisionDate. Assigning to
        # an existing key keeps the entry's original position in the dict.
        new_date = _revision_date(item)
        if new_date is None:
            continue
        existing_date = _revision_date(seen[fingerprint])
        if existing_date is None or new_date > existing_date:
            seen[fingerprint] = item

    unique_items = list(seen.values())
    discarded_count = len(json_obj["items"]) - len(unique_items)
    json_obj["items"] = unique_items
    print(f"Discarded {discarded_count} duplicate items. Total items: {len(unique_items)}")
    return json_obj
# Load the export, de-duplicate its items, and write the cleaned copy.
# The encoding is explicit because the platform default may not be UTF-8,
# in which case non-ASCII entry names or notes would fail to decode.
with open(input_file, encoding="utf-8") as f:
    json_obj = json.load(f)

json_obj = remove_duplicates(json_obj)

with open(output_file, 'w', encoding="utf-8") as f:
    json.dump(json_obj, f, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment