@topisani
Forked from serif/bwclean2.py
Last active May 8, 2025 17:29
Bitwarden Duplicate Entry Remover v2
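
The script reads a Bitwarden JSON export and removes duplicate login entries, keeping the most recently revised copy of each. As a rough sketch of the input it expects (the field names mirror what the code below reads; the values are made up for illustration):

    example_export = {
        "items": [
            {
                "revisionDate": "2025-01-01T00:00:00.000Z",
                "login": {
                    "username": "alice",
                    "password": "hunter2",
                    "uris": [{"uri": "https://example.com/login"}],
                },
            },
        ],
    }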
#!/usr/bin/env python3
import sys
import hashlib
import json
from urllib.parse import urlparse
from datetime import datetime


def main(argv):
    if len(argv) < 1:
        sys.exit('Supply input file path as command argument')
    in_path = argv[0]
    base = in_path.rsplit('.json', 1)[0]
    out_path = base + '_out.json'
    rem_path = base + '_rem.json'
    hash_items = dict()  # dedup key -> most recently revised item for that key
    rem_items = list()   # duplicates slated for removal
    keep_items = list()  # items without URIs, passed through untouched

    # Process file
    with open(in_path, 'r', encoding='utf8') as in_file:
        in_data = json.load(in_file)
    items = in_data['items']
    for item in items:
        login = item.get('login') or {}
        uris = login.get('uris') or []
        if not uris:
            # Non-login items and logins without URIs are never deduplicated
            keep_items.append(item)
            continue
        username = str(login.get('username'))
        password = str(login.get('password'))
        for uri in uris:
            # Two entries count as duplicates when they share domain, username and password
            domain = urlparse(uri.get('uri', '')).netloc
            token = domain + username + password
            digest = hashlib.md5(token.rstrip().encode('utf-8')).hexdigest()
            old = hash_items.get(digest)
            if old is None:
                hash_items[digest] = item
                continue
            # Keep whichever copy was revised most recently
            # (parsing the trailing 'Z' in Bitwarden timestamps requires Python 3.11+)
            if datetime.fromisoformat(old.get('revisionDate')) < datetime.fromisoformat(item.get('revisionDate')):
                hash_items[digest] = item
                rem_items.append(old)
            else:
                rem_items.append(item)
            print(f"found duplicate entry for uri='{domain}', username='{username}'")

    # An item with several URIs can win under more than one digest, or win under one
    # and lose under another; emit each kept item once and never remove a kept item
    unique_items = list({id(it): it for it in hash_items.values()}.values())
    kept_ids = {id(it) for it in unique_items}
    rem_items = list({id(it): it for it in rem_items if id(it) not in kept_ids}.values())
    out_data = in_data | {"items": unique_items + keep_items}
    rem_data = in_data | {"items": rem_items}
    with open(out_path, 'wt', encoding='utf8') as out_file, \
            open(rem_path, 'wt', encoding='utf8') as rem_file:
        json.dump(out_data, out_file, indent=4)
        json.dump(rem_data, rem_file, indent=4)

    # Report
    print(f'\n{len(items)} total entries')
    print(f'\nOutput file: {out_path}\n{len(unique_items)} unique login entries kept')
    print(f'{len(keep_items)} items without URIs kept unchanged')
    print(f'\nDuplicates saved to {rem_path}\n{len(rem_items)} entries removed')


if __name__ == "__main__":
    main(sys.argv[1:])
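
Usage is a single command; the export filename here is just a placeholder:

    python3 bwclean2.py bitwarden_export.json

This writes the deduplicated vault to bitwarden_export_out.json and the removed entries to bitwarden_export_rem.json, so the duplicates can be reviewed before the cleaned file is re-imported.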
@brianjmurrell

Does this merge/add the domains from the removed duplicates to the remaining entry? Does it merge other fields like notes, etc.?
