Last active
August 16, 2024 12:27
-
-
Save serif/a1281c676cf5a1f77af6ff1a25255a85 to your computer and use it in GitHub Desktop.
Bitwarden Duplicate Entry Remover v2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# updated 2023-11-27 | |
# updated 2023-10-12 | |
# updated 2021 | |
# updated 2020 | |
# created 2018 | |
import sys | |
import hashlib | |
from urllib.parse import urlparse | |
def main(argv):
    """Remove duplicate entries from a Bitwarden CSV export.

    argv[0] is the path to the exported .csv file.  Unique entries are
    written to <name>_out.csv; duplicates and unrecoverable short lines
    are written to <name>_rem.csv.  Two entries are considered duplicates
    when the MD5 of (login_uri domain + username + password) matches.
    """
    # Fields in a Bitwarden CSV export, in column order.
    f = 'folder,favorite,type,name,notes,fields,reprompt,login_uri,login_username,login_password,login_totp'.split(',')
    if len(argv) < 1:
        sys.exit('Supply input file path as command argument')
    in_path = argv[0]
    csv = '.csv'
    # Guard against a non-.csv input: with str.replace the substitution
    # would be a no-op and the output paths would collide with (and
    # overwrite) the input file.
    if not in_path.endswith(csv):
        sys.exit('Input file must have a .csv extension')
    base = in_path[:-len(csv)]
    out_path = base + '_out' + csv
    rem_path = base + '_rem' + csv

    # Hoist the column index lookups out of the per-line loop.
    uri_i = f.index('login_uri')
    user_i = f.index('login_username')
    pass_i = f.index('login_password')

    completed_lines_hash = set()
    line_number = -1
    write_count = 0
    dup_count = 0
    cache = ''  # accumulates fragments of entries that span multiple lines

    # Process file
    with open(out_path, 'w', encoding='utf8') as out_file, \
            open(rem_path, 'w', encoding='utf8') as rem_file, \
            open(in_path, 'r', encoding='utf8') as in_file:
        for line in in_file:
            line_number += 1
            # Validate .csv format on the header line, then pass the
            # header straight through (it must not be hashed/deduped —
            # doing so left hashValue unassigned on the first line).
            if line_number == 0:
                if line.strip() != ','.join(f):
                    print('\nBitwarden CSV format has changed.')
                    print('Contact author for update.')
                    sys.exit(1)  # was exit(1); sys.exit is the portable form
                out_file.write(line)
                continue
            # Skip empty lines
            if not line.strip():
                continue
            fields = line.split(',')
            # A line with fewer fields than expected means the entry was
            # split across physical lines (embedded newline in a field);
            # try to combine it with the cached previous fragment.
            if len(fields) < len(f):
                line = cache.strip('\n') + line
                cache = line
                fields = line.split(',')
                if len(fields) == len(f):
                    print(f'Recovered with line {line_number}:\n{line}')
                    cache = ''
                else:
                    # Still short: keep the fragment cached for the next
                    # iteration and record it in the removed file.
                    # NOTE(review): each fragment of a multi-line entry
                    # lands in _rem once per iteration by design here.
                    print(f'Missing fields in line {line_number}:\n{line}')
                    rem_file.write(line)
                    continue
            else:
                cache = ''
            # Reduce the URI to its domain so different paths on the same
            # site compare equal.
            domain = urlparse(fields[uri_i]).netloc
            if domain:
                fields[uri_i] = domain
            # Generate an MD5 hash based on login URI, username, and password.
            token = fields[uri_i] + fields[user_i] + fields[pass_i]
            hash_value = hashlib.md5(token.rstrip().encode('utf-8')).hexdigest()
            # Write entry: first occurrence goes to _out, repeats to _rem.
            if hash_value not in completed_lines_hash:
                completed_lines_hash.add(hash_value)
                out_file.write(line)
                write_count += 1
            else:
                dup_count += 1
                rem_file.write(line)

    # Report.  Duplicates are counted directly instead of being derived
    # from the line counter, which also includes skipped/short lines.
    print(f'\nOutput file: {out_path}\n{write_count} unique entries saved')
    print(f'\n{dup_count} duplicates saved to {rem_path}')
if __name__ == "__main__": | |
main(sys.argv[1:]) |
To anyone new who is looking for a script that, in addition to removing duplicates, also helps you find and get rid of old copies of passwords for websites (where you have the same username but both old and new passwords): this script that I wrote can help.
This uses the Bitwarden JSON export, preserving more data, including properly preserving notes. It also keeps the newest entry if multiple exist. Consider merging this with @howird's logic above for the best combination.
https://gist.github.com/topisani/066b63b87346afe76ffdf0998d4ebc2f
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have managed to import a file with multi-line entries using some modifications (full code below). It also works for me when there are curly braces inside password fields. The only issue is that multiple deleted "lines" are created in the _rem file for each iteration while going through the multi-line portion.