Forked from jwmcgettigan/bitwarden_duplicate_cleaner.py
Last active
September 24, 2023 14:36
-
-
Save ewa/f5e115628b955bf8cd1e0540116b135a to your computer and use it in GitHub Desktop.
Use the BitWarden command-line client to purge duplicate entries from your vault. NOTE: this is my one-time-use variant, and the original has been improved significantly since I forked it, so you should probably look there.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# This script will pull all of your vault 'items' using the 'bw list items' command and then it will compare
# all properties that are not inherently unique from the returned JSON to determine if they are duplicates.
# Note: It removes older duplicates first - the newest copy of the 'item' will be the only one to remain.
# You can simply flip the '>' sign to '<' if you want to preserve the oldest 'item' instead.
#
# Setup Steps
# 1. You must install Bitwarden CLI first: https://bitwarden.com/help/cli/#download-and-install
# 2. Login to the CLI with the 'bw login' command. You need your session key setup before continuing: https://bitwarden.com/help/cli/#using-a-session-key
# 3. Make sure to backup your 'items'. You can use the 'bw export' command to do so: https://bitwarden.com/help/cli/#export
# 4. Run this python script and your duplicate 'items' will start being deleted. https://bitwarden.com/help/cli/#delete
# Note: I am NOT using the '--permanent' flag. This means you can restore anything this script deletes within 30 days.
# Note2: The deletion process is pretty slow (1-2 items per second) so you'll likely need to let it run for a while.
# Revision history:
# March 28, 2023: Justin McGettigan: shared gist on github as https://gist.github.com/jwmcgettigan/0bf7cd39947764896735997056ca74d7 -- no specific license given
# Sept. 22, 2023: Eric Anderson: Working on extended fork at https://gist.github.com/ewa/f5e115628b955bf8cd1e0540116b135a
import argparse
import hashlib
import json
import os
import subprocess
import sys
##
## Eric Anderson extensions: For safety, don't actually *do* anything unless specifically told to
##
# Command-line interface. --really is the opt-in for destructive changes;
# --session and --interactive are two alternative ways of supplying a
# Bitwarden session key.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--really', '-Y',
    action='store_true',
    help='Go ahead and make potentially-destructive changes to bitwarden vault',
)
parser.add_argument(
    '--session', '-s',
    action='store',
    type=str,
    metavar='KEY',
    help='Use session key KEY. Note that there are security implications to providing the key on the command line, so think about it. Setting the BW_SESSION environment variable is preferred.',
)
parser.add_argument(
    '--interactive', '-i',
    action='store_true',
    help='Log in interactively (if needed)',
)
args = parser.parse_args()

## Sanity check
# A key given with --session makes an interactive login pointless, so reject
# the combination up front (parser.error prints usage and exits).
if args.session is not None and args.interactive:
    parser.error("Using --session (-s) and --interactive(-i) together makes no sense.")
## Do this as a closure so we don't have to pass around (or accidentally change) the 'confirmed' variable
def make_delete(confirmed):
    """Return a delete function that only acts when *confirmed* is True.

    The returned function takes the arguments that follow ``bw delete`` on
    the command line (for example ``'item', '<id>'``).  When *confirmed* is
    True it runs ``bw delete ...``; otherwise it only prints the command it
    would have run.

    Args:
        confirmed: Whether the returned function may really delete.

    Returns:
        A function ``do_it(*bw_args)`` that returns None.

    Raises:
        TypeError: If *confirmed* is not a bool.
    """
    # An explicit check instead of 'assert': asserts are stripped under
    # 'python -O', and this flag is what gates destructive operations.
    if not isinstance(confirmed, bool):
        raise TypeError(f"confirmed must be a bool, not {type(confirmed).__name__}")

    def do_it(*bw_args):
        sub_run_args = ['bw', 'delete', *bw_args]
        if not confirmed:
            print(f"** would have done: subprocess.run({sub_run_args})")
            return
        completed = subprocess.run(sub_run_args)
        if completed.returncode != 0:
            # Surface failures instead of silently ignoring the exit status.
            print(
                f"WARNING: {' '.join(sub_run_args)} exited with status {completed.returncode}",
                file=sys.stderr,
            )

    return do_it
# Deletion helper used by the rest of the script; it only really deletes
# when --really was given on the command line.
bw_delete = make_delete(args.really)

##
## Check for session key, in the following order of precedence:
##   BW_SESSION variable, --session command-line argument, interactive login
##
## Interactive login is brittle -- it's ungraceful (but safe) if you
## fail to log in, if you're already logged, or really anything else
## weird.
##
if 'BW_SESSION' in os.environ:
    print("Using session key from BW_SESSION env var", file=sys.stderr)
    if args.session is not None or args.interactive:
        print("WARNING, you specified --session and/or --interactive, but BW_SESSION supercedes these", file=sys.stderr)
elif args.session is not None:
    # Export the key so that every later 'bw' subprocess inherits it.
    os.environ['BW_SESSION'] = args.session
    print("Using session key from command-line argument", file=sys.stderr)
elif args.interactive:
    cmdline = ['bw', 'login', '--raw']
    print(f"starting '{' '.join(cmdline)}'. Only the session key will be exposed to this script", file=sys.stderr)
    # stdin and stderr are inherited so bw can talk to the user; only stdout
    # is captured.  This ONLY works because bw uses stderr for interaction
    # but writes the result to stdout.  subprocess.run() drains the pipe
    # while waiting, avoiding the wait()-before-read() deadlock hazard.
    login = subprocess.run(cmdline, stdout=subprocess.PIPE)
    if login.returncode != 0:
        raise ValueError(f"bw login returned {login.returncode} meaning something went wrong. Possibly you are already logged in, but did not provide the session key? If so, either provide it in BW_SESSION or via --session, or log out.")
    output = login.stdout
    # Strip surrounding whitespace so a trailing newline never ends up in
    # the environment variable.
    session_key = output.decode().strip()
    if not session_key:
        raise ValueError(f"output of {' '.join(cmdline)} does not make sense: {repr(output)}", output, login)
    os.environ['BW_SESSION'] = session_key
# Items seen so far, keyed by the hash of their non-unique fields.
item_dict = {}

# Get the JSON data for each item in the vault
cmd_results = subprocess.run(['bw', 'list', 'items'], capture_output=True)
if cmd_results.returncode != 0:
    # Substring test rather than equality, so surrounding whitespace or a
    # trailing newline in bw's message does not hide the known case.
    if cmd_results.returncode == 1 and b'You are not logged in.' in cmd_results.stderr:
        print("BitWarden says you are not logged in.", file=sys.stderr)
        print("Log in using 'bw login' and/or 'bw unlock' and export the session key as BW_SESSION", file=sys.stderr)
        print("See https://bitwarden.com/help/cli/#using-a-session-key", file=sys.stderr)
    else:
        print(f"bw list items gave an error return code ({cmd_results.returncode}) and an error message this script did not recognize:", file=sys.stderr)
        # errors='replace' keeps the error report itself from failing on
        # undecodable bytes.
        print(f"\"{cmd_results.stderr.decode(errors='replace')}\"", file=sys.stderr)
        print("You'll have to debug this yourself",file=sys.stderr)
    # Propagate bw's exit status as this script's exit status.
    sys.exit(cmd_results.returncode)

output = cmd_results.stdout
items = json.loads(output)
# Echo the parsed command-line options.
print(repr(args))
# Fields left out of the comparison because they differ between copies of
# an otherwise identical item.
_UNIQUE_FIELDS = ('id', 'folderId', 'revisionDate', 'creationDate', 'deletedDate')


def _item_fingerprint(item):
    """Return a SHA-256 hex digest of *item* ignoring its per-copy fields.

    Fields that are absent are simply skipped, and the remaining data is
    serialized with sorted keys so the digest does not depend on key order.
    """
    item_data = {key: value for key, value in item.items() if key not in _UNIQUE_FIELDS}
    canonical = json.dumps(item_data, sort_keys=True)
    return hashlib.sha256(canonical.encode('utf-8')).hexdigest()


for item in items:
    item_hash = _item_fingerprint(item)

    # First time this content is seen: remember it and move on.
    if item_hash not in item_dict:
        item_dict[item_hash] = item
        continue

    print(f'Duplicate item found: {item["name"]}')
    # Compare the revisionDate to see which item is newer; the newer copy is
    # the one that stays in item_dict.
    if item['revisionDate'] > item_dict[item_hash]['revisionDate']:
        discard = item_dict[item_hash]
        item_dict[item_hash] = item
    else:
        discard = item
    bw_delete('item', discard['id'])
    print(f'Deleted older item "{discard["name"]}".')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is not the script you're looking for.
This is a fork of an older version of
jwmcgettigan / bitwarden_duplicate_cleaner.py. The original author, @jwmcgettigan, extended and refactored his version to the point where merging my changes back in is non-trivial, so I haven't tried, but the "real" version is almost certainly better for your purposes.