Skip to content

Instantly share code, notes, and snippets.

@jasonsnell
Created January 15, 2025 21:34
Show Gist options
  • Save jasonsnell/19b3796f3144bacdf00cc34df7c0a214 to your computer and use it in GitHub Desktop.
Save jasonsnell/19b3796f3144bacdf00cc34df7c0a214 to your computer and use it in GitHub Desktop.
Count duplicates (2023 version)
#!/usr/bin/env python3
import collections
import subprocess
import re
# Report text (one "entry<TAB>count" line per distinct entry) that the script
# copies back to the clipboard once counting is finished.
theFinal = ""
def getClipboardData():
    """Return the current clipboard contents as raw bytes.

    Runs the macOS ``pbpaste`` utility and captures everything it writes to
    standard output. The exit status of ``pbpaste`` is not checked.

    Returns:
        bytes: The undecoded clipboard payload.
    """
    # run() drains stdout while the child is still running. Waiting for the
    # child to exit *before* reading can deadlock once the output is larger
    # than the OS pipe buffer, because the child blocks on a full pipe.
    completed = subprocess.run(['pbpaste'], stdout=subprocess.PIPE)
    return completed.stdout
def setClipboardData(data):
    """Replace the clipboard contents with *data*.

    Feeds *data* to the macOS ``pbcopy`` utility on its standard input.
    The exit status of ``pbcopy`` is not checked.

    Args:
        data: The bytes to place on the clipboard.
    """
    # run(input=...) writes the payload, closes the pipe and waits for the
    # child; if anything goes wrong mid-write the child is still cleaned up
    # instead of being left behind with an open pipe.
    subprocess.run(['pbcopy'], input=data)
def countDuplicates(text):
    """Count duplicate entries in *text* and return a tab-separated report.

    Each line of *text* is one entry. Before counting, entries are normalised
    so that trivially different spellings are tallied together:

    * the text is title-cased;
    * parenthesised remarks such as ``(1999)`` are removed;
    * a leading article "The" is removed;
    * colons are removed;
    * trailing whitespace is removed and empty lines are skipped.

    Args:
        text: The entries, one per line (any line-ending style).

    Returns:
        One ``entry<TAB>count`` line per distinct entry, most frequent first;
        an empty string when there are no entries.
    """
    # NOTE: str.title() also capitalises after apostrophes ("Don'T").
    text = text.title()
    # this first regex normalizes line endings, thanks Excel
    text = re.sub(r'\r\n', '\n', text)
    text = re.sub(r'\([^)]+\)', '', text)

    entries = []
    # Split WITHOUT keeping line endings so a final line that lacks a
    # trailing newline is neither truncated nor counted separately.
    for line in text.splitlines():
        # Anchoring per line also covers the very first entry; \b leaves
        # words that merely start with "The" (Theory, There) untouched.
        line = re.sub(r'^The\b ?', '', line)
        line = line.replace(':', '').rstrip()
        if line:
            entries.append(line)

    counts = collections.Counter(entries)
    # sorted by value (number of occurrences, in descending order)
    return ''.join(
        f'{entry}\t{count}\n' for entry, count in counts.most_common()
    )


if __name__ == "__main__":
    theFinal = countDuplicates(getClipboardData().decode("utf-8"))
    setClipboardData(theFinal.encode("utf-8"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment