jasonsnell · January 15, 2025 21:34
diff --git a/countduplicates.py b/countduplicates.py
 #!/usr/bin/env python3

 import collections
 import subprocess
 import re

 # Report text (one "<line>\t<count>" row per distinct line) that the script
 # places on the clipboard at the end.
 theFinal = ""

 def getClipboardData():
     """Return the current clipboard contents as raw bytes.

     Runs the ``pbpaste`` command and captures everything it writes to
     standard output. The bytes are returned undecoded; the caller picks
     the text encoding. The command's exit status is not checked.
     """
     p = subprocess.Popen(['pbpaste'], stdout=subprocess.PIPE)
     # communicate() drains the pipe while waiting for the child to exit.
     # Calling wait() first and read() afterwards can deadlock once the
     # output is larger than the OS pipe buffer.
     data, _ = p.communicate()
     return data

 def setClipboardData(data):
     """Replace the clipboard contents with ``data``.

     ``data`` must be a bytes-like object; it is fed to the standard input
     of the ``pbcopy`` command. The command's exit status is not checked.
     """
     p = subprocess.Popen(['pbcopy'], stdin=subprocess.PIPE)
     # communicate() writes the payload, closes stdin and waits for the
     # child in one step, so the pipe is closed even if the write fails.
     p.communicate(input=data)

 # Read the clipboard, normalise every line, count how often each distinct
 # line occurs, and put a "<line>\t<count>" report back on the clipboard.
 cd = getClipboardData()
 results = cd.decode("utf-8")
 results = results.title()
 # this first regex normalizes line endings, thanks Excel
 results = re.sub(r'\r\n', '\n', results)
 # Make sure the final line is newline-terminated so it goes through the
 # same clean-up as every other line and is counted together with its
 # duplicates (previously its last character was chopped off instead).
 if results and not results.endswith('\n'):
     results += '\n'
 # drop parenthetical asides such as "(2019)"
 results = re.sub(r'\([^)]+\)', '', results)
 results = re.sub(r'\n\n', '\n', results)
 # Strip a leading article "The " from every line, including the first.
 # Requiring the space keeps words like "There" or "They" intact.
 results = re.sub(r'^The +', '', results, flags=re.MULTILINE)
 # remove trailing whitespace and blank lines
 results = re.sub(r'\s+\n', '\n', results)
 results = re.sub(r':', '', results)

 theResults = results.splitlines()

 c = collections.Counter(theResults)

 # sorted by value (number of occurrences, in descending order)
 theFinal = "".join(f'{k}\t{v}\n' for k, v in c.most_common())

 setClipboardData(str.encode(theFinal))
	#!/usr/bin/env python3

	import collections
	import subprocess
	import re

	# Report text (one "<line>\t<count>" row per distinct line) that the script
	# places on the clipboard at the end.
	theFinal = ""

	def getClipboardData():
	    """Return the current clipboard contents as raw bytes.

	    Runs the ``pbpaste`` command and captures everything it writes to
	    standard output. The bytes are returned undecoded; the caller picks
	    the text encoding. The command's exit status is not checked.
	    """
	    p = subprocess.Popen(['pbpaste'], stdout=subprocess.PIPE)
	    # communicate() drains the pipe while waiting for the child to exit.
	    # Calling wait() first and read() afterwards can deadlock once the
	    # output is larger than the OS pipe buffer.
	    data, _ = p.communicate()
	    return data

	def setClipboardData(data):
	    """Replace the clipboard contents with ``data``.

	    ``data`` must be a bytes-like object; it is fed to the standard input
	    of the ``pbcopy`` command. The command's exit status is not checked.
	    """
	    p = subprocess.Popen(['pbcopy'], stdin=subprocess.PIPE)
	    # communicate() writes the payload, closes stdin and waits for the
	    # child in one step, so the pipe is closed even if the write fails.
	    p.communicate(input=data)

	# Read the clipboard, normalise every line, count how often each distinct
	# line occurs, and put a "<line>\t<count>" report back on the clipboard.
	cd = getClipboardData()
	results = cd.decode("utf-8")
	results = results.title()
	# this first regex normalizes line endings, thanks Excel
	results = re.sub(r'\r\n', '\n', results)
	# Make sure the final line is newline-terminated so it goes through the
	# same clean-up as every other line and is counted together with its
	# duplicates (previously its last character was chopped off instead).
	if results and not results.endswith('\n'):
	    results += '\n'
	# drop parenthetical asides such as "(2019)"
	results = re.sub(r'\([^)]+\)', '', results)
	results = re.sub(r'\n\n', '\n', results)
	# Strip a leading article "The " from every line, including the first.
	# Requiring the space keeps words like "There" or "They" intact.
	results = re.sub(r'^The +', '', results, flags=re.MULTILINE)
	# remove trailing whitespace and blank lines
	results = re.sub(r'\s+\n', '\n', results)
	results = re.sub(r':', '', results)

	theResults = results.splitlines()

	c = collections.Counter(theResults)

	# sorted by value (number of occurrences, in descending order)
	theFinal = "".join(f'{k}\t{v}\n' for k, v in c.most_common())

	setClipboardData(str.encode(theFinal))