Skip to content

Instantly share code, notes, and snippets.

@agrif
Created June 6, 2025 21:45
Show Gist options
  • Save agrif/416e7eba1b97682960e5e206090e5832 to your computer and use it in GitHub Desktop.
Save agrif/416e7eba1b97682960e5e206090e5832 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from collections import Counter
from dataclasses import dataclass
from functools import cached_property
import math
import sys
import numpy
import numpy.fft
# Minimum share of counted lines that must follow the dominant style; below
# this, Judgement.mixed reports the file as a tabs/spaces mix.
THRESHOLD = 0.90
# Indentation categories. The same strings are used as keys of the per-line
# stats Counter and as the printed label.
TABS = 'tabs'
SPACES = 'spaces'
MIXED = 'mixed'
@dataclass
class Judgement:
    """Indentation verdict for one file, computed from per-line tallies.

    The three fields hold the raw tallies; every other attribute is a
    cached property derived from them on first access.
    """

    # Number of counted lines per indentation style, keyed by TABS, SPACES
    # and MIXED.
    stats: Counter[str]
    # Leading tab characters on a line -> number of lines with that count.
    tab_hist: Counter[int]
    # Leading space characters on a line -> number of lines with that count.
    space_hist: Counter[int]

    @cached_property
    def category(self):
        """Dominant style: TABS, SPACES, MIXED, or None if nothing was counted.

        MIXED is reported only when no line is purely tab- or purely
        space-indented. A tie between tabs and spaces resolves to TABS.
        """
        tabs = self.stats[TABS]
        spaces = self.stats[SPACES]
        if tabs == 0 and spaces == 0:
            return MIXED if self.stats[MIXED] else None
        return TABS if tabs >= spaces else SPACES

    @cached_property
    def certainty(self):
        """Fraction of all counted lines that use the dominant style.

        Returns 1 when there is no category at all and 0 when the category
        is MIXED.
        """
        if self.category is None:
            return 1
        if self.category == MIXED:
            return 0
        return self.stats[self.category] / self.stats.total()

    @cached_property
    def perfect(self):
        """True when every counted line uses the dominant style.

        A file with no category counts as perfect; a MIXED file never does.
        """
        if self.category is None:
            return True
        if self.category == MIXED:
            return False
        return self.stats[self.category] == self.stats.total()

    @cached_property
    def mixed(self):
        """True when a tabs/spaces file falls below THRESHOLD certainty.

        Always False for the None and MIXED categories, so those are never
        flagged through this property.
        """
        if self.category in (None, MIXED):
            return False
        return self.certainty < THRESHOLD

    @cached_property
    def flag_label(self):
        """Traffic-light emoji: green if perfect, red if mixed, else yellow."""
        if self.perfect:
            return '🟢'
        if self.mixed:
            return '🔴'
        return '🟡'

    @cached_property
    def label(self):
        """Category name for display, with 'none' standing in for None."""
        return 'none' if self.category is None else self.category

    def _calc_indent(self, hist):
        """Estimate the indentation step from a histogram of indent widths.

        Each non-zero width ``k`` with count ``v`` contributes a train of
        spikes of height ``v`` at every multiple of ``k`` along a stretched
        position axis. The period of the strongest non-DC component of that
        signal's spectrum, rounded to an integer, is taken as the step.

        Args:
            hist: mapping of indent width (characters) to number of lines.

        Returns:
            The estimated step as an int, or 0 when ``hist`` contains no
            non-zero width with a non-zero count (nothing to estimate).
        """
        weights = {k: v for k, v in hist.items() if k != 0 and v != 0}
        if not weights:
            # Without this guard an empty histogram crashes in max() and a
            # zero-only histogram yields an arbitrary answer from an
            # all-zero spectrum.
            return 0

        # Author's rationale: the axis must be stretched by more than 2
        # (Nyquist) to resolve a period of 1, and an irrational-ish factor
        # avoids aliasing. The signal is also extended far beyond the widest
        # indent to get a finer frequency resolution.
        spread = math.pi
        duplicate = 1000
        arr = numpy.zeros(int(spread * (max(hist) + 1) * duplicate))
        size = len(arr)
        for k, v in weights.items():
            pk = 0
            # round() on a float already returns an int in Python 3.
            while (idx := round(spread * pk)) < size:
                arr[idx] += v
                pk += k

        # Drop the DC term; it only carries the total weight.
        magnitudes = abs(numpy.fft.rfft(arr)[1:])
        # Convert bin frequencies to periods in original (unstretched) units.
        periods = 1 / numpy.fft.rfftfreq(size)[1:] / spread
        return int(round(periods[numpy.argmax(magnitudes)]))

    @cached_property
    def tab_indent(self):
        """Estimated number of tabs per indentation level."""
        return self._calc_indent(self.tab_hist)

    @cached_property
    def space_indent(self):
        """Estimated number of spaces per indentation level."""
        return self._calc_indent(self.space_hist)

    @cached_property
    def indent_label(self):
        """Short display string with the estimated step(s) for the category.

        Empty when there is no category; 't=N' and/or 's=N' otherwise.
        """
        if self.category is None:
            return ''
        elif self.category == MIXED:
            return f't={self.tab_indent} s={self.space_indent}'
        elif self.category == TABS:
            return f't={self.tab_indent}'
        elif self.category == SPACES:
            return f's={self.space_indent}'
def analyze(lines):
    """Tally the leading whitespace of each line and return a Judgement.

    Lines that start inside a ``/* ... */`` block comment, and lines with no
    leading whitespace (including whitespace-only lines), are not counted.

    Args:
        lines: iterable of text lines, with or without trailing newlines.

    Returns:
        A Judgement built from three Counters: lines per style
        (tabs / spaces / mixed), a histogram of leading-tab counts and a
        histogram of leading-space counts.
    """
    stats = Counter()
    tabcount = Counter()
    spacecount = Counter()
    inside_comment = False
    for line in lines:
        lstripped = line.lstrip()
        # A whitespace-only line strips to '', and is deliberately treated as
        # having no indentation rather than relying on the `[:-0]` slice
        # quirk to produce the same result.
        ws = line[:len(line) - len(lstripped)] if lstripped else ''

        # NOTE(review): the source this was recovered from had lost its
        # indentation; the style tallies are assumed to sit under the same
        # not-in-comment guard as the histograms — confirm.
        if ws and not inside_comment:
            some_tabs = '\t' in ws
            some_spaces = ' ' in ws
            tabcount[ws.count('\t')] += 1
            spacecount[ws.count(' ')] += 1
            if some_tabs and some_spaces:
                stats[MIXED] += 1
            elif some_tabs:
                stats[TABS] += 1
            elif some_spaces:
                stats[SPACES] += 1

        # Track block-comment state for the NEXT line. What matters is the
        # last opener and the last closer on the line: with the first
        # occurrences, `*/ x /* y */` would wrongly stay "inside" and
        # `/* a */ /* b` would wrongly end up "outside".
        last_open = line.rfind('/*')
        last_close = line.rfind('*/')
        if last_close >= 0:
            inside_comment = False
        # `+ 1` keeps the '/' of a closing `*/` from being read as the start
        # of a new opener (as in `*/*`).
        if last_open >= 0 and (last_close < 0 or last_open > last_close + 1):
            inside_comment = True
    return Judgement(stats, tabcount, spacecount)
def main():
    """Analyze each file named on the command line and print one row per file.

    A row holds the category label, the certainty, the traffic-light flag,
    the estimated indent step(s) and the file name.
    """
    for fname in sys.argv[1:]:
        # Only leading whitespace matters, so undecodable bytes are replaced
        # instead of letting one oddly-encoded file abort the whole run.
        with open(fname, errors='replace') as f:
            # analyze() only iterates, so the file can be streamed directly.
            j = analyze(f)
        print(f"{j.label:6s} {j.certainty:.02f} {j.flag_label} {j.indent_label:7s} {fname}")
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment