Skip to content

Instantly share code, notes, and snippets.

@kbridge
Created May 27, 2022 09:49
Show Gist options
  • Save kbridge/157f164de01d30f33ab45950cc418e05 to your computer and use it in GitHub Desktop.
Save kbridge/157f164de01d30f33ab45950cc418e05 to your computer and use it in GitHub Desktop.
A line-oriented non-ASCII byte finder, with unnecessary elegance
import sys
from typing import Iterator, Optional
# Byte values that count as ASCII: the complete 7-bit range 0x00-0x7F.
# Control bytes such as TAB (0x09) and CR (0x0D) are ASCII as well, so they
# belong here; starting the range at 32 reported them as non-ASCII.
ascii_range = range(128)
def is_non_ascii(b):
    """Tell whether the byte value ``b`` falls outside ``ascii_range``.

    ``b`` is expected to be a single byte value as an int, which is what
    iterating over a ``bytes`` object yields.

    Returns True when ``b`` is not a member of the module-level
    ``ascii_range``, and False when it is.
    """
    inside = b in ascii_range
    return not inside
def non_ascii_iter(bs):
    """Return a lazy iterator over the elements of ``bs`` flagged by ``is_non_ascii``.

    Elements are produced in their original order, and nothing is examined
    until the returned generator is advanced.
    """
    flagged = (value for value in bs if is_non_ascii(value))
    return flagged
def first_non_ascii_byte(bs):
    """Return the first element produced by ``non_ascii_iter(bs)``.

    Returns None when that iterator produces nothing.
    """
    for value in non_ascii_iter(bs):
        # The first hit is all we need; stop scanning right away.
        return value
    return None
def contains_non_ascii(bs):
    """Return True if ``first_non_ascii_byte(bs)`` finds a byte, else False.

    The result is compared against None rather than tested for truthiness,
    so a found byte whose value is 0 still counts as a hit.
    """
    found = first_non_ascii_byte(bs)
    if found is None:
        return False
    return True
def main(argv):
    """Print every line of the file named by ``argv[1]`` that has a flagged byte.

    ``argv`` is the full argument vector (program name first). Each reported
    line is written to stdout as ``PATH:LINE_NUMBER: ESCAPED_CONTENT``, where
    line numbers start at 1 and the content is the ``repr`` of the line's
    bytes without the ``b`` prefix and surrounding quotes.

    Returns the process exit status: 1 when the argument count is wrong
    (a usage message is written to stderr), 0 otherwise. Errors raised by
    ``open`` are not caught here.
    """
    if len(argv) != 2:
        program = argv[0]
        # Usage errors go to stderr so stdout carries only findings.
        print(f'usage: python {program} PATH', file=sys.stderr)
        return 1

    path = argv[1]
    # Binary mode: the raw bytes are inspected, with no decoding step.
    with open(path, mode='rb') as f:
        for line_number, line in enumerate(f, start=1):
            # Drop the line terminator, whether LF or CRLF. Stripping only
            # b'\n' left the '\r' of CRLF files inside the line content.
            content = line.rstrip(b'\r\n')
            if not contains_non_ascii(content):
                continue
            # repr() of bytes looks like b'...'; slice off the prefix and
            # the quotes to leave just the escaped content.
            escaped = repr(content)[2:-1]
            print(f'{path}:{line_number}: {escaped}')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment