Last active
May 13, 2022 23:19
-
-
Save pyllyukko/37dff6068d943e54050e2dc7749d24e5 to your computer and use it in GitHub Desktop.
bits.awk
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/gawk -f
#
# bits.awk - render the bits of every input line ("token") as a plain PBM image.
#
# Every input line becomes one image row; every bit of every byte in the line
# becomes one pixel (most significant bit first). Bit columns for which every
# token supplied a 0 bit are left out of the image. Tokens shorter than the
# longest one are padded on the right with zero bits so that all rows have the
# same width.
#
# Usage:
#   gawk -f bits.awk tokens.txt
#   gawk -v outfile=other.pbm -f bits.awk tokens.txt
#
# The image is written to the file named by the variable "outfile"
# (default: bits.pbm). Progress information is printed on standard output.
#
# Requires gawk because of the and() builtin.
#
# https://www.gnu.org/software/gawk/manual/gawk.html#Nondecimal_002dnumbers
#
# https://www.gnu.org/software/gawk/manual/gawk.html#Ordinal-Functions
#
# TODO:
#   - truncate static bytes

# Fill the global table _ord_[] so that _ord_[character] is that character's
# numeric code. The covered code range is chosen by probing what
# sprintf("%c", ...) produces for BEL.
# low, high, i and t are locals.
function _ord_init(    low, high, i, t)
{
    low = sprintf("%c", 7)      # BEL is ascii 7
    if (low == "\a") {          # regular ascii
        low = 0
        high = 127
    } else if (sprintf("%c", 128 + 7) == "\a") {
        # ascii, mark parity
        low = 128
        high = 255
    } else {                    # ebcdic(!)
        low = 0
        high = 255
    }

    for (i = low; i <= high; i++) {
        t = sprintf("%c", i)
        _ord_[t] = i
    }
}

# Return the numeric code of the first character of str.
# Characters that are not in _ord_[] (including the empty string) yield 0;
# the explicit membership test avoids inserting empty entries into the table.
# c is a local.
function ord(str,    c)
{
    # only first character is of interest
    c = substr(str, 1, 1)
    return (c in _ord_) ? _ord_[c] : 0
}

# Return the character whose numeric code is c.
function chr(c)
{
    # force c to be numeric by adding 0
    return sprintf("%c", c + 0)
}

# Split one byte value into the global array bits[0..7], most significant
# bit first; every element is 1 or 0.
# bitpos and n are locals so the function does not clobber global names.
function byte_to_bits(byte,    bitpos, n)
{
    bitpos = 0
    # n walks the bit masks 128, 64, ..., 1
    for (n = 128; n >= 1; n = n / 2)
        bits[bitpos++] = and(byte, n) ? 1 : 0
}

BEGIN {
    _ord_init()
    longest = 0
    # allow "-v outfile=..." on the command line, keep the historic default
    if (outfile == "")
        outfile = "bits.pbm"
    printf "processing tokens"
}

# Main rule: remember every token and count, per bit position, how many
# tokens have a 0 bit there.
{
    len = length($0)

    # store the length of the longest string
    if (len > longest)
        longest = len

    # read the tokens into an array (0-based)
    tokens[NR - 1] = $0

    # start string from bit 0
    bit_count = 0

    # process every byte in the line...
    for (j = 1; j <= len; j++) {
        byte_to_bits(ord(substr($0, j, 1)))
        for (i = 0; i <= 7; i++) {
            # a position whose zero count ends up equal to NR was never set
            if (bits[i] == 0)
                zero_bits_by_position[bit_count + i]++
        }
        bit_count += 8
    }

    # progress indicator
    if (NR % 1000 == 0)
        printf "."
}

END {
    printf "\n"

    y = NR
    total_bits = longest * 8

    # count the real x (with zero bits truncated)
    x = 0
    for (i = 0; i < total_bits; i++) {
        if (zero_bits_by_position[i] != y)
            x++
    }

    # some info
    printf "x: %d (truncating %d bits)\n", x, total_bits - x
    printf "y: %d\n", y
    printf "creating image"

    # create PBM header
    #
    # https://en.wikipedia.org/wiki/Portable_anymap#PBM_example
    printf "P1\n" > outfile
    printf "%d %d\n", x, y > outfile

    # for every token
    for (i = 0; i < y; i++) {
        bit_count = 0
        len = length(tokens[i])

        # Walk over the width of the longest token, not over length($0):
        # in END, $0 is the last input line, so using it would give rows a
        # pixel count that disagrees with the x written to the header.
        for (j = 1; j <= longest; j++) {
            if (j <= len) {
                # get one char (byte) from the string
                byte_to_bits(ord(substr(tokens[i], j, 1)))
            } else {
                # token is shorter than the longest one: pad with zero bits
                byte_to_bits(0)
            }

            # for every bit
            for (k = 0; k <= 7; k++) {
                # truncate unused bits
                if (zero_bits_by_position[bit_count + k] != y)
                    printf "%d ", bits[k] > outfile
            }
            bit_count += 8
        }
        printf "\n" > outfile

        # progress indicator
        if (i % 1000 == 0)
            printf "."
    }

    close(outfile)
    printf "\n"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment