#!/bin/bash
# is_z80 - guess if a binary is z80 or 8080
#  from frequency of two-byte Z80 instructions
# scruss, 2023-02
# this might need gnu awk; od's -w option is not in POSIX (GNU od has it). Needs bc for sure

# Argument handling: the first argument names the file to examine and
# must be an existing regular file. Diagnostics go to stderr so they can
# never be mistaken for the result line written to stdout.
if [ "$#" -eq 0 ]; then
    echo "usage: $0 file" >&2
    exit 1
fi

file="$1"

if [ ! -f "$file" ]; then
    # say what was wrong before repeating the usage line
    echo "$0: not a regular file: $file" >&2
    echo "usage: $0 file" >&2
    exit 1
fi

# file_size FILE
# Print the length of FILE in bytes, the POSIX way. wc -c is used rather
# than cutting a column out of `ls -l`: the ls columns are only reliable
# when every field is separated by exactly one space, which fails when ls
# pads its columns or an owner/group name contains a space.
# Returns non-zero if FILE cannot be read.
file_size() {
    local bytes
    bytes=$(wc -c < "$1") || return
    # some wc implementations pad the count with blanks; the arithmetic
    # expansion strips them
    printf '%s\n' "$((bytes))"
}

# an unreadable file is treated as empty (the shell has already reported
# the error on stderr)
size=$(file_size "$file") || size=0

# count_z80_pairs FILE
# Print four space-separated counts: how many times each Z80 prefix byte
# (cb, dd, ed, fd) is immediately followed by a byte accepted as a second
# opcode byte for that prefix.
count_z80_pairs() {
    # dump contents to one hex byte per line of text
    # this avoids weird binary grep options
    # od uses lower case a-f, remember
    # the dump is piped straight into a single awk pass, so there is no
    # temporary file to clean up (or to leak if the script is interrupted)
    od -A n -t x1 -w1 -v "$1" |
        awk '
            BEGIN {
                # second bytes accepted after dd; fd uses the same match
                ix = "(09|19|21|22|23|29|2a|2b|34|35|39|46|4e|56|5e|66|6e|70|71|72|73|74|75|76|77|7e|86|8e|96|9e|a6|ae|b6|be|cb|e1|e3|e5|e9|f9)"
                # ed is big and ugly at 57 opcodes
                ed_ops = "(40|41|42|43|44|45|46|47|48|49|4a|4b|4d|4f|50|51|52|53|56|57|58|59|5a|5b|5e|5f|60|61|62|67|68|69|6a|6b|6f|72|73|78|79|7a|7b|a0|a1|a2|a3|a8|a9|aa|ab|b0|b1|b2|b3|b8|b9|ba|bb)"
            }
            # p is the previous byte, $0 the current one
            p ~ /cb/ && !/3[0-7]/   { cb++ }
            p ~ /dd/ && $0 ~ ix     { dd++ }
            p ~ /ed/ && $0 ~ ed_ops { ed++ }
            p ~ /fd/ && $0 ~ ix     { fd++ }
            { p = $0 }
            END { print 0+cb, 0+dd, 0+ed, 0+fd }
        '
}

# per_10k COUNT SIZE
# Print COUNT expressed as a rate per 10,000 bytes of SIZE, rounded to a
# whole number. The multiplication is done before the division on
# purpose: with scale=3, SIZE / 10000 truncates to 0 for files shorter
# than 10 bytes (a division by zero) and keeps very few significant
# digits for other small files.
per_10k() {
    printf '%.f\n' "$(echo "scale=3; $1 * 10000 / $2" | bc)"
}

if [ "$size" -eq 0 ]; then
    # nothing to count in an empty file
    heur=0
else
    read -r cbcount ddcount edcount fdcount < <(count_z80_pairs "$file")
    # estimated number of valid Z80-only instructions per 10,000 bytes
    heur=$(per_10k "$((cbcount + ddcount + edcount + fdcount))" "$size")
fi

# report: the score first, then the name of the file it was computed for
# (uncomment the next line to see the raw numbers behind the score)
# echo "$file" "$size" "$cbcount" "$ddcount" "$edcount" "$fdcount"
printf '%s %s\n' "$heur" "$file"