Last active
January 23, 2024 08:33
-
-
Save regstuff/4c355550fcbd7f81fd9fcde206af8c39 to your computer and use it in GitHub Desktop.
Convert whisper.cpp colored terminal output into a colored HTML file, for transcript confidence scores
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Use with a command such as: (./main -m models/ggml-medium.bin -p 1 -t 8 -pc -f input.wav | ./ansi2html.sh > whisper.html && ./match_colors.sh whisper.html)
# The opening and closing parentheses in the command are important.
# Assumes ansi2html.sh is in the same folder. ansi2html.sh is available here: https://github.com/pixelb/scripts/blob/master/scripts/ansi2html.sh & here: https://gist.github.com/regstuff/a9cb16df25c74d10608a6bff3a3df95d
# Will generate an output file called whisper.html. That file can be parsed and used for the final transcript. All low-confidence (i.e. red- or orange-printed) colors are converted to a span class named "red", which can be used for further CSS downstream.
# This script assumes bc (Basic Calculator) is installed and available in your UNIX environment. It's a common calculator utility available on many Unix-like systems.
# Require exactly one argument: the HTML file to process.
# Diagnostics go to stderr so they never mix with the script's normal output.
if [[ $# -ne 1 ]]; then
  printf 'Usage: %s <input-html-file>\n' "$0" >&2
  exit 1
fi

# The single argument is the path of the input HTML file.
input_file="$1"

# Stop early when the path does not name an existing regular file.
if [[ ! -f "$input_file" ]]; then
  printf "Error: File '%s' not found.\n" "$input_file" >&2
  exit 1
fi
# Reference palette: rainbow color name -> six-digit hex value (no leading '#').
# Read by get_nearest_rainbow_color to classify arbitrary colors.
declare -A rainbow_colors=(
  [Red]=FF0000
  [Orange]=FFA500
  [Yellow]=FFFF00
  [Green]=008000
  [Blue]=0000FF
  [Indigo]=4B0082
  [Violet]=EE82EE
)
# Extract the CSS rules between the <style type="text/css"> and </style> tags.
# The awk range prints both tag lines too; sed drops the first and last line.
css=$(awk '/<style type="text\/css">/,/<\/style>/' "$input_file" | sed '1d;$d')

# Collect the unique values of every class="..." attribute in the input HTML
# file. -E is used so that '+' is standard ERE rather than the GNU-only '\+'.
classes=$(grep -oE 'class="[^"]+"' "$input_file" | cut -d '"' -f2 | sort -u)
#######################################
# Convert a hex color string to decimal RGB components.
# Globals:   R, G, B (written) - decimal channel values
# Arguments: $1 - color as RRGGBB or RGB shorthand, any letter case, with an
#                 optional leading '#'. Characters after the sixth hex digit
#                 are ignored.
# Returns:   0 on success; 1 (leaving R, G, B untouched) when $1 does not
#            start with a valid hex color.
#######################################
hex_to_rgb() {
  local hex_color="${1#\#}"

  # Expand three-digit CSS shorthand, e.g. F80 -> FF8800.
  if [[ "$hex_color" =~ ^[0-9A-Fa-f]{3}$ ]]; then
    hex_color="${hex_color:0:1}${hex_color:0:1}${hex_color:1:1}${hex_color:1:1}${hex_color:2:1}${hex_color:2:1}"
  fi

  # Validate before doing arithmetic: an empty or non-hex slice would otherwise
  # trigger a bash arithmetic error instead of a clean failure status.
  if [[ ! "$hex_color" =~ ^[0-9A-Fa-f]{6} ]]; then
    printf 'hex_to_rgb: invalid hex color: %s\n' "$1" >&2
    return 1
  fi

  R=$((16#${hex_color:0:2}))
  G=$((16#${hex_color:2:2}))
  B=$((16#${hex_color:4:2}))
}
#######################################
# Euclidean distance between two RGB colors.
# Arguments: $1 $2 $3 - R, G, B of the first color
#            $4 $5 $6 - R, G, B of the second color
# Outputs:   the distance on stdout, computed by bc with four fractional digits
#######################################
get_distance() {
  # bc defaults to scale=0, which truncates sqrt() to an integer and makes
  # distinct distances compare as equal; set an explicit scale instead.
  printf 'scale=4; sqrt((%s - %s)^2 + (%s - %s)^2 + (%s - %s)^2)\n' \
    "$1" "$4" "$2" "$5" "$3" "$6" | bc
}
#######################################
# Print the name of the rainbow color closest to a given hex color.
# Globals:   rainbow_colors (read); R, G, B (overwritten through hex_to_rgb)
# Arguments: $1 - hex color in a form hex_to_rgb accepts
# Outputs:   the nearest color name on stdout
# Returns:   0 on success; 1 when $1 cannot be converted or no palette entry
#            could be compared
#######################################
get_nearest_rainbow_color() {
  local color_name nearest_color=""
  local src_r src_g src_b
  local -i distance min_distance=-1

  hex_to_rgb "$1" || return 1
  src_r=$R
  src_g=$G
  src_b=$B

  for color_name in "${!rainbow_colors[@]}"; do
    hex_to_rgb "${rainbow_colors[$color_name]}" || continue
    # Squared distance orders colors exactly like the true Euclidean distance,
    # stays in exact integer arithmetic and needs no external calculator.
    distance=$(( (src_r - R) ** 2 + (src_g - G) ** 2 + (src_b - B) ** 2 ))
    # min_distance < 0 means "nothing measured yet".
    if (( min_distance < 0 || distance < min_distance )); then
      min_distance=$distance
      nearest_color=$color_name
    fi
  done

  [[ -n "$nearest_color" ]] || return 1
  printf '%s\n' "$nearest_color"
}
# Report pass: for every unique class, look up its color in the extracted CSS
# and print the nearest rainbow color. This loop does not modify the file.
while read -r class; do
  [[ -n "$class" ]] || continue
  # Match the selector ".<class> " literally (index) instead of as a regex, and
  # use [ \t]* rather than \s, which not every awk implementation understands.
  color=$(printf '%s\n' "$css" \
    | awk -v sel=".$class " 'index($0, sel) && /color:/ { sub(/^.*color:[ \t]*#/, ""); sub(/;.*$/, ""); print }' \
    | head -n 1)
  if [[ -n "$color" ]]; then
    # Find nearest rainbow color
    nearest_rainbow=$(get_nearest_rainbow_color "$color")
    # Output the class and its corresponding nearest rainbow color
    echo "$class matches nearest rainbow color $nearest_rainbow"
  fi
done <<< "$classes"
# Rewrite pass: for every unique class whose color is found in the extracted
# CSS, replace class="<name>" in the input file with class="red" when the
# nearest rainbow color is Red or Orange, and strip the attribute otherwise.

# Split the class list on whitespace without exposing it to pathname expansion.
# read returns non-zero at end of input, hence the "|| true".
class_names=()
read -r -d '' -a class_names <<< "$classes" || true

for class in "${class_names[@]}"; do
  # Match the selector ".<class> " literally (index) instead of as a regex, and
  # use [ \t]* rather than \s, which not every awk implementation understands.
  # Only the first matching rule is used.
  color=$(printf '%s\n' "$css" \
    | awk -v sel=".$class " 'index($0, sel) && /color:/ { sub(/^.*color:[ \t]*#/, ""); sub(/;.*$/, ""); print }' \
    | head -n 1)
  [[ -n "$color" ]] || continue

  # Convert color to uppercase for matching with defined values
  color=${color^^}
  # Find nearest rainbow color
  nearest_rainbow=$(get_nearest_rainbow_color "$color")
  # Output the class and its corresponding nearest rainbow color
  echo "$class matches nearest rainbow color $nearest_rainbow"

  # Escape characters that are special in a sed basic regex (and the '/'
  # delimiter) so the class name is matched literally.
  class_pattern=$(printf '%s' "$class" | sed 's|[][\/.*^$]|\\&|g')

  if [[ "$nearest_rainbow" == "Red" || "$nearest_rainbow" == "Orange" ]]; then
    # Low-confidence color: replace the class name with "red" in the HTML file
    sed -i "s/class=\"$class_pattern\"/class=\"red\"/g" "$input_file"
  else
    # Any other color: drop the class attribute, leaving a plain span
    sed -i "s/class=\"$class_pattern\"//g" "$input_file"
  fi
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment