Created
September 4, 2022 18:43
-
-
Save 505e06b2/1167def474f1b0d3dd02ef739f9a0a2d to your computer and use it in GitHub Desktop.
Lossy ASCII compression
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
/**
 * Runtime options for this module.
 * Callers may flip `debug` to true to have the intermediate bit strings
 * logged to the console.
 */
export const settings = {
    debug: false
};
/**
 * Left-pad a bit string with "0" until its length is a multiple of `divisible_by`.
 *
 * @param {string} bit_string - String of "0"/"1" characters.
 * @param {number} divisible_by - Chunk size the result length must be a multiple of.
 * @returns {string} `bit_string` unchanged when its length is already a multiple,
 *     otherwise the same bits with leading zeros added.
 */
function padBits(bit_string, divisible_by) {
    const remainder = bit_string.length % divisible_by;
    if (remainder === 0) {
        return bit_string;
    }
    // Zeros go on the left so the numeric value of the bits is preserved.
    return bit_string.padStart(bit_string.length + (divisible_by - remainder), "0");
}
// Output alphabet: 64 symbols (A-Z, a-z, 0-9, "-", "_"); a symbol's position is its numeric value.
const encoding_values = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
// Number of bits needed to represent the highest index in the alphabet.
const encoded_char_binary_size = (encoding_values.length-1).toString(2).length;
/**
 * Encode a bit string as text using the `encoding_values` alphabet.
 *
 * The bits are left-padded with zeros to a whole number of
 * `encoded_char_binary_size`-bit groups, and each group is replaced by the
 * alphabet symbol at that index.
 *
 * @param {string} binary_string - String of "0"/"1" characters.
 * @returns {string} Encoded text; "" for an empty bit string.
 */
function encode(binary_string) {
    const padded = padBits(binary_string, encoded_char_binary_size);
    if (settings.debug) console.log("Padded for Encode:", padded);

    // Walk fixed-width chunks by offset. A regex match would yield null for an
    // empty string and make iteration throw.
    let encoded = "";
    for (let offset = 0; offset < padded.length; offset += encoded_char_binary_size) {
        const bits = padded.slice(offset, offset + encoded_char_binary_size);
        encoded += encoding_values[parseInt(bits, 2)];
    }
    return encoded;
}
/**
 * Reverse encode(): turn encoded text back into a bit string and strip the
 * leading zero padding that encode() added.
 *
 * @param {string} encoded_string - Text made of `encoding_values` symbols.
 * @returns {string|undefined} The recovered bit string, or `undefined` (after
 *     logging an error) when a character is not part of the encoding alphabet.
 */
function decode(encoded_string) {
    let binary_string = "";
    for (const c of encoded_string) {
        const index = encoding_values.indexOf(c);
        if (index === -1) {
            console.error(`"${c}" is not in the encoding scheme`);
            return undefined;
        }
        binary_string += padBits(index.toString(2), encoded_char_binary_size);
    }

    // Bits that do not fill a whole subset character can only be encode() padding.
    const leftover = binary_string.length % subset_char_binary_size;
    if (leftover) {
        return binary_string.slice(leftover);
    }
    // Length divides evenly by both group sizes: a leading all-zero group is
    // treated as padding added by encode() and dropped.
    if (binary_string.startsWith("0".repeat(subset_char_binary_size))) {
        return binary_string.slice(subset_char_binary_size);
    }
    return binary_string;
}
// Characters that survive compression; a character's position is its code.
// Newline sits at index 0, so its code is all zero bits.
const ascii_subset = "\nabcdefghijklmnopqrstuvwxyz,.!? ";
// Number of bits needed to represent the highest index in the subset.
const subset_char_binary_size = (ascii_subset.length-1).toString(2).length;
/**
 * Build a random string from `ascii_subset` characters (testing helper).
 *
 * Leading newlines are stripped from the result, so the returned string can
 * be shorter than `count`.
 *
 * @param {number} count - Number of random characters to draw.
 * @returns {string} Random string of at most `count` characters that does not
 *     start with a newline.
 */
export const _getRandomStr = (count) => { //for testing
    let x = "";
    for (let i = 0; i < count; i++) {
        // Math.floor rather than parseInt: parseInt stringifies its argument
        // first, which misreads very small numbers written in exponent notation.
        x += ascii_subset[Math.floor(Math.random() * ascii_subset.length)];
    }
    return x.replace(/^\n+/, "");
};
/**
 * Lossily compress text: lower-case it, keep only characters found in
 * `ascii_subset`, pack each kept character into `subset_char_binary_size`
 * bits, and encode the resulting bit string with encode().
 *
 * Leading newlines are dropped. Newline has code 0, so a leading newline is
 * indistinguishable from the zero padding encode() adds and decode() strips.
 * This applies both to newlines at the very start of the input and to
 * newlines that end up first only because the unsupported characters before
 * them were skipped.
 *
 * @param {string} input_string - Text to compress.
 * @returns {string} Encoded text, or "" when nothing encodable remains.
 */
export function compress(input_string) {
    const trimmed = input_string.replace(/^\n+/, "");
    let newline_warned = trimmed !== input_string;
    if (newline_warned) {
        console.warn("Trimmed leading newlines");
    }
    if (settings.debug) console.log("Input String:", trimmed);
    if (!trimmed) return "";

    let binary_string = "";
    for (const c of trimmed.toLowerCase()) {
        const index = ascii_subset.indexOf(c);
        if (index === -1) {
            console.warn(`Skipping "${c}" as not in subset`);
            continue;
        }
        if (index === 0 && binary_string === "") {
            // A newline that became the first symbol after skipping unsupported
            // characters would be mistaken for padding on decode; drop it too.
            if (!newline_warned) {
                console.warn("Trimmed leading newlines");
                newline_warned = true;
            }
            continue;
        }
        binary_string += padBits(index.toString(2), subset_char_binary_size);
    }
    if (settings.debug) console.log("Compressed:", binary_string);

    // Every character may have been skipped; there is nothing to encode then.
    if (!binary_string) return "";
    return encode(binary_string);
}
/**
 * Reverse compress(): decode the text back to bits and map every
 * `subset_char_binary_size`-bit group to its `ascii_subset` character.
 *
 * @param {string} input_encoded - Text produced by compress().
 * @returns {string} The decompressed (lower-case, subset-only) text. Returns ""
 *     for empty input, and also when decode() rejects the input because it
 *     contains a character outside the encoding alphabet (decode() logs the
 *     offending character).
 */
export function decompress(input_encoded) {
    if (!input_encoded) return "";

    const decoded = decode(input_encoded);
    if (typeof decoded !== "string") {
        // decode() signals invalid input by returning undefined; stop here
        // instead of failing later with an unrelated TypeError.
        return "";
    }

    const binary_string = padBits(decoded, subset_char_binary_size);
    if (settings.debug) console.log("Padded for Decode:", binary_string);

    let decompressed = "";
    for (let offset = 0; offset < binary_string.length; offset += subset_char_binary_size) {
        const bits = binary_string.slice(offset, offset + subset_char_binary_size);
        decompressed += ascii_subset[parseInt(bits, 2)];
    }
    return decompressed;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment