Last active
April 13, 2025 08:33
-
-
Save sohang3112/8122b0729c41f84b0fb8a79018e0201c to your computer and use it in GitHub Desktop.
(Haskell) Compresses a string by counting consecutive repeating characters.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
import Data.List (foldl') | |
-- | Compresses a string by counting consecutive repeating characters. | |
-- | |
-- Returns a list of tuples where each tuple consists of a character and the | |
-- number of its consecutive occurrences in the string. | |
-- | |
-- This function uses 'reverse', 'foldl'' and bang patterns for strict accumulation. | |
-- Consequently, it is non-lazy — it will hang if given an infinite input. | |
-- | |
-- >>> compressConsecutive "aaaabbbcca" | |
-- [('a',4),('b',3),('c',2),('a',1)] | |
compressConsecutive :: String -> [(Char, Int)] | |
compressConsecutive [] = [] | |
compressConsecutive (c:cs) = reverse $ foldl' update [(c, 1)] cs | |
where | |
-- strict bang pattern is needed for count to prevent count+1 becoming thunk because it isn't inspected | |
-- but it's NOT needed for char and ch because both are immediately tested for equality | |
-- ch is also guaranteed to be resolved (not a thunk) because foldl' is strict in its arguments | |
-- Strictness Annotation (bang patterns) guideline: https://wiki.haskell.org/Performance/Strictness | |
update :: [(Char, Int)] -> Char -> [(Char, Int)] | |
update ((char, !count) : xs) ch | |
| char == ch = (char, count+1) : xs | |
| otherwise = (ch, 1) : (char, count) : xs | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment