Last active
May 23, 2016 14:32
-
-
Save radix/bd7f462c5cd51b801e86fb3cca84f41a to your computer and use it in GitHub Desktop.
Nixy Shake
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE TupleSections #-} | |
import Control.Monad (when, foldM_, forM) | |
import Data.Functor | |
import Data.List | |
import Data.Ord (comparing) | |
import Development.Shake | |
import Development.Shake.FilePath | |
import qualified System.Directory as Dir | |
import System.Posix.Files (touchFile) | |
main :: IO () | |
main = shake shakeOptions $ do | |
let mainFiles = [ "README.md", "stack.yaml" ] | |
rule = cachingRule "_build/cache" | |
want ["_build/main"] | |
rule "main" mainFiles $ \out -> do | |
putNormal "building main from scratch" | |
Stdout contents <- cmd "cat" mainFiles | |
writeFile' out contents | |
liftIO $ gcCache "_build/cache" 5 | |
-- | A Rule which keeps a cache of whatever you build, so that if the files are in the same state as | |
-- they were in a previous build, they will not be rebuilt (even if there was an intermediate build | |
-- where the files WERE different). | |
cachingRule :: FilePath -> FilePath -> [FilePath] -> (FilePath -> Action ()) -> Rules () | |
cachingRule cacheDir target needs buildAction = do | |
-- _always_ hash the inputs, and then touch the cache directory, so even no-op rebuilds still let | |
-- the GC know that some artifacts have been used recently. Is this too slow? | |
hash <- liftIO $ hashAll needs | |
let hashDir = cacheDir </> hash | |
cacheTarget = hashDir </> target | |
fullTarget = "_build" </> target | |
liftIO $ do | |
e <- Dir.doesDirectoryExist hashDir | |
when e (touchFile hashDir) | |
-- We define two rules: one for doing the compilation, which runs the passed in action with an | |
-- output location inside the hash-named cache directory: | |
cacheDir </> "*" </> target %> buildAction | |
-- And another for copying from that cache directory to the final destination. | |
fullTarget %> \_ -> do | |
putNormal ("checking hash for " ++ target) | |
need (cacheTarget:needs) | |
copyFile' cacheTarget fullTarget | |
-- | Generate a single checksum of all the given files. | |
hashAll :: [FilePath] -> IO String | |
hashAll files = do | |
let files' = sort files | |
Stdout out <- cmd "md5sum" files' | |
Stdout final <- cmd (Stdin out) "md5sum" | |
case words final of | |
[checksum, "-"] -> return checksum | |
_ -> error ("strange output from md5sum: " ++ show final) | |
-- | If the `cacheDir` is more than `gcMax` megabytes, delete the oldest directories inside of it | |
-- until the size is under `gcMax`. | |
gcCache :: FilePath -> Int -> IO () | |
gcCache cacheDir gcMax = do | |
Stdout duOutput <- cmd "du -m --max-depth=1" [cacheDir] | |
let sizesWithTotal :: [(FilePath, Int)] | |
sizesWithTotal = map parseLine (lines duOutput) | |
total = snd (last sizesWithTotal) | |
sizes = init sizesWithTotal | |
putStrLn $ "[GC] " ++ cacheDir ++ " is " ++ show total ++ " MB." | |
when (total > gcMax) $ putStrLn ("[GC] Performing GC on " ++ cacheDir) | |
fileInfo <- mapM (\(fn, size) -> (fn,size,) <$> Dir.getModificationTime fn) sizes | |
let fileInfoSorted = sortBy (comparing (\(_, _, mtime) -> mtime)) fileInfo | |
forFoldM_ total fileInfoSorted $ \reducedSize (filename, size, _) -> | |
if reducedSize < gcMax then | |
return reducedSize | |
else do | |
putStrLn $ "[GC] Removing " ++ show filename | |
Dir.removeDirectoryRecursive filename | |
return (reducedSize - size) | |
where | |
forFoldM_ base l action = foldM_ action base l | |
parseLine :: String -> (FilePath, Int) | |
parseLine l = case words l of | |
[s, fn] -> (fn, read s :: Int) | |
other -> error ("strange output from `du`: " ++ show other) |
It would be nice to touch
the directories when they are used, as a weak reference counting for garbage collection, kinda like nix's delete-older-than
.
Oh also ignore files would be nice for things like .git.
@eborden I've updated it!
- generally refactored so that it's in a reusable form (hackage package TBD...)
- implemented GC
- always update the modification time of cache directories so the GC knows they're in use.
hm, the way I'm doing the hashing / touching is breaking when depending on a file that is not statically available. working on a fix
I'm working on applying this to my company's build. I've found some issues.
TODO:
- hashing/touching is broken when working on dynamically built files (have a super inefficient workaround for now)
- hardcoded
_build
(fixed locally) - doesn't work with rules that build multiple files (still working this out)
Very cool! CC @snowleopard, as this is essentially lots of simultaneous input-output pairs at once, which is a prerequisite for something like Shake-on-a-server with multiple users.
@radix: What is the case for #3 (doesn't work with rules that build multiple files), could you elaborate?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also TBD: garbage collection. I think something like "if work directory is bigger than N gigabytes, delete oldest directories" would work well