Last active
May 21, 2021 08:56
-
-
Save yiding/0be1cabcdc2b1b51411f to your computer and use it in GitHub Desktop.
Parsing xcode DGPH files. This one handles DGPH 1.04 (used in xcode 7.0, 7.1) Xcode DGPH files contains dependency graph information that lets Xcode do incremental builds.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
module DGPHParser(parseDgph) where | |
import qualified Data.Attoparsec.ByteString as A | |
import qualified Data.ByteString as B | |
import Data.Bits | |
import Data.Foldable (foldl') | |
import qualified Data.ByteString as B | |
import qualified Data.Attoparsec.ByteString as A | |
import Control.Monad.ST ( runST, ST ) | |
import Data.Array.Unsafe (castSTUArray) | |
import Data.Array.ST ( newArray, readArray, MArray, STUArray ) | |
import Data.Bits | |
import Data.Word | |
-- | |
-- Utility functions for parsing doubles and such. | |
-- | |
parseWord64LE :: A.Parser Word64 | |
parseWord64LE = do | |
B.foldr (\byte word -> fromIntegral byte .|. word `shiftL` 8 ) 0 <$> A.take 8 | |
-- | reinterpret_cast | |
cast :: (MArray (STUArray s) a (ST s), | |
MArray (STUArray s) b (ST s)) => | |
a -> ST s b | |
cast x = newArray (0 :: Int, 0) x >>= castSTUArray >>= flip readArray 0 | |
parseDoubleLE :: A.Parser Double | |
parseDoubleLE = do | |
word <- parseWord64LE | |
return $ runST (cast word) | |
-- | |
-- parsing particularly prevalent structures | |
-- | |
-- | Parse a 7 bit little endian variable length encoded number. | |
-- | |
-- The encoding takes 7 bit blocks of the number and encodes it in a byte, | |
-- and set the msb of that byte to 1 if there are additional bytes to follow. | |
-- | |
-- For example, a hypothetical 4 byte number encodes as follows: | |
-- | |
-- @ | |
-- 0000 0000 0000z zzzz zzyy yyyy yxxx xxxx | |
-- | |
-- 1xxxxxxx 1yyyyyyy 0zzzzzzz | |
-- ^msb ^lsb | |
-- @ | |
parseVarLenWordLE :: A.Parser Int | |
parseVarLenWordLE = go [] | |
where | |
go :: [Int] -> A.Parser Int | |
go acc = | |
A.anyWord8 >>= \x -> | |
let acc' = fromIntegral (x .&. 0x7f) : acc in | |
case x .&. 0x80 of | |
-- more to go | |
0x80 -> go acc' | |
-- done, summarize it into a number | |
-- now the numbers are [z, y, x] | |
_ -> return $ foldl' (\a new -> a `shiftL` 7 .|. new) 0 acc' | |
-- | Parse a single byte, and then parse that many number of bytes. | |
parseByteLengthPrefixedString = do | |
b <- A.anyWord8 | |
A.take (fromIntegral b) | |
parseVarLenPrefixedString = do | |
len <- parseVarLenWordLE | |
A.take len | |
parseVarLenPrefixedList :: A.Parser a -> A.Parser [a] | |
parseVarLenPrefixedList itemParser = do | |
len <- parseVarLenWordLE | |
A.count len itemParser | |
-- | |
-- parsing the DGPH file itself | |
-- | |
parseDgph = do | |
A.string "DGPH" | |
_version <- A.take 4 | |
_versionBuildDate <- parseByteLengthPrefixedString | |
_versionBuildTime <- parseByteLengthPrefixedString | |
-- Following, there comes a bunch of XCDependencyNode. | |
-- A count of nodes is followed by nodes, node refers to parents by their node number. | |
-- The first node in the list has number 1, 0 represents ... null | |
nodes <- parseVarLenPrefixedList $ do | |
isVirtual <- (/= 0) <$> A.anyWord8 | |
parentNum <- if not isVirtual then Just <$> parseVarLenWordLE else return Nothing | |
name <- parseVarLenPrefixedString | |
return (parentNum, name) | |
-- filesystem root node. | |
rootNodeNum <- parseVarLenWordLE | |
projectNodeNum <- parseVarLenWordLE | |
-- Next, XCDepGraphNodeState | |
nodeStates <- parseVarLenPrefixedList $ do | |
nodeNum <- parseVarLenWordLE | |
options <- parseVarLenWordLE | |
errNum <- parseVarLenWordLE | |
if errNum == 0 | |
then Right <$> do | |
mtime <- parseVarLenWordLE | |
size <- parseVarLenWordLE | |
mode <- parseVarLenWordLE | |
return (mtime, size, mode) | |
else return $ Left errNum | |
-- Next, a list of XCDependencyCommandInvocationRecord | |
invocations <- parseVarLenPrefixedList $ do | |
identifier <- parseVarLenPrefixedString | |
signature <- A.take 16 | |
desc <- parseVarLenPrefixedString | |
args <- parseVarLenPrefixedList parseVarLenPrefixedString | |
env <- parseVarLenPrefixedList parseVarLenPrefixedString | |
workingDirNodeNum <- parseVarLenWordLE | |
startTime <- parseDoubleLE | |
endTime <- parseDoubleLE | |
exitStatus <- parseVarLenWordLE | |
builderId <- parseVarLenPrefixedString | |
activityLogData <- parseVarLenPrefixedString | |
inputNodeStateNums <- parseVarLenPrefixedList parseVarLenWordLE | |
outputNodeNums <- parseVarLenPrefixedList parseVarLenWordLE | |
return (identifier, signature, desc, args, env, workingDirNodeNum, startTime, endTime, exitStatus, builderId, activityLogData, inputNodeStateNums, outputNodeNums) | |
artifactNodeNums <- parseVarLenPrefixedList parseVarLenWordLE | |
return (nodes, rootNodeNum, projectNodeNum, nodeStates, invocations, artifactNodeNums) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
See https://gist.github.com/yiding/7a22deff33160c84b04e for parsing the SLF0 encoded
activityLogData
field.