Skip to content

Instantly share code, notes, and snippets.

@Xion
Created December 27, 2011 21:30
Show Gist options
  • Save Xion/1525222 to your computer and use it in GitHub Desktop.
Save Xion/1525222 to your computer and use it in GitHub Desktop.
Haskell implementation of coded4 analyzer (simplified)
-- Coded4.hs
-- Simplified version of coded4 analyzer (http://github.com/Xion/coded4)
-- Supports only Git repositories and produces less neatly formatted output
-- usage:
-- $ runghc Coded4.hs <path-to-git-repo>
module Coded4 where
import Data.Time.Clock (UTCTime, NominalDiffTime, diffUTCTime)
import System.FilePath ((</>))
import System.Directory (doesDirectoryExist)
import System.Environment (getArgs)
import Control.Monad (liftM, forM_)
import System.Process (runInteractiveProcess)
import GHC.IO.Handle (hSetBinaryMode, hGetContents)
import Data.List (intersperse, intercalate, partition, groupBy)
import Text.Regex (mkRegex, splitRegex)
import Data.Time.Format (parseTime)
import System.Locale (defaultTimeLocale)
import Data.Function (on)
data Commit = Commit { commitHash :: String,
commitTime :: UTCTime,
commitAuthor :: String,
commitMessage :: String }
deriving (Show) -- debug
data Contributor = Contributor { contribName :: String,
contribCommits :: [Commit],
contribTime :: NominalDiffTime }
instance Show Contributor where
show c = contribName c ++ " (" ++ show (length $ contribCommits c)
++ " commits) - " ++ (show $ contribTime c)
-- Generic VCS code
supportedVcs = ["git"]
detectVcs :: FilePath -> IO (Maybe String)
detectVcs repoDir =
let
isVcsRepo :: FilePath -> String -> IO Bool
isVcsRepo repoDir vcs = doesDirectoryExist vcsDir
where
vcsDir = repoDir </> internalVcsDir
internalVcsDir = '.':vcs
in do
found <- mapM (isVcsRepo repoDir) supportedVcs
let vcsFound = [vcs | (vcs, inDir) <- zip supportedVcs found, inDir]
return $ if null vcsFound then Nothing
else Just (head vcsFound)
-- Git support
retrieveGitHistory :: FilePath -> IO [Commit]
retrieveGitHistory repoDir = do
gitLogLines <- runGitLog repoDir
return $ map gitCommit gitLogLines
where
sep = "|"
runGitLog :: FilePath -> IO [String]
runGitLog repoDir =
lines `liftM` execShell "git" ("log":gitLogArgs) (Just repoDir)
where
gitLogArgs = ["--format=format:" ++ logFormat]
logFormat = intercalate sep ["%H", "%at", "%an", "%s"]
gitCommit :: String -> Commit
gitCommit logLine =
Commit hash time author message
where
Just time = parseTime defaultTimeLocale "%s" timestamp
(hash:timestamp:author:message:_) = splitRegex sepRegex logLine
sepRegex = mkRegex $ escapeChars sep
escapeChars str = '\\':(intersperse '\\' str)
-- Statistic functions
calculateStats :: [Commit] -> [Contributor]
calculateStats commits =
map contributorStats commitsByContributors
where
commitsByContributors = divideCommits commits
divideCommits :: [Commit] -> [(String, [Commit])]
divideCommits commits =
map makeTuple groupedCommits
where
makeTuple commits = (commitAuthor . head $ commits, commits)
groupedCommits = groupBy ((==) `on` commitAuthor) commits
contributorStats :: (String, [Commit]) -> Contributor
contributorStats (name, commits) =
Contributor name commits totalTime
where
totalTime =
oneCommitTime + manyCommitsTime
where
oneCommitTime = sum $ take (length oneCommit) (repeat epsilon)
-- ^ hack, NominalDiffTime doesn't support multiplication
manyCommitsTime = sum $ map commitListTime manyCommits
(oneCommit, manyCommits) = partition (\c -> length c > 1) commitClusters
commitClusters = clusterCommits commits
commitListTime commitList =
let commitTimes = map commitTime commitList
in diffUTCTime (maximum commitTimes) (minimum commitTimes)
clusterCommits commits =
clusterBy sameSession commits
where
sameSession c1 c2 = ((diffUTCTime `on` commitTime) c1 c2) < epsilon
epsilon = (10 * 60) :: NominalDiffTime -- 10 minutes
-- Utility functions
execShell :: FilePath -> [String] -> Maybe String -> IO String
execShell cmd args workDir = do
(_, stdout, _, _) <- runInteractiveProcess cmd args workDir Nothing
hSetBinaryMode stdout False
hGetContents stdout
clusterBy :: (a -> a -> Bool) -> [a] -> [[a]]
clusterBy isNear = foldr f []
where f x (cluster@(y:_) : result) | isNear x y = (x:cluster) : result
f x result = [x] : result
-- from: http://stackoverflow.com/a/8647991/434799
-- Main function
main = do
repoDir:_ <- getArgs
vcs <- detectVcs repoDir
case vcs of
Nothing -> putStrLn $ "No repo found in " ++ repoDir
Just vcsName -> do
putStrLn $ vcsName ++ " repo found in " ++ repoDir
history <- retrieveGitHistory repoDir
let stats = calculateStats history
putStrLn "Contributors: "
forM_ stats $ \c ->
putStrLn $ "- " ++ show c
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment