Created
December 27, 2011 21:30
-
-
Save Xion/1525222 to your computer and use it in GitHub Desktop.
Haskell implementation of coded4 analyzer (simplified)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Coded4.hs | |
-- Simplified version of coded4 analyzer (http://github.com/Xion/coded4) | |
-- Supports only Git repositories and produces less neatly formatted output | |
-- usage: | |
-- $ runghc Coded4.hs <path-to-git-repo> | |
module Coded4 where | |
import Data.Time.Clock (UTCTime, NominalDiffTime, diffUTCTime) | |
import System.FilePath ((</>)) | |
import System.Directory (doesDirectoryExist) | |
import System.Environment (getArgs) | |
import Control.Monad (liftM, forM_) | |
import System.Process (runInteractiveProcess) | |
import GHC.IO.Handle (hSetBinaryMode, hGetContents) | |
import Data.List (intersperse, intercalate, partition, groupBy) | |
import Text.Regex (mkRegex, splitRegex) | |
import Data.Time.Format (parseTime) | |
import System.Locale (defaultTimeLocale) | |
import Data.Function (on) | |
data Commit = Commit { commitHash :: String, | |
commitTime :: UTCTime, | |
commitAuthor :: String, | |
commitMessage :: String } | |
deriving (Show) -- debug | |
data Contributor = Contributor { contribName :: String, | |
contribCommits :: [Commit], | |
contribTime :: NominalDiffTime } | |
instance Show Contributor where | |
show c = contribName c ++ " (" ++ show (length $ contribCommits c) | |
++ " commits) - " ++ (show $ contribTime c) | |
-- Generic VCS code | |
supportedVcs = ["git"] | |
detectVcs :: FilePath -> IO (Maybe String) | |
detectVcs repoDir = | |
let | |
isVcsRepo :: FilePath -> String -> IO Bool | |
isVcsRepo repoDir vcs = doesDirectoryExist vcsDir | |
where | |
vcsDir = repoDir </> internalVcsDir | |
internalVcsDir = '.':vcs | |
in do | |
found <- mapM (isVcsRepo repoDir) supportedVcs | |
let vcsFound = [vcs | (vcs, inDir) <- zip supportedVcs found, inDir] | |
return $ if null vcsFound then Nothing | |
else Just (head vcsFound) | |
-- Git support | |
retrieveGitHistory :: FilePath -> IO [Commit] | |
retrieveGitHistory repoDir = do | |
gitLogLines <- runGitLog repoDir | |
return $ map gitCommit gitLogLines | |
where | |
sep = "|" | |
runGitLog :: FilePath -> IO [String] | |
runGitLog repoDir = | |
lines `liftM` execShell "git" ("log":gitLogArgs) (Just repoDir) | |
where | |
gitLogArgs = ["--format=format:" ++ logFormat] | |
logFormat = intercalate sep ["%H", "%at", "%an", "%s"] | |
gitCommit :: String -> Commit | |
gitCommit logLine = | |
Commit hash time author message | |
where | |
Just time = parseTime defaultTimeLocale "%s" timestamp | |
(hash:timestamp:author:message:_) = splitRegex sepRegex logLine | |
sepRegex = mkRegex $ escapeChars sep | |
escapeChars str = '\\':(intersperse '\\' str) | |
-- Statistic functions | |
calculateStats :: [Commit] -> [Contributor] | |
calculateStats commits = | |
map contributorStats commitsByContributors | |
where | |
commitsByContributors = divideCommits commits | |
divideCommits :: [Commit] -> [(String, [Commit])] | |
divideCommits commits = | |
map makeTuple groupedCommits | |
where | |
makeTuple commits = (commitAuthor . head $ commits, commits) | |
groupedCommits = groupBy ((==) `on` commitAuthor) commits | |
contributorStats :: (String, [Commit]) -> Contributor | |
contributorStats (name, commits) = | |
Contributor name commits totalTime | |
where | |
totalTime = | |
oneCommitTime + manyCommitsTime | |
where | |
oneCommitTime = sum $ take (length oneCommit) (repeat epsilon) | |
-- ^ hack, NominalDiffTime doesn't support multiplication | |
manyCommitsTime = sum $ map commitListTime manyCommits | |
(oneCommit, manyCommits) = partition (\c -> length c > 1) commitClusters | |
commitClusters = clusterCommits commits | |
commitListTime commitList = | |
let commitTimes = map commitTime commitList | |
in diffUTCTime (maximum commitTimes) (minimum commitTimes) | |
clusterCommits commits = | |
clusterBy sameSession commits | |
where | |
sameSession c1 c2 = ((diffUTCTime `on` commitTime) c1 c2) < epsilon | |
epsilon = (10 * 60) :: NominalDiffTime -- 10 minutes | |
-- Utility functions | |
execShell :: FilePath -> [String] -> Maybe String -> IO String | |
execShell cmd args workDir = do | |
(_, stdout, _, _) <- runInteractiveProcess cmd args workDir Nothing | |
hSetBinaryMode stdout False | |
hGetContents stdout | |
clusterBy :: (a -> a -> Bool) -> [a] -> [[a]] | |
clusterBy isNear = foldr f [] | |
where f x (cluster@(y:_) : result) | isNear x y = (x:cluster) : result | |
f x result = [x] : result | |
-- from: http://stackoverflow.com/a/8647991/434799 | |
-- Main function | |
main = do | |
repoDir:_ <- getArgs | |
vcs <- detectVcs repoDir | |
case vcs of | |
Nothing -> putStrLn $ "No repo found in " ++ repoDir | |
Just vcsName -> do | |
putStrLn $ vcsName ++ " repo found in " ++ repoDir | |
history <- retrieveGitHistory repoDir | |
let stats = calculateStats history | |
putStrLn "Contributors: " | |
forM_ stats $ \c -> | |
putStrLn $ "- " ++ show c |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment