Created
June 3, 2010 17:51
-
-
Save astanin/424219 to your computer and use it in GitHub Desktop.
Count Hackage reverse dependencies by license type
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{- Count reverse dependencies by license type.

Output on 20100603 (license, # of revdeps, # of packages, revdeps per package):

BSD3 33469 1519 22.033574720210666
LGPL 1018 99 10.282828282828282
GPL 603 334 1.8053892215568863
OtherLicense 300 77 3.896103896103896
PublicDomain 214 64 3.34375
BSD4 53 4 13.25
MIT 8 22 0.36363636363636365
AllRightsReserved 0 2 0.0
-}
import Data.List (intercalate, intersperse, sortBy) | |
import Distribution.License | |
import Distribution.PackageDescription | |
import Distribution.PackageDescription.Parse | |
import Network.HTTP | |
import System.IO.Unsafe (unsafeInterleaveIO) | |
import qualified Data.Map as M | |
-- | URL of the reverse-dependency listing that 'main' downloads.
revdepsURL :: String
revdepsURL =
  "http://bifunctor.homelinux.net/~roel/hackage/packages/archive/revdeps-list.csv"
-- | Directory where @00-index.tar@ is unpacked; 'cabalpath' builds every
-- @.cabal@ path underneath it.
idxbase :: FilePath
idxbase = "/tmp/hackage"
-- | Left-to-right function composition: @(f >>> g) x == g (f x)@.
--
-- No fixity is declared, so it keeps the default (@infixl 9@) and therefore
-- binds tighter than '|>'.
(>>>) :: (a -> b) -> (b -> c) -> a -> c
f >>> g = g . f

-- | Reverse function application: @x |> f == f x@.
(|>) :: a -> (a -> b) -> b
x |> f = f x

infixl 0 |>
-- | Download the reverse-dependency CSV from 'revdepsURL', look up the
-- license of every listed package in its @.cabal@ file (see 'cabalpath'),
-- and print one line per license:
-- license, # of revdeps, # of packages, revdeps per package,
-- sorted by the number of revdeps, largest first.
--
-- Every non-blank CSV line must have exactly three comma-separated fields:
-- a package name followed by two integers, which are summed into the
-- package's revdep count (presumably direct and indirect reverse
-- dependencies -- confirm against the CSV producer). Blank lines are
-- skipped; any other malformed line aborts the run with an error that names
-- the offending line, instead of an anonymous pattern-match or @read@
-- failure.
main :: IO ()
main = do
  csv <- simpleHTTP (getRequest revdepsURL) >>= getResponseBody
  -- csv <- readFile "revdeps-list.csv"
  rows <- mapM parseRow (filter (not . isBlank) (lines csv))
  pkgs <- mapM findLicense rows
  printLicenseCounts (M.toList (countByLicense pkgs))
  where
    -- A line holding nothing but whitespace (including a stray '\r' from
    -- CRLF line endings) carries no record.
    isBlank :: String -> Bool
    isBlank = all (`elem` " \t\r")

    -- Turn one CSV line into (package name, path of its .cabal file, revdeps).
    parseRow :: String -> IO (String, String, Int)
    parseRow line = case split ',' line of
      [name, d, i]
        | Just direct <- readInt d
        , Just indirect <- readInt i ->
            return (name, cabalpath name, direct + indirect)
      _ -> ioError (userError ("malformed revdeps line: " ++ show line))

    -- Total replacement for 'read': accepts exactly what @read :: String -> Int@
    -- accepts (surrounding whitespace included) but returns Nothing instead
    -- of throwing.
    readInt :: String -> Maybe Int
    readInt s = case [n | (n, rest) <- reads s, ("", "") <- lex rest] of
      [n] -> Just n
      _ -> Nothing

    -- Read the license name out of a .cabal file; "" when the file does not
    -- parse. 'unsafeInterleaveIO' defers opening the file until the license
    -- string is first demanded, so 'mapM findLicense' does not open every
    -- file up front (presumably to keep the number of simultaneously open
    -- lazy 'readFile' handles low -- confirm before removing). A missing
    -- file therefore raises its IO error at that later point.
    getLicense :: FilePath -> IO String
    getLicense cabal = unsafeInterleaveIO $ do
      contents <- readFile cabal
      return $ case parsePackageDescription contents of
        ParseOk _ gpd -> showL (license (packageDescription gpd))
        _ -> ""

    -- License name with any version information dropped.
    showL (GPL _) = "GPL"
    showL (LGPL _) = "LGPL"
    showL (UnknownLicense x) = x
    showL l = show l

    -- Replace the .cabal path in a row by the license found there.
    findLicense :: (String, String, Int) -> IO (String, String, Int)
    findLicense (name, cabal, i) = do
      l <- getLicense cabal
      return (name, l, i)

    -- Per license: (number of packages, total revdeps).
    countByLicense :: [(String, String, Int)] -> M.Map String (Int, Int)
    countByLicense = foldr go M.empty
      where
        go (_, lic, rdeps) totals = M.insertWith add lic (1, rdeps) totals
        add (libs, rds) (libs', rds') = (libs + libs', rds + rds')

    printLicenseCounts :: [(String, (Int, Int))] -> IO ()
    printLicenseCounts lcs = mapM_ printRow (sortBy byRevdepsDesc lcs)
      where
        -- Arguments are flipped in 'compare' to sort in descending order.
        byRevdepsDesc (_, (_, c)) (_, (_, c')) = compare c' c
        -- libs is never 0: a license only enters the map with a count of 1.
        printRow (l, (libs, rds)) =
          putStrLn $
            unwords
              [ l
              , show rds
              , show libs
              , show (fromIntegral rds / fromIntegral libs :: Double)
              ]
-- | Split a list on every occurrence of a delimiter, dropping the delimiters.
--
-- The result always has at least one piece; leading, trailing or adjacent
-- delimiters produce empty pieces:
--
-- > split ',' "a,b,c" == ["a","b","c"]
-- > split ',' ""      == [""]
-- > split ',' "a,,b," == ["a","","b",""]
--
-- Built on 'break', so every case is covered by construction and pieces are
-- produced lazily from the front of the input.
split :: Eq a => a -> [a] -> [[a]]
split delim xs = case break (== delim) xs of
  (piece, []) -> [piece]
  (piece, _ : rest) -> piece : split delim rest
-- | Path of a package's @.cabal@ file below 'idxbase':
-- @idxbase/NAME/VERSION/NAME.cabal@.
--
-- @pkgname@ is cut at its last @\'-\'@: everything before it is the name,
-- everything after it the version. When there is no @\'-\'@ at all, the name
-- part is empty and the whole string is used as the version.
cabalpath :: String -> FilePath
cabalpath pkgname =
  intercalate "/" [idxbase, basename, version, basename ++ ".cabal"]
  where
    (basename, version) = cutAtLastDash pkgname

    -- Scan from the right: the first '-' met in the reversed string is the
    -- last one in the original.
    cutAtLastDash str = case break (== '-') (reverse str) of
      (revVersion, []) -> ([], reverse revVersion)
      (revVersion, _ : revName) -> (reverse revName, reverse revVersion)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment