Last active
May 12, 2018 21:16
-
-
Save phadej/e595c5e02884ce9f778759007a78c321 to your computer and use it in GitHub Desktop.
This file is my attempt to clarify and formalise my own thoughts about dependency resolution problem.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE DeriveFoldable #-} | |
{-# LANGUAGE FlexibleContexts #-} | |
{-# LANGUAGE FlexibleInstances #-} | |
{-# LANGUAGE FunctionalDependencies #-} | |
{-# LANGUAGE MultiParamTypeClasses #-} | |
{-# LANGUAGE RankNTypes #-} | |
{-# LANGUAGE ScopedTypeVariables #-} | |
{-# LANGUAGE UndecidableInstances #-} | |
-- | This file is my attempt to clarify and formalise my own thoughts about | |
-- dependency resolution problem. | |
module DepProb where | |
-- Imports | |
import Data.Foldable (Foldable (foldMap), all) | |
import Data.List (find) | |
import Data.Map (Map) | |
import qualified Data.Map as Map | |
import Data.Maybe (fromMaybe) | |
import Data.Monoid (All (..), Any (..), Monoid (..), Sum (..)) | |
import Data.Set (Set) | |
import qualified Data.Set as Set | |
import Prelude hiding (all) | |
-- | To describe the dependency solving problem, we start with its solution. | |
-- | |
-- A solution to dependency resolution problem is a set of packages, identified | |
-- by name @n@ ('Distribution.Package.PackageName'), with particular version | |
-- @v@ ('Data.Version.Version') and some additional configuration @a@ (more | |
-- about this later). | |
-- | |
-- /Note:/ With dependent type, we'd like to have @Solution n v (a : n -> v -> | |
-- *) = ...@, i.e. configuration variants can be different per package version. | |
-- | |
-- /Note:/ We could also have per-component solutions (libraries, executables, | |
-- test-suites and benchmarks could be treaten as separate "packages"). | |
newtype Solution n v a = S { getSolution :: Map n (v, a) } | |
-- | The problem itself is identified by /constraints/ that we can have on | |
-- the 'Solution' | |
class Constraint n v a c | c -> n v a where | |
holdsOn :: c -> Solution n v a -> Bool | |
-- | One of the inputs to the problem, is a package database: | |
newtype DB n v a c = DB { getDB :: Map n (Map (v, a) c) } | |
-- | With initial constraint (constraining packages already installed, and | |
-- packages we'd like to install), we can write a type of decision function. | |
type Solve n v a c = Constraint n v a c => c -> DB n v a c -> Maybe (Solution n v a) | |
-- | Now it's possible to talk about whether 'Solution' is valid. We omit | |
-- a notion of installability (i.e. preventing cycles), but it's easy to add. | |
validIn | |
:: forall n v a c. (Constraint n v a c, Ord n, Ord v, Ord a) | |
=> Solution n v a -> DB n v a c -> Bool | |
validIn solution db = getAll $ Map.foldMapWithKey f (getSolution solution) | |
where | |
f :: n -> (v, a) -> All | |
f n va = All $ fromMaybe False $ do | |
m <- Map.lookup n (getDB db) | |
c <- Map.lookup va m | |
return $ c `holdsOn` solution | |
------------------------------------------------------------------------------- | |
-- Solution quality | |
------------------------------------------------------------------------------- | |
-- | Solution quality is difficult topic. Minimal solutions, i.e. plans with | |
-- less packages considered better. This is easy metric to define. Yet when | |
-- splitting packages (e.g. network and network-uri), this metric lies. | |
solutionSizeMetric :: Solution n v a -> Int | |
solutionSizeMetric = Map.size . getSolution | |
-- | There are usually various solutions differing by the versions of packages | |
-- present, we'd prefer plans with recent versions of packages. Yet pointwise | |
-- comparison of solutions isn't total order, as there could be valid solutions | |
-- @foo-1, bar-2@ and @foo-2, bar-1@, but @foo-2, bar-2@ is invalid. | |
-- | |
-- Still it's useful to define partial order. /TODO/ | |
solutionVerLeq :: Solution n v a -> Solution n v a -> Bool | |
solutionVerLeq = undefined | |
-- Thoughts about SMT-solvers: they could generate initial solutions quite fast. | |
-- the disjunction of those few first ones could be used as a lower constraints | |
-- for second iterations. That would be nice to experiment with. | |
-- | |
-- Also as Cabal-test suite has DSL for specifying package indexes, | |
-- one could play with it. | |
------------------------------------------------------------------------------- | |
-- Concrete implementations | |
------------------------------------------------------------------------------- | |
-- | The simplest type which can be 'Constraint'. | |
newtype G n v a = G { gHoldsOn :: Solution n v a -> Bool } | |
instance Constraint n v a (G n v a) where | |
holdsOn = gHoldsOn | |
-- 'G' doesn't expose any underlying structue, so the best decision procedure | |
-- we could have is to go thru all possible solutions and pick first one. | |
-- | |
-- /Nitpick:/ this could be better, as solution procedure could learn about | |
-- conflicting package version, and try to pick the next solution without them. | |
-- It would require slightly different type-signature though. | |
solveG | |
:: forall n v a. (Ord n, Ord v, Ord a) | |
=> [Solution n v a] | |
-> Solve n v a (G n v a) | |
solveG solutions = solve | |
where | |
solve :: Solve n v a (G n v a) | |
solve initialConstraint db = find pred solutions | |
where | |
pred s = initialConstraint `holdsOn` s && validIn s db | |
------------------------------------------------------------------------------- | |
-- Per-package constraints | |
------------------------------------------------------------------------------- | |
-- | Let's assume that each package has a fixed set of dependencies, | |
-- with constraints per package only. | |
-- | |
-- To return to additional configuration: let there be none, i.e. @a ~ ()@. | |
-- This still models manual flags, as their effect can be applied on 'DB' before | |
-- giving it to solving procedure. | |
-- | |
-- I'll call this type 'Old', because this is situation as per @Cabal <1.18@. | |
-- After that 'Cabal' learned about automatic flags. | |
newtype Old n v = Old { getOld :: Map n (v -> Bool) } | |
instance Ord n => Constraint n v () (Old n v) where | |
old `holdsOn` solution = getAll $ Map.foldMapWithKey f (getOld old) | |
where | |
f :: n -> (v -> Bool) -> All | |
f n pred = All $ fromMaybe False $ do | |
(v, _) <- Map.lookup n (getSolution solution) | |
return $ pred v | |
------------------------------------------------------------------------------- | |
-- Disjunctions | |
------------------------------------------------------------------------------- | |
-- | 'Old' constraint formulation reprsents more information, to be exploited | |
-- by the solver. OTOH now we have limited what kind of dependencies we can pose. | |
-- | |
-- Feature most needed, is ability to express disjunctions of different packages. | |
-- For example: | |
-- | |
-- @ | |
-- build-depends: network >= 2.6 && network-uri >= 2.6 || network < 2.6 | |
-- @ | |
-- | |
-- Currently Cabal solves this by having /automatic flags/, which solving | |
-- procedure is free to choose. This can be formulated by small adjustment to | |
-- the 'Old'. Instead of giving single `Map`, we can many. Then the @a@ - | |
-- extra argument would be an index into that list (here we'd like to have | |
-- dependent types, to make indexing safe). | |
-- | |
-- /See:/ <https://github.com/haskell/cabal/issues/2033> | |
-- | |
-- /Note:/ Cabal solver is smart, and considers the intersection of | |
-- dependencies (i.e. dependencies which are same for each configuration). The | |
-- intersection is possible to extract from this formulation as well. | |
newtype Dis n v = Dis { getDis :: [Map n (v -> Bool)] } | |
instance Ord n => Constraint n v Int (Dis n v) where | |
dis `holdsOn` solution = | |
getAny . foldMap (\m -> Any . getAll $ Map.foldMapWithKey f m) $ getDis dis | |
where | |
f :: n -> (v -> Bool) -> All | |
f n pred = All $ fromMaybe False $ do | |
-- here we don't care which index was picked for each package | |
-- it's there to be used by 'validIn'. | |
(v, _) <- Map.lookup n (getSolution solution) | |
return $ pred v | |
------------------------------------------------------------------------------- | |
-- Depends | |
------------------------------------------------------------------------------- | |
-- | Depends proposal is an alternative proposal for disjunctions: | |
-- | |
-- @ | |
-- if depend(network >= 2.6) | |
-- build-depends: network-uri >= 2.6 | |
-- @ | |
-- | |
-- It can be interpreted as an additional implication of constraints. We can apply | |
-- it on any 'Constraint' | |
-- | |
-- /See:/ <https://github.com/haskell/cabal/issues/2033#issuecomment-231772073> | |
-- | |
-- /Note:/ there could be less powerful versions of 'Depends', where premise of | |
-- depends is less powerful than full @c@ (i.e. if we have automatic flags, how | |
-- this features interact). | |
data Depends c = Depends | |
{ depC :: c | |
, depI :: [(c, c)] -- ^ List of possible implications | |
} | |
instance Constraint n v a c => Constraint n v a (Depends c) where | |
dep `holdsOn` solution = depC dep `holdsOn` solution && impl | |
where | |
impl = getAll . foldMap (All . pred) $ depI dep | |
pred (prem, conl) = prem `holdsOn` solution ==> conl `holdsOn` solution | |
------------------------------------------------------------------------------- | |
-- Provides | |
------------------------------------------------------------------------------- | |
-- | Sometimes we want to have mutually exclusive packages. One proposal to solve | |
-- this is 'provides' mechanism: | |
-- | |
-- @ | |
-- provides: orphans-base-foldable-either | |
-- @ | |
-- | |
-- With the idea, that any provided identifier could exist at most once | |
-- in the 'Solution'. | |
-- | |
-- See <https://github.com/haskell/cabal/issues/3061> | |
data Provides i c = Provides | |
{ providesC :: c | |
, providesI :: Set i -- ^ Set of provided identifiers | |
} | |
instance (Ord i, Constraint n v a c) => Constraint n v a (Provides i c) where | |
pro `holdsOn` solution = providesC pro `holdsOn` solution | |
-- | There are different ways to formulate exclusivity of @i@. | |
-- Clearenst way is to do the check on the whole 'Solution'. | |
validInWithProvides | |
:: forall n v a c i. (Constraint n v a c, Ord n, Ord v, Ord a, Ord i) | |
=> Solution n v a -> DB n v a (Provides i c) -> Bool | |
validInWithProvides solution db = validIn solution db && providesCheck | |
where | |
providesCheck = all (<= 1) . Map.foldMapWithKey f $ getSolution solution | |
f n va = fromMaybe mempty $ do | |
m <- Map.lookup n (getDB db) | |
c <- Map.lookup va m | |
return $ MapSum $ setToMap (Sum 1 :: Sum Integer) (providesI c) | |
-- If we also make possible to depend on @provides@ idenifiers, it would | |
-- be possible to force automatic flag selection ('Dis'). This could be | |
-- useful feature for packages providing instances, so they could depend truly | |
-- optionally on large set of dependencies. | |
-- | |
-- @ | |
-- if flag(profunctors) | |
-- -- This identifiers could be scoped by package with some syntax | |
-- provide: aeson-profunctors | |
-- @ | |
-- | |
-- @ | |
-- build-depends: aeson >= 0.11 | |
-- provide-depends: aeson-profunctors | |
-- @ | |
-- | |
-- /TODO/ write 'Constraint' instance | |
data ProDep i c = ProDep | |
{ proDepC :: c | |
, proDepI :: Set i | |
, proDepD :: Set i -- ^ we require this identifiers to present in final 'Solution' | |
} | |
------------------------------------------------------------------------------- | |
-- Constraint proposal | |
------------------------------------------------------------------------------- | |
-- | Alternative to the /Provides/ -proposal is a constraint proposal. | |
-- | |
-- In this framework it's much simpler than 'Provides', as one can see from its | |
-- 'Constraint' implementation. | |
-- | |
-- It also highlights one implicit property of 'Old' and 'Dis', if package name | |
-- is in the map, it should also be in the 'Solution'. | |
-- Packages in 'conCon' doesn't need to be. | |
-- | |
-- See <https://github.com/haskell/cabal/issues/3061> | |
data Con n v c = Con | |
{ conC :: c -- ^ original constraint | |
, conCon :: Map n (v -> Bool) -- ^ additional constraints in 'Old' style. We could want 'Dis' style too, but it's trivial to adopt. | |
} | |
instance (Ord n, Constraint n v a c) => Constraint n v a (Con n v c) where | |
con `holdsOn` solution = conC con `holdsOn` solution && conHolds | |
where | |
conHolds = getAll $ Map.foldMapWithKey f (conCon con) | |
-- | Here we don't require that packages listed in 'conCon' are in the 'Solution'. | |
-- Yet if they are there, constraint must hold. | |
f :: n -> (v -> Bool) -> All | |
f n pred = All $ fromMaybe True $ do | |
(v, _) <- Map.lookup n (getSolution solution) | |
return $ pred v | |
------------------------------------------------------------------------------- | |
-- Conflicts proposaal | |
------------------------------------------------------------------------------- | |
-- | Conflicts proposal is even simpler then 'constraint', yet powerful as it | |
-- introduces negation to the constraint language. | |
-- | |
-- See <https://github.com/haskell/cabal/issues/3061> | |
data Conflict c = Conflict | |
{ conflictPos :: c | |
, conflictNeg :: c | |
} | |
instance Constraint n v a c => Constraint n v a (Conflict c) where | |
c `holdsOn` solution = | |
conflictPos c `holdsOn` solution && | |
not (conflictNeg c `holdsOn` solution) | |
------------------------------------------------------------------------------- | |
-- Utilities | |
------------------------------------------------------------------------------- | |
infix 4 ==> | |
(==>) :: Bool -> Bool -> Bool | |
False ==> _ = True | |
True ==> b = b | |
setToMap :: Ord a => b -> Set a -> Map a b | |
setToMap x = Map.fromList . map f . Set.toList | |
where | |
f k = (k, x) | |
newtype MapSum n i = MapSum { getMapSum :: Map n (Sum i) } | |
instance Foldable (MapSum n) where | |
foldMap f (MapSum m) = foldMap (f . getSum) m | |
instance (Ord n, Num i) => Monoid (MapSum n i) where | |
mempty = MapSum Map.empty | |
mappend (MapSum a) (MapSum b) = MapSum (Map.unionWith mappend a b) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment