Created
September 18, 2014 17:53
-
-
Save gbluma/796cc37ed9606f29b6bf to your computer and use it in GitHub Desktop.
A Hindley-Milner type system in PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** Lambda abstraction */ | |
class Lambda | |
{ | |
function __construct($v, $body) { | |
$this->v = $v; | |
$this->body = $body; | |
} | |
function __toString() { | |
return "(fn {$this->v} => {$this->body})"; | |
} | |
} | |
/** Identfier */ | |
class Ident | |
{ | |
function __construct($name) { | |
$this->name = $name; | |
} | |
function __toString() { | |
return $this->name; | |
} | |
} | |
/** Function application */ | |
class Apply | |
{ | |
function __construct($fn, $arg) { | |
$this->fn = $fn; | |
$this->arg = $arg; | |
} | |
function __toString() { | |
return "({$this->fn} {$this->arg})"; | |
} | |
} | |
/** Let binding */ | |
class Let | |
{ | |
function __construct($v, $defn, $body) { | |
$this->v = $v; | |
$this->defn = $defn; | |
$this->body = $body; | |
} | |
function __toString() { | |
return "(let {$this->v} = {$this->defn} in {$this->body})"; | |
} | |
} | |
/** Letrec binding */ | |
class Letrec | |
{ | |
function __construct($v, $defn, $body) { | |
$this->v = $v; | |
$this->defn = $defn; | |
$this->body = $body; | |
} | |
function __toString() { | |
return "(letrec {$this->v} = {$this->defn} in {$this->body})"; | |
} | |
} | |
//======================================================= | |
// Exception types | |
/** Raised if the type inference algorithm cannot infer types successfully */ | |
class TypeError extends Exception | |
{ | |
function __construct($message) { | |
$this->message = $message; | |
} | |
function __toString() { | |
return (string) $this->message; | |
} | |
} | |
/** Raised if the type environment supplied for is incomplete */ | |
class ParseError extends Exception | |
{ | |
function __construct($message) { | |
$this->message = $message; | |
} | |
function __toString() { | |
return (string) $this->message; | |
} | |
} | |
//======================================================= | |
// Types and type constructors | |
/** | |
* A type variable standing for an arbitrary type. | |
* | |
* All type variables have a unique id, but names are only assigned lazily, | |
* when required. | |
*/ | |
class TypeVariable | |
{ | |
static $next_variable_id = 0; | |
static $next_variable_name = 'a'; | |
function __construct() { | |
$this->id = TypeVariable::$next_variable_id; | |
TypeVariable::$next_variable_id += 1; | |
$this->instance = null; | |
$this->name = null; | |
} | |
/** Names are allocated to TypeVariables lazily, so that only TypeVariables present */ | |
function getName() { | |
if ($this->name === null) { | |
$this->name = TypeVariable::$next_variable_name; | |
TypeVariable::$next_variable_name = chr( ord(TypeVariable::$next_variable_name) + 1); | |
} | |
return $this->name; | |
} | |
function __toString() { | |
if ($this->instance === null) { | |
return (string) $this->instance; | |
} else { | |
return $this->name; | |
} | |
} | |
function __repr() { | |
return "TypeVariable(id = {$this->id})"; | |
} | |
} | |
/** An n-ary type constructor which builds a new type from old */ | |
class TypeOperator | |
{ | |
function __construct($name, $types) { | |
$this->name = $name; | |
if (is_array($types)) | |
$this->types = $types; | |
else | |
$this->types = array($types); | |
} | |
function __toString() { | |
$num_types = count($this->types); | |
if ($num_types === 0) { | |
return $this->name; | |
} else if ($num_types == 2) { | |
return (string) $this->types[0] . " " . $this->name . " " . (string) $this->types[1]; | |
} else { | |
return ($this->name . ' ' . implode(" ", $this->types)); | |
} | |
} | |
} | |
/** A binary type constructor which builds function types */ | |
class Func extends TypeOperator | |
{ | |
function __construct($from_type, $to_type) { | |
parent::__construct("->", $from_type, $to_type); | |
} | |
} | |
// Basic types are constructed with a nullary type constructor | |
$Integer = new TypeOperator("int", array()); # Basic integer | |
$Bool = new TypeOperator("bool", array()); # Basic bool | |
$String = new TypeOperator("string", array()); | |
//=======================================================# | |
// Type inference machinery | |
/** | |
* Computes the type of the expression given by node. | |
* | |
* The type of the node is computed in the context of the context of the | |
* supplied type environment env. Data types can be introduced into the | |
* language simply by having a predefined set of identifiers in the initial | |
* environment. environment; this way there is no need to change the syntax or, more | |
* importantly, the type-checking program when extending the language. | |
* | |
* Args: | |
* node: The root of the abstract syntax tree. | |
* env: The type environment is a mapping of expression identifier names | |
* to type assignments. | |
* to type assignments. | |
* non_generic: A set of non-generic variables, or None | |
* | |
* Returns: | |
* The computed type of the expression. | |
* | |
*/ | |
function analyse($node, $env, $non_generic=null) { | |
if ($non_generic === null) | |
$non_generic = array(); | |
if ($node instanceof Ident) { | |
return readType($node->name, $env, $non_generic); | |
} | |
else if ($node instanceof Apply) { | |
$fun_type = analyse( $node->fn, $env, $non_generic ); | |
$arg_type = analyse( $node->arg, $env, $non_generic ); | |
$result_type = new TypeVariable(); | |
unify(new Func($arg_type, $result_type), $fun_type); | |
return $result_type; | |
} | |
else if ($node instanceof Lambda) { | |
$arg_type = new TypeVariable(); | |
$new_env = $env; | |
$new_env[$node->v] = $arg_type; | |
$new_non_generic = $non_generic; | |
$new_non_generic[] = $arg_type; | |
$result_type = analyse($node->body, $new_env, $new_non_generic); | |
return new Func($arg_type, $result_type); | |
} | |
else if ($node instanceof Let) { | |
$defn_type = analyse($node->defn, $env, $non_generic); | |
$new_env = $env; | |
$new_env[$node->v] = $defn_type; | |
return analyse($node->body, $new_env, $non_generic); | |
} | |
else if ($node instanceof Letrec) { | |
$new_type = new TypeVariable(); | |
$new_env = $env; | |
$new_env[$node->v] = $new_type; | |
$new_non_generic = $non_generic; | |
$new_non_generic[] = $new_type; | |
$defn_type = analyse($node->defn, $new_env, $new_non_generic); | |
unify($new_type, $defn_type); | |
return analyse($node->body, $new_env, $non_generic); | |
} | |
die("Unhandled syntax node " . gettype(t)); | |
} | |
/** | |
* Get the type of identifier name from the type environment env. | |
* | |
* Args: | |
* name: The identifier name | |
* env: The type environment mapping from identifier names to types | |
* non_generic: A set of non-generic TypeVariables | |
* | |
* Raises: | |
* ParseError: Raised if name is an undefined symbol in the type | |
* environment. | |
*/ | |
function readType($name, $env, $non_generic) { | |
global $Integer; | |
if (isset($env[$name])) { | |
return fresh($env[$name], $non_generic); | |
} elseif (ctype_digit($name) || is_int($name)) { | |
return $Integer; | |
} else { | |
throw new Exception("Undefined symbol $name"); | |
} | |
} | |
/** | |
* Makes a copy of a type expression. | |
* | |
* The type t is copied. The the generic variables are duplicated and the | |
* non_generic variables are shared. | |
* | |
* Args: | |
* t: A type to be copied. | |
* non_generic: A set of non-generic TypeVariables | |
*/ | |
function fresh( $t, $non_generic) { | |
$mappings = array(); # A mapping of TypeVariables to TypeVariables | |
$freshrec = | |
function ($tp) use (&$mappings, &$non_generic, &$freshrec) { | |
$p = prune($tp); | |
if ($p instanceof TypeVariable) { | |
if (isGeneric($p, $non_generic)) { | |
if (count($mappings) === 0 || !isset($mappings[$p])) { | |
@$mappings[$p] = new TypeVariable(); | |
} | |
return @$mappings[$p]; | |
} else { | |
return $p; | |
} | |
} | |
else if ($p instanceof TypeOperator) { | |
$fresh_types = array(); | |
foreach($p->types as $x) { | |
$fresh_types[] = $freshrec($x); | |
} | |
return new TypeOperator($p->name, $fresh_types); | |
} | |
}; | |
return $freshrec($t); | |
} | |
/** | |
* helper function to zip in php | |
*/ | |
function zip() { | |
$args = func_get_args(); | |
$zipped = array(); | |
$n = count($args); | |
for ($i=0; $i<$n; ++$i) { | |
reset($args[$i]); | |
} | |
while ($n) { | |
$tmp = array(); | |
for ($i=0; $i<$n; ++$i) { | |
if (key($args[$i]) === null) { | |
break 2; | |
} | |
$tmp[] = current($args[$i]); | |
next($args[$i]); | |
} | |
$zipped[] = $tmp; | |
} | |
return $zipped; | |
} | |
/** | |
* Unify the two types t1 and t2. | |
* | |
* Makes the types t1 and t2 the same. | |
* | |
* Args: | |
* t1: The first type to be made equivalent | |
* t2: The second type to be be equivalent | |
* | |
* Returns: | |
* None | |
* | |
* Raises: | |
* TypeError: Raised if the types cannot be unified. | |
*/ | |
function unify($t1, $t2) | |
{ | |
$a = prune($t1); | |
$b = prune($t2); | |
if ($a instanceof TypeVariable) { | |
if ($a != $b) { | |
if (occursInType($a, $b)) | |
throw new TypeError("recursive unification"); | |
$a->instance = $b; | |
} | |
} else if (($a instanceof TypeOperator) and ($b instanceof TypeVariable)) { | |
unify($b, $a); | |
} else if (($a instanceof TypeOperator) and ($b instanceof TypeOperator)) { | |
if ( ($a->name != $b->name) or (count($a->types) != count($b->types))) | |
throw new TypeError("Type mismatch: ".(string)$a." != ".(string)$b); | |
foreach( zip($a->types, $b->types) as $row) { | |
unify($row[0], $row[1]); | |
} | |
} else { | |
die("Not unified"); | |
} | |
} | |
/** | |
* Returns the currently defining instance of t. | |
* | |
* As a side effect, collapses the list of type instances. The function Prune | |
* is used whenever a type expression has to be inspected: it will always | |
* return a type expression which is either an uninstantiated type variable or | |
* a type operator; i.e. it will skip instantiated variables, and will | |
* actually prune them from expressions to remove long chains of instantiated | |
* variables. | |
* | |
* Args: | |
* t: The type to be pruned | |
* | |
* Returns: | |
* An uninstantiated TypeVariable or a TypeOperator | |
*/ | |
function prune($t) { | |
if ($t instanceof TypeVariable) { | |
if ($t->instance !== null) { | |
$t->instance = prune($t->instance); | |
return $t->instance; | |
} | |
} | |
return $t; | |
} | |
/** | |
* Checks whether a given variable occurs in a list of non-generic variables | |
* | |
* Note that a variables in such a list may be instantiated to a type term, | |
* in which case the variables contained in the type term are considered | |
* non-generic. | |
* | |
* Note: Must be called with v pre-pruned | |
* | |
* Args: | |
* v: The TypeVariable to be tested for genericity | |
* non_generic: A set of non-generic TypeVariables | |
* | |
* Returns: | |
* True if v is a generic variable, otherwise False | |
*/ | |
function isGeneric($v, $non_generic) { | |
return !(occursIn($v, $non_generic)); | |
} | |
/** | |
* Checks whether a type variable occurs in a type expression. | |
* | |
* Note: Must be called with v pre-pruned | |
* | |
* Args: | |
* v: The TypeVariable to be tested for | |
* type2: The type in which to search | |
* | |
* Returns: | |
* True if v occurs in type2, otherwise False | |
*/ | |
function occursInType($v, $type2) { | |
$pruned_type2 = prune($type2); | |
if ($pruned_type2 === $v) | |
return true; | |
else if ($pruned_type2 instanceof TypeOperator) | |
return occursIn($v, $pruned_type2->types); | |
return false; | |
} | |
/** | |
* Checks whether a types variable occurs in any other types. | |
* | |
* Args: | |
* v: The TypeVariable to be tested for | |
* types: The sequence of types in which to search | |
* | |
* Returns: | |
* True if t occurs in any of types, otherwise False | |
*/ | |
function occursIn($t, $types) { | |
foreach($types as $t2) { | |
if (occursInType($t, $t2) === true) | |
return true; | |
} | |
return false; | |
} | |
//==================================================================# | |
// Example code to infrastructure above | |
/** | |
* Try to evaluate a type printing the result or reporting errors. | |
* | |
* Args: | |
* env: The type environment in which to evaluate the expression. | |
* node: The root node of the abstract syntax tree of the expression. | |
* | |
* Returns: | |
* None | |
*/ | |
function tryExp($env, $node) { | |
print( (string) $node . " : "); | |
try { | |
$t = analyse($node, $env); | |
print( (string) $t); | |
} catch (ParseError $e) { | |
print($e); | |
} catch (TypeError $e) { | |
print($e); | |
} | |
} | |
$var1 = new TypeVariable(); | |
$var2 = new TypeVariable(); | |
$pair_type = new TypeOperator("*", array($var1, $var2)); | |
$var3 = new TypeVariable(); | |
$my_env = array( | |
"pair" => new Func($var1, new Func($var2, $pair_type)), | |
"true" => $Bool, | |
"cond" => new Func($Bool, new Func($var3, new Func($var3, $var3))), | |
"zero" => new Func($Integer, $Bool), | |
"pred" => new Func($Integer, $Integer), | |
"times"=> new Func($Integer, new Func($Integer, $Integer)), | |
"concat" => new Func($String, new Func($String, $String)) | |
); | |
$pair = new Apply(new Apply(new Ident("pair"), new Apply(new Ident("f"), new Ident("4"))), new Apply(new Ident("f"), new Ident("true"))); | |
// This expression should pass | |
tryExp($my_env, | |
new Letrec("factorial", # letrec factorial = | |
new Lambda("n", # fn n => | |
new Apply( | |
new Apply( # cond (zero n) 1 | |
new Apply(new Ident("cond"), # cond (zero n) | |
new Apply(new Ident("zero"), new Ident("n"))), | |
new Ident("1")), | |
new Apply( # times n | |
new Apply(new Ident("times"), new Ident("n")), | |
new Apply(new Ident("factorial"), | |
new Apply(new Ident("pred"), new Ident("n"))) | |
) | |
) | |
), # in | |
new Apply(new Ident("factorial"), new Ident("5")) | |
) | |
); | |
echo "\n\n"; | |
# This expression should fail | |
tryExp($my_env, | |
# fn x => (pair(x(3) (x(true))) | |
new Lambda("x", | |
new Apply( | |
new Apply(new Ident("pair"), | |
new Apply(new Ident("x"), new Ident("3"))), | |
new Apply(new Ident("x"), new Ident("true")))) | |
); | |
/* OUTPUT: | |
(letrec factorial = (fn n => (((cond (zero n)) 1) ((times n) (factorial (pred n))))) in (factorial 5)) : | |
(fn x => ((pair (x 3)) (x true))) : Type mismatch: bool != int | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment