paulmillr · July 17, 2012 17:39 · vendethiel · Jul 17, 2012 · paulmillr · Jul 17, 2012
diff --git a/type-inference.coffee b/type-inference.coffee
 # Algorithm W (Damas-Hindley-Milner) in LiveScript.
 # By Paul Miller (paulmillr.com), Public domain.
 #
 # Based on Robert Smallshire's [Python code](http://bit.ly/bbVmmX).
 # Which is based on Andrew's [Scala code](http://bit.ly/aztXwD).
 # Which is based on Nikita Borisov's [Perl code](http://bit.ly/myq3uA).
 # Which is based on Luca Cardelli's [Modula-2 code](http://bit.ly/Hjpvb).
 # Something like that.

 prelude = require './prelude'
 prelude.install-prelude global

 class Set
  ->
    @keys = []
    @length = 0
    Object.seal(this)

  has: (key) ->
    @keys.index-of(key) >= 0

  push: (key) ->
    unless @has key
      @keys.push key
      @length += 1

  map: (fn) -> @keys.map(fn)
  some: (fn) -> @keys.some(fn)

 clone = (obj) ->
  if not obj? or typeof obj isnt 'object'
    obj
  else if obj instanceof Date
    new Date obj.get-time!
  else if obj instanceof RegExp
    flags = ''
    flags += 'g' if obj.global?
    flags += 'i' if obj.ignoreCase?
    flags += 'm' if obj.multiline?
    flags += 'y' if obj.sticky?
    new RegExp obj.source, flags 
  else
    newInstance = new obj.constructor!
    for key of obj
      newInstance[key] = clone obj[key]
    newInstance

 calls = Object.create(null)
 log-call = (fn) ->
  calls[fn] ?= 0
  calls[fn] += 1

 # λ abstraction.
 class Lambda
  (@v, @body) ->
    return

  to-string: ->
    "(fn #{@v} => #{@body})"

 # Identifier.
 class Ident
  (@name) ->
    return

  get-name: ->
    @name

  to-string: ->
    @name

 # Function application
 class Apply
  (@fn, @arg) ->
    return

  to-string: ->
    "(#{@fn} #{@arg})"

 # Let binding
 class Let
  (@v, @defn, @body) ->
    return

  to-string: ->
    "(let #{@v} = #{@defn} in #{@body})"

 # Letrec binding.
 class Letrec
  (@v, @defn, @body) ->
    return

  to-string: ->
    "(letrec #{@v} = #{@defn} in #{@body})"

 # Exception types
 # ---------------

 Type-error = Error
 Parse-error = Error

 # Raised if the type inference algorithm cannot infer types successfully.
 # class Type-error extends Error

 # Raised if the type environment supplied for is incomplete
 # class Parse-error extends Error

 # Types and type constructors
 # ---------------------------

 # A type variable standing for an arbitrary type.
 # All type variables have a unique id, but names are only assigned lazily,
 # when required.
 class Type-variable
  next-variable-id = 0
  next-variable-name-code = 96

  ->
    @id = next-variable-id
    next-variable-id += 1
    @instance = null
    @name = null

  get-name: ->
    next-variable-name-code += 1
    @name ?= String.from-char-code next-variable-name-code

  to-string: ->
    if @instance?
      @instance.to-string!
    else
      @get-name!

 # An n-ary type constructor which builds a new type from old.
 class Type-operator
  (@name, @types) ->
    return

  get-name: ->
    @name

  to-string: ->
    num-types = @types.length
    if num-types is 0
      @name
    else if num-types is 2
      "(#{@types[0]} #{@name} #{@types[1]})"
    else
      "#{name} #{' '.join(@types)}"

 # A binary type constructor which builds function types.
 class Function extends Type-operator
  (from-type, to-type) ->
    super '->', [from-type, to-type]

 # Basic types are constructed with a nullary type constructor
 Integer = new Type-operator 'int', []  # Basic integer.
 Bool = new Type-operator 'bool', []  # Basic boolean.

 # Type inference machinery
 # ------------------------

 # Computes the type of the expression given by node.
 analyze = (node, env, non-generic = new Set) ->
  if node instanceof Ident
    get-type node.get-name!, env, non-generic
  else if node instanceof Apply
    fun-type = analyze node.fn, env, non-generic
    arg-type = analyze node.arg, env, non-generic
    result-type = new Type-variable()
    unify new Function(arg-type, result-type), fun-type
    result-type
  else if node instanceof Lambda
    arg-type = new Type-variable()
    new-env = clone env
    new-env[node.v] = arg-type
    new-non-generic = clone non-generic
    new-non-generic.push arg-type
    result-type = analyze node.body, new-env, new-non-generic
    new Function arg-type, result-type
  else if node instanceof Let
    defn-type = analyze node.defn, env, non-generic
    new-env = clone env
    new-env[node.v] = defn-type
    analyze node.body, new-env, non-generic
  else if node instanceof Letrec
    new-type = new Type-variable()
    new-env = clone env
    new-env[node.v] = new-type
    new-non-generic = clone non-generic
    new-non-generic.push new-type
    defn-type = analyze node.defn, new-env, new-non-generic
    unify new-type, defn-type
    analyze node.body, new-env, non-generic
  else
    throw new Error "Unhandled syntax node #{node}"

 # Get the type of identifier name from the type environment env.
 get-type = (name, env, non-generic) ->
  if name of env
    fresh env[name], non-generic
  else if is-integer-literal name
    Integer
  else
    throw new ParseError "Undefined symbol #{name}"

 fresh = (type, non-generic) ->
  mappings = {}
  freshrec = (type) ->
    pruned = prune type
    if pruned instanceof Type-variable
      if is-generic pruned, non-generic
        if pruned.id not of mappings
          log-call('notin')
          mappings[pruned.id] = new Type-variable()
        mappings[pruned.id]
      else
        pruned
    else if pruned instanceof Type-operator
      new Type-operator pruned.get-name!, (pruned.types.map freshrec)
  freshrec type

 # Unify the two types type1 and type2.
 # Makes the types t1 and t2 the same.
 #
 # type1 - first type to be made equivalent.
 # type2 - second type to be made equivalent.
 #
 # Returns nothing.
 # Throws TypeError if the types cannot be unified.
 unify = (type1, type2) ->
  a = prune type1
  b = prune type2
  if a instanceof Type-variable and a isnt b
    throw new TypeError 'recursive unification' if occurs-in-type a, b
    a.instance = b
  else if a instanceof Type-operator and b instanceof Type-variable
    unify b, a
  else if a instanceof Type-operator and b instanceof Type-operator
    if (a.get-name! isnt b.get-name!) or (a.types.length isnt b.types.length)
      throw new TypeError "Type mismatch: #{a} != #{b}"
    zip(a.types, b.types).forEach ([p, q]) ->
      unify p, q
  else
    throw new Error 'Not unified'
  return

 # Returns the currently defining instance of type.
 # As a side effect, collapses the list of type instances. The function Prune
 # is used whenever a type expression has to be inspected: it will always
 # return a type expression which is either an uninstantiated type variable or
 # a type operator; i.e. it will skip instantiated variables, and will
 # actually prune them from expressions to remove long chains of instantiated
 # variables.
 prune = (type) ->
  log-call('prune')
  if type instanceof Type-variable and type.instance?
    type.instance = prune type.instance
  else
    type

 # Checks whether a type variable occurs in a type expression.
 occurs-in-type = (variable, type-expr) ->
  pruned-expr = prune type-expr
  if pruned-expr is variable
    yes
  else if pruned-expr instanceof Type-operator
    occurs-in variable, pruned-expr.types
  else
    no

 # Checks whether a types variable occurs in any other types.
 occurs-in = (type, types) ->
  types.some (sub-type) -> occurs-in-type type, sub-type

 # Checks whether a given variable occurs in a list of non-generic variables.
 is-generic = (variable, non-generic) ->
  not occurs-in variable, non-generic

 # Checks whether name is an integer literal string.
 is-integer-literal = (name) ->
  not isNaN parse-int name, 10

 # Example code to exercise the above.
 # -----------------------------------

 try-exp = (env, node) ->
  process.stdout.write "#{node}: "
  try
    t = analyze node, env
    process.stdout.write "#{t}\n"
  catch error
    process.stdout.write "Error! #{error.message}\n"

 main = ->
  var1 = new Type-variable()
  var2 = new Type-variable()
  pair-type = new Type-operator '*', [var1, var2]

  var3 = new Type-variable()

  my-env =
    pair: new Function var1, new Function var2, pair-type
    true: Bool
    cond: new Function Bool, new Function var3, new Function var3, var3
    zero: new Function Integer, Bool
    pred: new Function Integer, Integer
    times: new Function Integer, new Function Integer, Integer

  pair = new Apply(new Apply(new Ident("pair"), new Apply(new Ident("f"), new Ident("4"))), new Apply(new Ident("f"), new Ident("true")))

  examples = [
    # factorial
    new Letrec('factorial', # letrec factorial =
      new Lambda('n',    # fn n =>
        new Apply(
          new Apply(   # cond (zero n) 1
            new Apply(new Ident('cond'),     # cond (zero n)
              new Apply(new Ident('zero'), new Ident('n'))),
            new Ident('1')),
          new Apply(    # times n
            new Apply(new Ident('times'), new Ident('n')),
            new Apply(new Ident('factorial'),
              new Apply(new Ident('pred'), new Ident('n')))
          )
        )
      ),      # in
      new Apply(new Ident('factorial'), new Ident('5'))
    ),
            
    # Should fail:
    # fn x => (pair(x(3) (x(true)))
    new Lambda('x',
      new Apply(
        new Apply(new Ident('pair'),
          new Apply(new Ident('x'), new Ident('3'))),
        new Apply(new Ident('x'), new Ident('true')))),
            
    # pair(f(3), f(true))
    new Apply(
      new Apply(new Ident('pair'), new Apply(new Ident('f'), new Ident('4'))),
      new Apply(new Ident('f'), new Ident('true'))),


    # let f = (fn x => x) in ((pair (f 4)) (f true))
    new Let('f', new Lambda('x', new Ident('x')), pair),

    # fn f => f f (fail)
    new Lambda('f', new Apply(new Ident('f'), new Ident('f'))),

    # let g = fn f => 5 in g g
    new Let('g',
      new Lambda('f', new Ident('5')),
      new Apply(new Ident('g'), new Ident('g'))),

    # example that demonstrates generic and non-generic variables:
    # fn g => let f = fn x => g in pair (f 3, f true)
    # Buggy!
    # Expected: (a -> (a * a))
    # Actual: (a -> (b * c))
    new Lambda('g',
      new Let('f',
        new Lambda('x', new Ident('g')),
        new Apply(
          new Apply(new Ident('pair'),
            new Apply(new Ident('f'), new Ident('3'))
          ),
          new Apply(new Ident('f'), new Ident('true'))))),

    # Function composition
    # fn f (fn g (fn arg (f g arg)))
    # Buggy!
    # Expected: ((b -> c) -> ((c -> d) -> (b -> d)))
    # Actual: (d -> (e -> (f -> g)))
    new Lambda('f', new Lambda('g', new Lambda('arg', new Apply(new Ident('g'), new Apply(new Ident('f'), new Ident('arg'))))))
  ]

  for example in examples
    try-exp my-env, example
  for k, v of calls 
    console.log "\n#{k}=#{v}"

 main()
	# Algorithm W (Damas-Hindley-Milner) in LiveScript.
	# By Paul Miller (paulmillr.com), Public domain.
	#
	# Based on Robert Smallshire's [Python code](http://bit.ly/bbVmmX).
	# Which is based on Andrew's [Scala code](http://bit.ly/aztXwD).
	# Which is based on Nikita Borisov's [Perl code](http://bit.ly/myq3uA).
	# Which is based on Luca Cardelli's [Modula-2 code](http://bit.ly/Hjpvb).
	# Something like that.

	prelude = require './prelude'
	prelude.install-prelude global

	class Set
	->
	@keys = []
	@length = 0
	Object.seal(this)

	has: (key) ->
	@keys.index-of(key) >= 0

	push: (key) ->
	unless @has key
	@keys.push key
	@length += 1

	map: (fn) -> @keys.map(fn)
	some: (fn) -> @keys.some(fn)

	clone = (obj) ->
	if not obj? or typeof obj isnt 'object'
	obj
	else if obj instanceof Date
	new Date obj.get-time!
	else if obj instanceof RegExp
	flags = ''
	flags += 'g' if obj.global?
	flags += 'i' if obj.ignoreCase?
	flags += 'm' if obj.multiline?
	flags += 'y' if obj.sticky?
	new RegExp obj.source, flags
	else
	newInstance = new obj.constructor!
	for key of obj
	newInstance[key] = clone obj[key]
	newInstance

	calls = Object.create(null)
	log-call = (fn) ->
	calls[fn] ?= 0
	calls[fn] += 1

	# λ abstraction.
	class Lambda
	(@v, @body) ->
	return

	to-string: ->
	"(fn #{@v} => #{@body})"

	# Identifier.
	class Ident
	(@name) ->
	return

	get-name: ->
	@name

	to-string: ->
	@name

	# Function application
	class Apply
	(@fn, @arg) ->
	return

	to-string: ->
	"(#{@fn} #{@arg})"

	# Let binding
	class Let
	(@v, @defn, @body) ->
	return

	to-string: ->
	"(let #{@v} = #{@defn} in #{@body})"

	# Letrec binding.
	class Letrec
	(@v, @defn, @body) ->
	return

	to-string: ->
	"(letrec #{@v} = #{@defn} in #{@body})"

	# Exception types
	# ---------------

	Type-error = Error
	Parse-error = Error

	# Raised if the type inference algorithm cannot infer types successfully.
	# class Type-error extends Error

	# Raised if the type environment supplied for is incomplete
	# class Parse-error extends Error

	# Types and type constructors
	# ---------------------------

	# A type variable standing for an arbitrary type.
	# All type variables have a unique id, but names are only assigned lazily,
	# when required.
	class Type-variable
	next-variable-id = 0
	next-variable-name-code = 96

	->
	@id = next-variable-id
	next-variable-id += 1
	@instance = null
	@name = null

	get-name: ->
	next-variable-name-code += 1
	@name ?= String.from-char-code next-variable-name-code

	to-string: ->
	if @instance?
	@instance.to-string!
	else
	@get-name!

	# An n-ary type constructor which builds a new type from old.
	class Type-operator
	(@name, @types) ->
	return

	get-name: ->
	@name

	to-string: ->
	num-types = @types.length
	if num-types is 0
	@name
	else if num-types is 2
	"(#{@types[0]} #{@name} #{@types[1]})"
	else
	"#{name} #{' '.join(@types)}"

	# A binary type constructor which builds function types.
	class Function extends Type-operator
	(from-type, to-type) ->
	super '->', [from-type, to-type]

	# Basic types are constructed with a nullary type constructor
	Integer = new Type-operator 'int', [] # Basic integer.
	Bool = new Type-operator 'bool', [] # Basic boolean.

	# Type inference machinery
	# ------------------------

	# Computes the type of the expression given by node.
	analyze = (node, env, non-generic = new Set) ->
	if node instanceof Ident
	get-type node.get-name!, env, non-generic
	else if node instanceof Apply
	fun-type = analyze node.fn, env, non-generic
	arg-type = analyze node.arg, env, non-generic
	result-type = new Type-variable()
	unify new Function(arg-type, result-type), fun-type
	result-type
	else if node instanceof Lambda
	arg-type = new Type-variable()
	new-env = clone env
	new-env[node.v] = arg-type
	new-non-generic = clone non-generic
	new-non-generic.push arg-type
	result-type = analyze node.body, new-env, new-non-generic
	new Function arg-type, result-type
	else if node instanceof Let
	defn-type = analyze node.defn, env, non-generic
	new-env = clone env
	new-env[node.v] = defn-type
	analyze node.body, new-env, non-generic
	else if node instanceof Letrec
	new-type = new Type-variable()
	new-env = clone env
	new-env[node.v] = new-type
	new-non-generic = clone non-generic
	new-non-generic.push new-type
	defn-type = analyze node.defn, new-env, new-non-generic
	unify new-type, defn-type
	analyze node.body, new-env, non-generic
	else
	throw new Error "Unhandled syntax node #{node}"

	# Get the type of identifier name from the type environment env.
	get-type = (name, env, non-generic) ->
	if name of env
	fresh env[name], non-generic
	else if is-integer-literal name
	Integer
	else
	throw new ParseError "Undefined symbol #{name}"

	fresh = (type, non-generic) ->
	mappings = {}
	freshrec = (type) ->
	pruned = prune type
	if pruned instanceof Type-variable
	if is-generic pruned, non-generic
	if pruned.id not of mappings
	log-call('notin')
	mappings[pruned.id] = new Type-variable()
	mappings[pruned.id]
	else
	pruned
	else if pruned instanceof Type-operator
	new Type-operator pruned.get-name!, (pruned.types.map freshrec)
	freshrec type

	# Unify the two types type1 and type2.
	# Makes the types t1 and t2 the same.
	#
	# type1 - first type to be made equivalent.
	# type2 - second type to be made equivalent.
	#
	# Returns nothing.
	# Throws TypeError if the types cannot be unified.
	unify = (type1, type2) ->
	a = prune type1
	b = prune type2
	if a instanceof Type-variable and a isnt b
	throw new TypeError 'recursive unification' if occurs-in-type a, b
	a.instance = b
	else if a instanceof Type-operator and b instanceof Type-variable
	unify b, a
	else if a instanceof Type-operator and b instanceof Type-operator
	if (a.get-name! isnt b.get-name!) or (a.types.length isnt b.types.length)
	throw new TypeError "Type mismatch: #{a} != #{b}"
	zip(a.types, b.types).forEach ([p, q]) ->
	unify p, q
	else
	throw new Error 'Not unified'
	return

	# Returns the currently defining instance of type.
	# As a side effect, collapses the list of type instances. The function Prune
	# is used whenever a type expression has to be inspected: it will always
	# return a type expression which is either an uninstantiated type variable or
	# a type operator; i.e. it will skip instantiated variables, and will
	# actually prune them from expressions to remove long chains of instantiated
	# variables.
	prune = (type) ->
	log-call('prune')
	if type instanceof Type-variable and type.instance?
	type.instance = prune type.instance
	else
	type

	# Checks whether a type variable occurs in a type expression.
	occurs-in-type = (variable, type-expr) ->
	pruned-expr = prune type-expr
	if pruned-expr is variable
	yes
	else if pruned-expr instanceof Type-operator
	occurs-in variable, pruned-expr.types
	else
	no

	# Checks whether a types variable occurs in any other types.
	occurs-in = (type, types) ->
	types.some (sub-type) -> occurs-in-type type, sub-type

	# Checks whether a given variable occurs in a list of non-generic variables.
	is-generic = (variable, non-generic) ->
	not occurs-in variable, non-generic

	# Checks whether name is an integer literal string.
	is-integer-literal = (name) ->
	not isNaN parse-int name, 10

	# Example code to exercise the above.
	# -----------------------------------

	try-exp = (env, node) ->
	process.stdout.write "#{node}: "
	try
	t = analyze node, env
	process.stdout.write "#{t}\n"
	catch error
	process.stdout.write "Error! #{error.message}\n"

	main = ->
	var1 = new Type-variable()
	var2 = new Type-variable()
	pair-type = new Type-operator '*', [var1, var2]

	var3 = new Type-variable()

	my-env =
	pair: new Function var1, new Function var2, pair-type
	true: Bool
	cond: new Function Bool, new Function var3, new Function var3, var3
	zero: new Function Integer, Bool
	pred: new Function Integer, Integer
	times: new Function Integer, new Function Integer, Integer

	pair = new Apply(new Apply(new Ident("pair"), new Apply(new Ident("f"), new Ident("4"))), new Apply(new Ident("f"), new Ident("true")))

	examples = [
	# factorial
	new Letrec('factorial', # letrec factorial =
	new Lambda('n', # fn n =>
	new Apply(
	new Apply( # cond (zero n) 1
	new Apply(new Ident('cond'), # cond (zero n)
	new Apply(new Ident('zero'), new Ident('n'))),
	new Ident('1')),
	new Apply( # times n
	new Apply(new Ident('times'), new Ident('n')),
	new Apply(new Ident('factorial'),
	new Apply(new Ident('pred'), new Ident('n')))
	)
	)
	), # in
	new Apply(new Ident('factorial'), new Ident('5'))
	),

	# Should fail:
	# fn x => (pair(x(3) (x(true)))
	new Lambda('x',
	new Apply(
	new Apply(new Ident('pair'),
	new Apply(new Ident('x'), new Ident('3'))),
	new Apply(new Ident('x'), new Ident('true')))),

	# pair(f(3), f(true))
	new Apply(
	new Apply(new Ident('pair'), new Apply(new Ident('f'), new Ident('4'))),
	new Apply(new Ident('f'), new Ident('true'))),


	# let f = (fn x => x) in ((pair (f 4)) (f true))
	new Let('f', new Lambda('x', new Ident('x')), pair),

	# fn f => f f (fail)
	new Lambda('f', new Apply(new Ident('f'), new Ident('f'))),

	# let g = fn f => 5 in g g
	new Let('g',
	new Lambda('f', new Ident('5')),
	new Apply(new Ident('g'), new Ident('g'))),

	# example that demonstrates generic and non-generic variables:
	# fn g => let f = fn x => g in pair (f 3, f true)
	# Buggy!
	# Expected: (a -> (a * a))
	# Actual: (a -> (b * c))
	new Lambda('g',
	new Let('f',
	new Lambda('x', new Ident('g')),
	new Apply(
	new Apply(new Ident('pair'),
	new Apply(new Ident('f'), new Ident('3'))
	),
	new Apply(new Ident('f'), new Ident('true'))))),

	# Function composition
	# fn f (fn g (fn arg (f g arg)))
	# Buggy!
	# Expected: ((b -> c) -> ((c -> d) -> (b -> d)))
	# Actual: (d -> (e -> (f -> g)))
	new Lambda('f', new Lambda('g', new Lambda('arg', new Apply(new Ident('g'), new Apply(new Ident('f'), new Ident('arg'))))))
	]

	for example in examples
	try-exp my-env, example
	for k, v of calls
	console.log "\n#{k}=#{v}"

	main()