abadger · February 5, 2021 00:54
diff --git a/gistfile1.txt b/gistfile1.txt
 # I've been trying to reconcile Python's concept of variables as references to values instead of
 # the values themselves with type annotations.  I think I've hit upon some ideas.  Unfortunately,
 # they aren't all compatible with Python's current type annotation syntax.
 #
 # Background: References vs values
 # ================================
 #
 # In C, you might write some code like this::
 #
 #     #include<stdio.h>
 #
 #     typedef struct DataStruct {
 #         int test;
 #     } DataStruct_t;
 #
 #     int main() {
 #         DataStruct_t variable1;
 #         DataStruct_t variable2;
 #         variable1.test = 1;
 #         variable2 = variable1;
 #         variable2.test = 2;
 #         printf("%d\n", variable1.test);  /* 1 */
 #         printf("%d\n", variable2.test);  /* 2 */
 #         variable1 = 1;   /* Error */
 #         return 0;
 #     }
 #
 # `variable1` and `variable2` **are** both DataStructs.  They contain instances of
 # `DataStruct`.  `variable2 = variable1` means that the data in `variable1` is copied into
 # `variable2`.  You can perform this copy when the types of the two variables are the same.
 # The `variable1 = 1` statement is an error precisely because `variable1` is a `DataStruct` but `1` is not.
 # Copying of the value, `1`, cannot happen because it would put the integer value 1 into the memory
 # space which should hold a `DataStruct` which would make no sense.
 #
 # In Python, the variables are **not** values.  They are actually labels which **reference**
 # a value.  The label can be rebound to a different value or even type of value at will::
 #
 #    class DataStruct:
 #       def __init__(self):
 #           self.test = 0
 #
 #    variable1 = DataStruct()
 #    variable2 = DataStruct()  # Included to mimic the C code but it doesn't contribute anything
 #    variable1.test = 1
 #    variable2 = variable1
 #    variable2.test = 2
 #    print(variable1.test)  # 2
 #    print(variable2.test)  # 2
 #    variable1 = 3
 #    print(variable1)  # 3
 #    print(variable2.test)  # 2
 #
 # This is different than C because in C, the **variables contain** the instances of `DataStruct`.
 # When assignment happens, the data is copied from one of those instances to the other.  In Python,
 # the variables are merely labels which refer to the two instances of `DataStruct`.  When assignment
 # happens, the label is rebound to refer to a different value.  So `variable2 = variable1` rebinds
 # `variable2` to the same `DataStruct` instance as `variable1` refers to.  `variable1 = 3` rebinds
 # `variable1` to refer to the integer value `3`.  The variables and their underlying instances are separate.
 #
 # How does this interact with type annotations?
 # =============================================
 #
 # Aligning annotations with rebound variables
 # -------------------------------------------
 #
 # When we speak informally about type annotations, we often say that the type annotation tells the
 # type checker what the type of the variable is.  This does not map well to Python's concept of
 # variables as the variable is just a label.  A label doesn't have a type.  Instead, the type belongs to
 # the value that the variable refers to.  Over a variable's lifetime, it could refer to multiple
 # different values, each with different types.  So the type annotation for variables could change
 # whenever the variable is assigned to a different value.
 #
 # Currently, mypy doesn't acknowledge this.  If you try to re-annotate a variable when it is assigned a value
 # with a different type, it will error:
 #
 #     $ cat test.py
 #     contents = '1\n2\n'    
 #     a: List[str] = contents.splitlines()
 #     a: List[int] = [int(d) for d in a]
 #     $ mypy test.py
 #     test.py:5: error: Name 'a' already defined on line 4
 #
 # But I think to work with the syntax that Python provides, annotations should be able to look something like this:
 #
 #   with open('test') as f:
 #       variable: str = f.read()
 #       variable: List[str] = variable.splitlines()
 #       variable: List[int] = [int(line) for line in variable]
 #
 # Every time the variable is assigned a new type, a new annotation is used to show what type we expect.
 # We could change our code to use temporary variables instead of each subsequent call in the transformation
 # but since Python doesn't demand that, should we have to?  We're creating new values each time.  We just
 # need the type checker to understand what the contract is that we're asking to be satisfied.
 #
 # Problem: Is this hard to do with static type checking?  Maybe.... mypy currently only allows one
 # definition of a variable, even when, for instance, there is a branch in the code which would mean
 # that only one of the variable definitions could be reached.  This probably addresses corner cases where mypy
 # would have to know about how the condition resolves in order to judge whether the values are
 # allowed in all conditions.
 #
 # But we could still do static analysis in some other common cases: It seems like it would be easy and
 # harmless to allow for duplicate definition if the conditions are the same::
 #
 #   if isinstance(value, str):
 #       variable: List[int] = [int(line) for line in value.splitlines()]
 #   elif isinstance(value, Iterable):
 #       variable: List[int] = [int(element) for element in value]
 #
 # If you can accept that, then what about deciding whether the definitions are
 # the same when you reach the end of a condition::
 #
 #   if isinstance(value, str):
 #       variable: List[str] = value.splitlines()
 #       variable: List[int] = [int(line) for line in variable]
 #   elif isinstance(value, Iterable):
 #       variable: List[int] = [int(element) for element in value]
 #
 # The good side of this is that you can now type check your contract between the caller and callee
 # at every step.  The con is that the corner case still exists, just in a smaller set of
 # circumstances (if you exit the condition with `variable` set to different types in different
 # branches) and that may be harder to inform the user of.
 #
 # Update: pyre and pytype both handle reassignment the way I describe.  So this seems like a limitation of
 # mypy rather than a feature.  I need to test pyright and pycharm's builtin type checkers sometime too.
 #
 # Aligning types with the actual values
 # -------------------------------------
 #
 # Variables are just labels in Python and therefore they don't have types.  Instead, values have
 # types and variables are handy methods for the programmer to reference those values.  One subtlety
 # of this is that the current annotation syntax which places the annotation on the left hand side
 # implies that the **variable** is of type `List[int]` when in fact, it is the **value** on the
 # right hand side which has the type.  The variable only references it.  It feels like the typing
 # information really belongs on the right hand side with the value.  An example alternate syntax
 # which highlights that::
 #
 #   with open('test') as f:
 #       variable = f.read() -> str
 #       variable = variable.splitlines() -> List[str]
 #       variable = [int(line) for line in variable] -> List[int]
 #
 # Examples of other alternate syntax::
 #
 #       variable = str: f.read()
 #       variable = f.read(): str
 #       variable = str <- f.read()
 #       # Multiline like in the ML languages:
 #       variable <- str
 #       variable = f.read()
 #
 # Some further thoughts on this new syntax.... What about when we assign to a single variable
 # multiple times without changing the type?  Should there be a shortcut for that?  Perhaps so.  The
 # above idiom seems common but the below idiom seems equally common::
 #
 #   with open('test') as f:
 #       variable = str: f.read()
 #       variable = variable.strip()
 #       variable = ' '.join(variable.split('\n'))
 #
 # So in this case (which works as expected with mypy currently), the variable stays a `str` the whole
 # way through.  Would it be reasonable for the variable to keep the type of the previous value here?
 # It seems intuitive but I don't know if it interferes with the concept of binding or not.  Thoughts?
 #
 # Is this syntax wartier than the current syntax?  I'm not sure.  One of the things which has
 # bothered me about the current annotation syntax, is that it is optional to use when writing code
 # but its placement makes it required to understand when reading code which is the opposite of
 # Python's core tenet that code is read more frequently than it is written.
 #
 # What do I mean about it being required to understand?  It's that the current annotation sits in
 # the middle of the expression.  So you can't home in on the meat of the parts of the expression
 # that do something and ignore the annotation unless you know what the annotation is.  For instance:
 #
 #   variable: Optional[Union[MutableMapping[str, int], MutableMapping[str, List[int]]]] = data_processor(data)
 #
 # Even though the annotation is optional information, you have to read `variable` at the start of
 # that statement, then know that the next stuff is an annotation that you can skim through until you
 # find the equals sign.  And then you can figure out what `data_processor(data)` is going to put into
 # variable.  It feels like it would be easier to read all the essentials of the function first and
 # then only read the contract second, if and only if you think that your problem is in the contract between caller
 # and callee::
 #
 #   variable = data_processor(data) -> Optional[Union[MutableMapping[str, int], MutableMapping[str,
 #   List[int]]]]
 #
diff --git a/pyre-example.py b/pyre-example.py
 # The pyre type checker actually does what I suggest mypy should do!  It allows redefinition
 # when you rebind a variable name.  It thought of some good ways to deal with the corner cases
 # that I brought up as well.
 import random

 from typing import List

 def example1(filename: str) -> List[int]:
    """Unlike mypy, redefining variables is legal with pyre

    Reads ``filename`` and rebinds ``result`` three times, re-annotating it on each
    rebinding: first the raw file text (``str``), then the stripped text split into
    lines (``List[str]``), and finally each line converted with ``int()``
    (``List[int]``), which is what gets returned.
    """
    with open(filename, 'r') as f:
        result: str = f.read()
        result: List[str] = result.strip().splitlines()
        result: List[int] = [int(line) for line in result]
    return result


 def example2(filename: str) -> List[int]:
    """pyre handles branches by creating Unions.  And then strictly type checks whether the Union or
    only one of the types is expected later

    ``result`` starts out as the file's text (``str``).  ``random.randint(0, 1)`` picks which
    branch runs: one rebinds ``result`` to a ``List[int]``, the other leaves it unchanged, so the
    two branches finish with ``result`` bound to values of different types.
    """
    with open(filename, 'r') as f:
        result: str = f.read()
        if random.randint(0, 1):
            result: List[str] = result.strip().splitlines()
            result: List[int] = [int(line) for line in result]
        else:
            # No-op rebinding: on this path `result` is still the str read above.
            result = result
        # After the condition, result can either be str or List[int] so pyre records that
        # as `typing.Union[List[int], str]`.

    # This is now an error because the function is defined as returning List[int], not List[int] or
    # str:
    #  ƛ Found 1 type error!  type_test/test.py:22:4 Incompatible return type [7]: Expected
    #  `List[int]` but got `typing.Union[List[int], str]`.

    return result


 # Run the first example against a sample data file and show the parsed integers.
 print(example1('testdata.txt'))
diff --git a/test-pyre-mypy-pyright-pytype.py b/test-pyre-mypy-pyright-pytype.py
 #!/usr/bin/python3 -tt
 """
 * mypy, pytype, and pyre (pip install pyre-check) are pip installable.
  * In my experience, pyre was the most painful to setup but gave the best results.
  * pytype seems to currently require python-3.7 even though the documentation says it works with
    python-3.8.  Maybe it's unreleased.
 * sudo npm install --global pyright
  * You can create a home directory location for npm stuff but I won't go into that here

 Place this file in a subdirectory, called type_checks.
 Then invoke the various type checkers in the following ways:

 #!/bin/sh

 echo '===== mypy ====='
 mypy type_checks

 echo
 echo

 echo '===== pyre ====='
 pyre --source-directory type_checks

 echo
 echo

 echo '===== pyright ====='
 pyright type_checks
 echo
 echo

 echo '===== pytype ====='
 pytype type_checks --keep-going
 """


 import typing as t


 def returning_optional(arg: t.Optional[str]) -> str:
    """
    Error because we can return None.

    ``arg`` is returned unchanged, so a value annotated ``Optional[str]`` is handed back from a
    function whose declared return type is ``str``.

    All pass -- presumably meaning every checker listed in the module docstring reports the
    expected error here.
    """
    return arg


 def eliminate_none(arg: t.Optional[str]) -> str:
    """
    No error.  The is None check shields us.

    The ``None`` case raises before the return statement is reached, so only the ``str`` case
    can be returned.

    Raises:
        Exception: if ``arg`` is None.

    All pass
    """
    # Guard clause: the None case never gets past this point.
    if arg is None:
        raise Exception('arg was None')
    return arg


 def convert_to_string(arg:t.Optional[str]) -> str:
    """
    No error.  Converted return value to string.

    The result of ``str(arg)`` is returned directly, without being bound to any name first.
    Note that a ``None`` argument therefore comes back as the string ``'None'``.

    All pass
    """
    return str(arg)


 def reassign_parameter_name(arg: t.Optional[str]) -> str:
    """
    No error.  Converted to string and reassigned the parameter name to the new value.

    The parameter name ``arg`` (annotated ``Optional[str]`` in the signature) is rebound to the
    result of ``str(arg)`` and re-annotated as ``str`` before being returned.

    Fail: mypy, pyright
    Pass: pyre, pytype
    """
    # Re-annotating the parameter name is the construct under test.
    arg: str = str(arg)
    return arg


 def reassign_local_variable_name(arg: t.Optional[str]) -> str:
    """
    No error.  Converted to string and reassigned the variable name to the new value.

    ``retval`` is first bound to ``arg`` with no annotation, then rebound to ``str(arg)`` with an
    explicit ``str`` annotation; the second binding is what gets returned.

    Fail: mypy, pyright -- mypy infers the type of retval as Optional[str] from the assignment of
        arg and then doesn't let it be redefined.  pyright sets the type to str since that is
        explicit but then analyzes the *earlier* assignment of retval = arg and declares that
        invalid.
    Pass: pyre, pytype
    """
    # First binding: unannotated, so a checker has to infer the type from `arg`.
    retval = arg
    # Second binding: same name, now explicitly annotated as str.
    retval: str = str(arg)
    return retval


 def assign_values_in_separate_paths(condition: bool) -> str:
    """
    No error.  variable set to correct type in all paths.

    Both branches annotate ``retval`` as ``str``, so the name carries the same annotation twice
    but only one of the two annotated assignments runs on any given call.

    Fail: mypy -- doesn't allow the variable to be typed twice
    Pass: pyre, pytype, pyright -- note that pyright seems to allow typing twice as long as the
          types are the same.
    """
    if condition:
        retval: str = 'True'
    else:
        retval: str = 'False'

    return retval


 def assign_different_values_in_separate_paths(condition: bool) -> str:
    """
    Error.  Variable set to different type in different paths.

    One branch binds ``retval`` to a ``str`` and the other to an ``int``, while the declared
    return type is ``str``.

    All pass but only pyre and pytype properly describe this as failing due to returning
    Union[int, str].  mypy and pyright complain because the variable is typed twice.
    """
    if condition:
        retval: str = 'True'
    else:
        # This branch's annotation and value disagree with the declared `str` return type.
        retval: int = 10

    return retval
diff --git a/typing.cast.md b/typing.cast.md
	# I've been trying to reconcile Python's concept of variables as references to values instead of
	# the values themselves with type annotations. I think I've hit upon some ideas. Unfortunately,
	# they aren't all compatible with Python's current type annotation syntax.
	#
	# Background: References vs values
	# ================================
	#
	# In C, you might write some code like this::
	#
	# #include<stdio.h>
	#
	# typedef struct DataStruct {
	# int test;
	# } DataStruct_t;
	#
	# int main() {
	# DataStruct_t variable1;
	# DataStruct_t variable2;
	# variable1.test = 1;
	# variable2 = variable1;
	# variable2.test = 2;
	# printf("%d\n", variable1.test); /* 1 */
	# printf("%d\n", variable2.test); /* 2 */
	# variable1 = 1; /* Error */
	# return 0;
	# }
	#
	# `variable1` and `variable2` are both DataStructs. They contain instances of
	# `DataStruct`. `variable2 = variable1` means that the data in `variable1` is copied into
	# `variable2`. You can perform this copy when the types of the two variables are the same.
	# The `variable1 = 1` statement is an error precisely because `variable1` is a `DataStruct` but `1` is not.
	# Copying of the value, `1`, cannot happen because it would put the integer value 1 into the memory
	# space which should hold a `DataStruct` which would make no sense.
	#
	# In Python, the variables are not values. They are actually labels which reference
	# a value. The label can be rebound to a different value or even type of value at will::
	#
	# class DataStruct:
	# def __init__(self):
	# self.test = 0
	#
	# variable1 = DataStruct()
	# variable2 = DataStruct() # Included to mimic the C code but it doesn't contribute anything
	# variable1.test = 1
	# variable2 = variable1
	# variable2.test = 2
	# print(variable1.test) # 2
	# print(variable2.test) # 2
	# variable1 = 3
	# print(variable1) # 3
	# print(variable2.test) # 2
	#
	# This is different than C because in C, the variables contain the instances of `DataStruct`.
	# When assignment happens, the data is copied from one of those instances to the other. In Python,
	# the variables are merely labels which refer to the two instances of `DataStruct`. When assignment
	# happens, the label is rebound to refer to a different value. So `variable2 = variable1` rebinds
	# `variable2` to the same `DataStruct` instance as `variable1` refers to. `variable1 = 3` rebinds
	# `variable1` to refer to the integer value `3`. The variables and their underlying instances are separate.
	#
	# How does this interact with type annotations?
	# =============================================
	#
	# Aligning annotations with rebound variables
	# -------------------------------------------
	#
	# When we speak informally about type annotations, we often say that the type annotation tells the
	# type checker what the type of the variable is. This does not map well to Python's concept of
	# variables as the variable is just a label. A label doesn't have a type. Instead, the type belongs to
	# the value that the variable refers to. Over a variable's lifetime, it could refer to multiple
	# different values, each with different types. So the type annotation for variables could change
	# whenever the variable is assigned to a different value.
	#
	# Currently, mypy doesn't acknowledge this. If you try to re-annotate a variable when it is assigned a value
	# with a different type, it will error:
	#
	# $ cat test.py
	# contents = '1\n2\n'
	# a: List[str] = contents.splitlines()
	# a: List[int] = [int(d) for d in a]
	# $ mypy test.py
	# test.py:5: error: Name 'a' already defined on line 4
	#
	# But I think to work with the syntax that Python provides, annotations should be able to look something like this:
	#
	# with open('test') as f:
	# variable: str = f.read()
	# variable: List[str] = variable.splitlines()
	# variable: List[int] = [int(line) for line in variable]
	#
	# Every time the variable is assigned a new type, a new annotation is used to show what type we expect.
	# We could change our code to use temporary variables instead of each subsequent call in the transformation
	# but since Python doesn't demand that, should we have to? We're creating new values each time. We just
	# need the type checker to understand what the contract is that we're asking to be satisfied.
	#
	# Problem: Is this hard to do with static type checking? Maybe.... mypy currently only allows one
	# definition of a variable, even when, for instance, there is a branch in the code which would mean
	# that only one of the variable definitions could be reached. This probably addresses corner cases where mypy
	# would have to know about how the condition resolves in order to judge whether the values are
	# allowed in all conditions.
	#
	# But we could still do static analysis in some other common cases: It seems like it would be easy and
	# harmless to allow for duplicate definition if the conditions are the same::
	#
	# if isinstance(value, str):
	# variable: List[int] = [int(line) for line in value.splitlines()]
	# elif isinstance(value, Iterable):
	# variable: List[int] = [int(element) for element in value]
	#
	# If you can accept that, then what about deciding whether the definitions are
	# the same when you reach the end of a condition::
	#
	# if isinstance(value, str):
	# variable: List[str] = value.splitlines()
	# variable: List[int] = [int(line) for line in variable]
	# elif isinstance(value, Iterable):
	# variable: List[int] = [int(element) for element in value]
	#
	# The good side of this is that you can now type check your contract between the caller and callee
	# at every step. The con is that the corner case still exists, just in a smaller set of
	# circumstances (if you exit the condition with `variable` set to different types in different
	# branches) and that may be harder to inform the user of.
	#
	# Update: pyre and pytype both handle reassignment the way I describe. So this seems like a limitation of
	# mypy rather than a feature. I need to test pyright and pycharm's builtin type checkers sometime too.
	#
	# Aligning types with the actual values
	# -------------------------------------
	#
	# Variables are just labels in Python and therefore they don't have types. Instead, values have
	# types and variables are handy methods for the programmer to reference those values. One subtlety
	# of this is that the current annotation syntax which places the annotation on the left hand side
	# implies that the variable is of type `List[int]` when in fact, it is the value on the
	# right hand side which has the type. The variable only references it. It feels like the typing
	# information really belongs on the right hand side with the value. An example alternate syntax
	# which highlights that::
	#
	# with open('test') as f:
	# variable = f.read() -> str
	# variable = variable.splitlines() -> List[str]
	# variable = [int(line) for line in variable] -> List[int]
	#
	# Examples of other alternate syntax::
	#
	# variable = str: f.read()
	# variable = f.read(): str
	# variable = str <- f.read()
	# # Multiline like in the ML languages:
	# variable <- str
	# variable = f.read()
	#
	# Some further thoughts on this new syntax.... What about when we assign to a single variable
	# multiple times without changing the type? Should there be a shortcut for that? Perhaps so. The
	# above idiom seems common but the below idiom seems equally common::
	#
	# with open('test') as f:
	# variable = str: f.read()
	# variable = variable.strip()
	# variable = ' '.join(variable.split('\n'))
	#
	# So in this case (which works as expected with mypy currently), the variable stays a `str` the whole
	# way through. Would it be reasonable for the variable to keep the type of the previous value here?
	# It seems intuitive but I don't know if it interferes with the concept of binding or not. Thoughts?
	#
	# Is this syntax wartier than the current syntax? I'm not sure. One of the things which has
	# bothered me about the current annotation syntax, is that it is optional to use when writing code
	# but its placement makes it required to understand when reading code which is the opposite of
	# Python's core tenet that code is read more frequently than it is written.
	#
	# What do I mean about it being required to understand? It's that the current annotation sits in
	# the middle of the expression. So you can't home in on the meat of the parts of the expression
	# that do something and ignore the annotation unless you know what the annotation is. For instance:
	#
	# variable: Optional[Union[MutableMapping[str, int], MutableMapping[str, List[int]]]] = data_processor(data)
	#
	# Even though the annotation is optional information, you have to read `variable` at the start of
	# that statement, then know that the next stuff is an annotation that you can skim through until you
	# find the equals sign. And then you can figure out what `data_processor(data)` is going to put into
	# variable. It feels like it would be easier to read all the essentials of the function first and
	# then only read the contract second, if and only if you think that your problem is in the contract between caller
	# and callee::
	#
	# variable = data_processor(data) -> Optional[Union[MutableMapping[str, int], MutableMapping[str,
	# List[int]]]]
	#
	# The pyre type checker actually does what I suggest mypy should do! It allows redefinition
	# when you rebind a variable name. It thought of some good ways to deal with the corner cases
	# that I brought up as well.
	import random

	from typing import List

	def example1(filename: str) -> List[int]:
	"""Unlike mypy, redefining variables is legal with pyre"""
	with open(filename, 'r') as f:
	result: str = f.read()
	result: List[str] = result.strip().splitlines()
	result: List[int] = [int(line) for line in result]
	return result


	def example2(filename: str) -> List[int]:
	"""pyre handles branches by creating Unions. And then strictly type checks whether the Union or
	only one of the types is expected later"""
	with open(filename, 'r') as f:
	result: str = f.read()
	if random.randint(0, 1):
	result: List[str] = result.strip().splitlines()
	result: List[int] = [int(line) for line in result]
	else:
	result = result
	# After the condition, result can either be str or List[int] so pyre records that
	# as `typing.Union[List[int], str]`.

	# This is now an error because the function is defined as returning List[int], not List[int] or
	# str:
	# ƛ Found 1 type error! type_test/test.py:22:4 Incompatible return type [7]: Expected
	# `List[int]` but got `typing.Union[List[int], str]`.

	return result


	print(test('testdata.txt'))
	#!/usr/bin/python3 -tt
	"""
	* mypy, pytype, and pyre (pip install pyre-check) are pip installable.
	* In my experience, pyre was the most painful to setup but gave the best results.
	* pytype seems to currently require python-3.7 even though the documentation says it works with
	python-3.8. Maybe it's unreleased.
	* sudo npm install --global pyright
	* You can create a home directory location for npm stuff but I won't go into that here

	Place this file in a subdirectory, called type_checks.
	Then invoke the various type checkers in the following ways:

	#!/bin/sh

	echo '===== mypy ====='
	mypy type_checks

	echo
	echo

	echo '===== pyre ====='
	pyre --source-directory type_checks

	echo
	echo

	echo '===== pyright ====='
	pyright type_checks
	echo
	echo

	echo '===== pytype ====='
	pytype type_checks --keep-going
	"""


	import typing as t


	def returning_optional(arg: t.Optional[str]) -> str:
	"""
	Error because we can return None.

	All pass
	"""
	return arg


	def eliminate_none(arg: t.Optional[str]) -> str:
	"""
	No error. The is None check shields us.

	All pass
	"""
	if arg is None:
	raise Exception('arg was None')
	return arg


	def convert_to_string(arg:t.Optional[str]) -> str:
	"""
	No error. Converted return value to string.

	All pass
	"""
	return str(arg)


	def reassign_parameter_name(arg: t.Optional[str]) -> str:
	"""
	No error. Converted to string and reassigned the parameter name to the new value.

	Fail: mypy, pyright
	Pass: pyre, pytype
	"""
	arg: str = str(arg)
	return arg


	def reassign_local_variable_name(arg: t.Optional[str]) -> str:
	"""
	No error. Converted to string and reassigned the variable name to the new value.

	Fail: mypy, pyright -- mypy infers the type of retval as Optional[str] from the assignment of
	arg and then doesn't let it be redefined. pyright sets the type to str since that is
	explicit but then analyzes the earlier assignment of retval = arg and declares that
	invalid.
	Pass: pyre, pytype
	"""
	retval = arg
	retval: str = str(arg)
	return retval


	def assign_values_in_separate_paths(condition: bool) -> str:
	"""
	No error. variable set to correct type in all paths.

	Fail: mypy -- doesn't allow the variable to be typed twice
	Pass: pyre, pytype, pyright -- note that pyright seems to allow typing twice as long as the
	types are the same.
	"""
	if condition:
	retval: str = 'True'
	else:
	retval: str = 'False'

	return retval


	def assign_different_values_in_separate_paths(condition: bool) -> str:
	"""
	Error. Variable set to different type in different paths.

	All pass but only pyre and pytype properly describe this as failing due to returning
	Union[int, str]. mypy and pyright complain because the variable is typed twice.
	"""
	if condition:
	retval: str = 'True'
	else:
	retval: int = 10

	return retval