lvidarte · November 28, 2011 13:11
diff --git a/bom.py b/bom.py
 import codecs

 # By Doug Hellmann

 # Multibyte encodings, such as UTF-16 and UTF-32, pose a problem
 # when transferring data between different computer systems, either
 # by copying a file directly or using network communication. Different
 # systems use different ordering of the high- and low-order bytes.
 # This characteristic of the data, known as its endianness, depends
 # on factors such as the hardware architecture and choices made by
 # the operating system and application developer. There is not always
 # a way to know in advance what byte order to use for a given set of
 # data, so the multibyte encodings include a byte-order marker (BOM)
 # as the first few bytes of encoded output. For example, UTF-16 is
 # defined in such a way that 0xFFFE and 0xFEFF are not valid
 # characters and can be used to indicate the byte-order.
 # codecs defines constants for the byte-order markers used by UTF-16
 # and UTF-32

 # Write a UTF-16 file by hand: an explicit big-endian BOM followed by
 # the text encoded with the BOM-less 'utf_16_be' codec.
 with open('/tmp/pi.txt', mode='wb') as f:
     f.write(codecs.BOM_UTF16_BE)
     f.write(u'pi: \u03c0'.encode('utf_16_be'))

 # Read the file back through the generic 'utf-16' codec, which uses the
 # leading BOM to determine the byte order. codecs.open() always opens
 # the underlying file in binary mode when an encoding is given, so a
 # text-mode flag ('rt') is contradictory and is rejected on Python 3;
 # plain 'r' behaves the same on Python 2 and works on both.
 with codecs.open('/tmp/pi.txt', mode='r', encoding='utf-16') as f:
     # Call form of print: same output on Python 2, valid on Python 3.
     print(f.read())
	import codecs

	# By Doug Hellmann

	# Multibyte encodings, such as UTF-16 and UTF-32, pose a problem
	# when transferring data between different computer systems, either
	# by copying a file directly or using network communication. Different
	# systems use different ordering of the high- and low-order bytes.
	# This characteristic of the data, known as its endianness, depends
	# on factors such as the hardware architecture and choices made by
	# the operating system and application developer. There is not always
	# a way to know in advance what byte order to use for a given set of
	# data, so the multibyte encodings include a byte-order marker (BOM)
	# as the first few bytes of encoded output. For example, UTF-16 is
	# defined in such a way that 0xFFFE and 0xFEFF are not valid
	# characters and can be used to indicate the byte-order.
	# codecs defines constants for the byte-order markers used by UTF-16
	# and UTF-32

	# Write a UTF-16 file by hand: an explicit big-endian BOM followed by
	# the text encoded with the BOM-less 'utf_16_be' codec.
	with open('/tmp/pi.txt', mode='wb') as f:
	    f.write(codecs.BOM_UTF16_BE)
	    f.write(u'pi: \u03c0'.encode('utf_16_be'))

	# Read the file back through the generic 'utf-16' codec, which uses the
	# leading BOM to determine the byte order. codecs.open() always opens
	# the underlying file in binary mode when an encoding is given, so a
	# text-mode flag ('rt') is contradictory and is rejected on Python 3;
	# plain 'r' behaves the same on Python 2 and works on both.
	with codecs.open('/tmp/pi.txt', mode='r', encoding='utf-16') as f:
	    # Call form of print: same output on Python 2, valid on Python 3.
	    print(f.read())