eschen42 · May 7, 2020 17:26
diff --git a/.gitattributes b/.gitattributes
 # ref: https://help.github.com/en/github/using-git/configuring-git-to-handle-line-endings#per-repository-settings
 # Files that are truly binary and should not be modified.
 #eg# *.png binary
 # The eol value must be lowercase ("lf" or "crlf"); git compares it
 #   case-sensitively, so an uppercase value is not recognized.
 # NOTE(review): working-tree-encoding=UTF-16 does not pin the byte order of
 #   the checked-out file; the gitattributes documentation describes
 #   UTF-16LE-BOM for little-endian with a byte-order mark, which is the
 #   form utf16letoutf8.icn converts -- confirm before changing.
 donne.utf16le text working-tree-encoding=UTF-16 eol=crlf

 # Files to be normalized and converted to native line endings on checkout.
 #eg# *.c text

 # Files that will always have CRLF line endings on checkout.
 #eg# *.sln text eol=crlf

 # Set the default behavior, in case people don't have core.autocrlf set.
 * text=auto
diff --git a/README.md b/README.md
diff --git a/donne.utf16le b/donne.utf16le
 No man is an Iland, intire of itselfe;
  every man is a peece of the Continent,
  a part of the maine;
 if a Clod bee washed away by the Sea,
  Europe is the lesse,
  as well as if a Promontorie were,
  as well as if a Manor of thy friends
  or of thine owne were;
 any mans death diminishes me,
  because I am involved in Mankinde;
 And therefore never send to know
  for whom the bell tolls;
  It tolls for thee.

 Excerpt from:
 John Donne, 1624, Devotions upon Emergent Occasions, Meditation XVII.
diff --git a/utf16letoutf8.icn b/utf16letoutf8.icn
 ############################################################################
 #
 # File: utf16letoutf8.icn
 #
 # Subject: An Icon tool to convert UTF-16LE to UTF-8
 #
 # Author: Arthur Eschenlauer (https://orcid.org/0000-0002-2882-0508)
 #
 # Date: 5 May, 2020
 #
 # URL: https://gist.github.com/eschen42/ed5d727e21a42a9b675e38186018fa47
 #
 ############################################################################
 #
 # This program provides a native Icon implementation of a UTF-16LE to UTF-8
 # converter.  This provides an alternative to calling out to iconv, which
 # may be available on some platforms (Unix) but not others (Windows).
 #
 # It provides two procedures that may be adapted to taste:
 #
 # codepoint2utf8(codepoint) - compute utf8 from Unicode codepoint
 #
 # utf16le2utf8(Lproducer) - compute utf8 from a list of producer
 #                           co-expressions that produce strings that are
 #                           in fact even-numbered byte lengths of sequential
 #                           substrings of a little-endian UTF-16 string.
 #
 # main(args) - demonstrate "programmer-defined control structures" style
 #              invocation for a built-in UTF-16LE string or, if args are
 #              supplied naming input files, for those UTF-16LE files
 #
 # This code was adapted from the descriptions and examples at:
 # - https://en.wikipedia.org/wiki/UTF-16#Examples
 #   - UTF-16LE to Unicode codepoint
 # - https://stackoverflow.com/a/42013433
 #   - Unicode codepoint to UTF-8
 #
 # Credit for all flaws belongs to the author of this file.
 #
 ############################################################################
 #
 # Requires: co-expressions
 #
 ############################################################################
 #
 # Links: printf (for tracing only)
 #
 ############################################################################
 #
 # This file is in the public domain. Art Eschenlauer has waived all
 # copyright and related or neighboring rights to:
 #   utf16letoutf8.icn - An Icon tool to convert UTF-16LE to UTF-8
 # For details, see:
 #   https://creativecommons.org/publicdomain/zero/1.0/
 #
 # If you require a specific license and public domain status is not
 # sufficient for your needs, please substitute the MIT license, bearing
 # in mind that the copyright "claim" is solely to meet your requirements
 # and does not imply any restriction on use or copying by the author:
 #
 #   Copyright (c) 2020, Arthur Eschenlauer
 #
 #   Permission is hereby granted, free of charge, to any person obtaining
 #   a copy of this software and associated documentation files (the
 #   "Software"), to deal in the Software without restriction, including
 #   without limitation the rights to use, copy, modify, merge, publish,
 #   distribute, sublicense, and/or sell copies of the Software, and to
 #   permit persons to whom the Software is furnished to do so, subject
 #   to the following conditions:
 #
 #   The above copyright notice and this permission notice shall be
 #   included in all copies or substantial portions of the Software.
 #
 #   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 #   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 #   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 #   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 #   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 #   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 #   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 #   SOFTWARE.
 #
 ############################################################################

 # ref: https://en.wikipedia.org/wiki/UTF-16#Examples
 # - UTF-16LE to Unicode codepoint
 # ref: https://stackoverflow.com/a/42013433
 # - Unicode codepoint to UTF-8

 link printf  # for tracing only

 global our_trace

 procedure codepoint2utf8(
  codepoint # Unicode code point, an integer in 0 .. 16r10FFFF
  )
  # Encode a single Unicode code point as a UTF-8 string.
  #
  # Returns a string of one to four bytes holding the UTF-8 encoding of
  # codepoint.  Fails (produces no result) when codepoint is negative or
  # greater than 16r10FFFF.  Surrogate values (16rD800 .. 16rDFFF) are not
  # rejected here; they are encoded with the ordinary three-byte pattern.
  #
  # When the global our_trace is non-null, progress messages are written
  # to &errout.
  #
  # Code adapted from https://stackoverflow.com/a/42013433
  local my_trace
  my_trace := our_trace # set to &null to suppress tracing
  fprintf(\my_trace & &errout,"codepoint2utf8: codepoint 16r%08x\n",codepoint)

  # char() raises a run-time error for a negative argument, so reject
  #   negative values up front; they then fail exactly as values above
  #   16r10FFFF do.
  if codepoint < 0 then fail

  # One byte:    0xxxxxxx
  if codepoint <= 16r7F then return char(codepoint)
  write(\my_trace & &errout,"codepoint2utf8: not one-byte\n")

  # Two bytes:   110xxxxx 10xxxxxx
  if codepoint <= 16r7FF then return (
    char(ior(16rC0,     ishift(codepoint,-6))) ||
    char(ior(16r80,iand(codepoint,16r3F)))
    )
  write(\my_trace & &errout,"codepoint2utf8: not two-bytes\n")

  # Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if codepoint <= 16rFFFF then return (
    char(ior(16rE0,     ishift(codepoint,-12))) ||
    char(ior(16r80,iand(ishift(codepoint,-6),16r3F))) ||
    char(ior(16r80,iand(codepoint,16r3F)))
    )
  write(\my_trace & &errout,"codepoint2utf8: not three-bytes\n")

  # Four bytes:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  if codepoint <= 16r10FFFF then return (
    char(ior(16rF0,     ishift(codepoint,-18))) ||
    char(ior(16r80,iand(ishift(codepoint,-12),16r3F))) ||
    char(ior(16r80,iand(ishift(codepoint,-6),16r3F))) ||
    char(ior(16r80,iand(codepoint,16r3F)))
    )
  write(\my_trace & &errout,"codepoint2utf8: not four-bytes\n")

  # Anything larger is outside the Unicode range: flowing off the end of
  #   the procedure makes the call fail.
 end

 procedure utf16le2utf8(
  Lproducer # list of producer co-expressions that produce strings
            #   that are in fact even-numbered byte lengths of a
            #   little-endian UTF16 string
  )
  # Generate the UTF-8 encoding of little-endian UTF-16 text.
  #
  # Each co-expression in Lproducer is activated repeatedly until it fails;
  # every string it produces is scanned two bytes at a time as UTF-16LE
  # words.  One UTF-8 string is suspended per decoded code point.
  #
  # - A little-endian byte-order mark (word 16rFEFF) at the start of a
  #   producer's first string is skipped.
  # - A big-endian byte-order mark (word 16rFFFE) in that position stops
  #   conversion for that producer.  Its remaining strings are still
  #   consumed, so the producer runs to completion, but nothing is
  #   generated from them.
  # - A lead surrogate is remembered across string boundaries, so a
  #   surrogate pair may be split between two consecutive strings.
  # - An unpaired surrogate is replaced by U+FFFD (REPLACEMENT CHARACTER).
  # - A trailing odd byte in a string is ignored.
  #
  # When the global our_trace is non-null on the first call, progress
  # messages are written to &errout.
  #
  # Code adapted from https://en.wikipedia.org/wiki/UTF-16#Examples
  #
  # N.B.: ishift is  left-shift  for positive values of arg 2
  #              and right-shift for negative values of arg 2

  local utf16, utf16lo, utf16hi, producer, line, hiword, codepoint, utf8, big_endian
  static my_trace
  initial my_trace := our_trace # set to &null to turn off tracing

  hiword := 0 # pending lead surrogate; 0 means none
  every producer := !Lproducer do {
    big_endian := &null
    while line := @producer do {
      # Once a big-endian byte-order mark has been seen, drain the
      #   producer without converting anything.
      if \big_endian then next
      # While it is possible to read the little-endian words from the input string
      line ? while (utf16lo := ord(move(1)), utf16hi := ord(move(1))) do {
        # Construct a word from the bytes read
        utf16 := ishift(utf16hi,8) + utf16lo
        # The byte-order mark is only looked for in the first word of
        #   the first string produced by the producer
        if *producer = 1 & &pos = 3 then {
          if utf16 = 16rFEFF then next
          # This code is not designed for big-endian UTF-16.  A plain
          #   "break" only leaves the scan of this string, so record the
          #   fact for the enclosing loop as well.
          if utf16 = 16rFFFE then {
            big_endian := 1
            break
            }
          }

        codepoint := &null
        # A pending lead surrogate (16rD800 .. 16rDBFF) is completed only by
        #   a trail surrogate (16rDC00 .. 16rDFFF); together they encode a
        #   code point above 16rFFFF.
        # - See: https://en.wikipedia.org/wiki/UTF-16#Examples
        if hiword > 0 then {
          if 16rDC00 <= utf16 <= 16rDFFF then {
            # These statements are only for tracing
            fprintf(\my_trace & &errout,"utf16 %4x\n",utf16)
            fprintf(\my_trace & &errout,"        hiword  - 16rD800     16r%04x\n",        hiword  - 16rD800    )
            fprintf(\my_trace & &errout,"ishift((hiword) - 16rD800,10) 16r%04x\n",ishift((hiword) - 16rD800,10))
            fprintf(\my_trace & &errout,"utf16 - 16rDC00 16r%04x\n",utf16 - 16rDC00)
            codepoint := ishift(hiword - 16rD800,10) + utf16 + 16r10000 - 16rDC00
            }
          else {
            # Unpaired lead surrogate: substitute U+FFFD for it, then
            #   treat the current word as the start of something new.
            suspend codepoint2utf8(16rFFFD)
            }
          hiword := 0
          }

        if /codepoint then {
          if 16rD800 <= utf16 <= 16rDBFF then {
            # Save lead surrogate
            hiword := utf16
            fprintf(\my_trace & &errout,"hiword %4x\n",hiword)
            # Next get the trail surrogate
            next
            }
          fprintf(\my_trace & &errout,"utf16 %4x\n",utf16)
          # A trail surrogate with no lead before it is unpaired as well;
          #   any other word is itself the code point.
          codepoint := if 16rDC00 <= utf16 <= 16rDFFF then 16rFFFD else utf16
          }

        fprintf(\my_trace & &errout,"codepoint 16r%08x\n",codepoint)
        # Convert the codepoint to utf8
        if utf8 := codepoint2utf8(codepoint) then {
          write(\my_trace & &errout,"image of utf8 is ",image(utf8))
          suspend utf8
          }
        }
      }
    }
  # A lead surrogate still pending when the input ends is unpaired too.
  if hiword > 0 then suspend codepoint2utf8(16rFFFD)
 end

 procedure main(args)
  # Demonstrate utf16le2utf8.
  #
  # With no arguments, convert a built-in UTF-16LE sample (byte-order mark,
  # U+10437, U+24B62, U+0032) and write the UTF-8 result, followed by a
  # newline, to the file "foo.txt".
  #
  # Otherwise treat each argument as the name of a UTF-16LE file and write
  # its UTF-8 conversion to standard output.  A file that cannot be opened
  # is reported on &errout and skipped.
  local line, f, fname, foo, chunk, result
  our_trace := &null # set to non-&null to turn on tracing statements
  #every write(&features)
  if *args = 0 then {
    line := char(16rFF)||char(16rFE)||
      char(16r01)||char(16rD8)||char(16r37)||char(16rDC)|| # Unicode codepoint 10437
      char(16r52)||char(16rD8)||char(16r62)||char(16rDF)|| # Unicode codepoint 24b62
      char(16r32)||char(16r00)                             # Unicode codepoint    32
    result := ""
    # utf16le2utf8{line} is the "programmer-defined control structure"
    #   form of utf16le2utf8([create line]).
    # The original author observed that accumulating the result with
    #   repeated concatenation is notably slow.
    every result ||:= utf16le2utf8{line}
    foo := open("foo.txt", "wu") |
      stop("utf16letoutf8: cannot open foo.txt for writing")
    write(foo, result)
    close(foo)
    }
  else {
    every fname := !args do {
      # open() fails rather than raising an error, so check it here; an
      #   unopened name must never reach reads().
      f := open(fname,"ru") | {
        write(&errout, "utf16letoutf8: cannot open ", fname)
        next
        }
      # The following is much more complex than the otherwise more intuitive
      #   chunk := reads(f,2) do {
      #     if chunk ~== char(16rFF)||char(16rFE)
      #       then writes(utf16le2utf8{chunk})
      #     }
      #   while chunk := reads(f,512) do {
      #     every writes(utf16le2utf8{chunk})
      #     }
      # A chunk boundary may split a pair of UTF-16LE "surrogates" (i.e.,
      #   two words that together represent a Unicode codepoint needing
      #   more than 16 bits).  Feeding every chunk of the file through one
      #   producer to a single utf16le2utf8 call lets that procedure carry
      #   the first surrogate over to the next chunk, which separate calls
      #   per chunk could not do.
      every writes(
        utf16le2utf8([
          create {
            # An empty file yields no first chunk at all; comparing a null
            #   chunk with ~== would be a run-time error, so guard the read.
            if chunk := reads(f,2) then {
              # Skip byte-order mark when found
              if chunk ~== char(16rFF)||char(16rFE) then chunk @ &source
              # Produce chunks of the rest of the file
              # - Probably reading 4096 bytes would be more appropriate for
              #   purposes other than demonstration.
              while chunk := reads(f,512) do chunk @ &source
              }
            close(f)
            write(\our_trace & &errout,"argument to utf16le2utf8 complete")
            &fail
            }
          ])
        )
      }
    }
 end
 # vim: sw=2 ts=2 et ai nu ru syntax=icon :
	# ref: https://help.github.com/en/github/using-git/configuring-git-to-handle-line-endings#per-repository-settings
	# Files that are truly binary and should not be modified.
	#eg# *.png binary
	# The eol value must be lowercase ("lf" or "crlf"); git compares it
	# case-sensitively, so an uppercase value is not recognized.
	# NOTE(review): working-tree-encoding=UTF-16 does not pin the byte order of
	# the checked-out file; the gitattributes documentation describes
	# UTF-16LE-BOM for little-endian with a byte-order mark, which is the
	# form utf16letoutf8.icn converts -- confirm before changing.
	donne.utf16le text working-tree-encoding=UTF-16 eol=crlf

	# Files to be normalized and converted to native line endings on checkout.
	#eg# *.c text

	# Files that will always have CRLF line endings on checkout.
	#eg# *.sln text eol=crlf

	# Set the default behavior, in case people don't have core.autocrlf set.
	* text=auto
	No man is an Iland, intire of itselfe;
	every man is a peece of the Continent,
	a part of the maine;
	if a Clod bee washed away by the Sea,
	Europe is the lesse,
	as well as if a Promontorie were,
	as well as if a Manor of thy friends
	or of thine owne were;
	any mans death diminishes me,
	because I am involved in Mankinde;
	And therefore never send to know
	for whom the bell tolls;
	It tolls for thee.

	Excerpt from:
	John Donne, 1624, Devotions upon Emergent Occasions, Meditation XVII.
	############################################################################
	#
	# File: utf16letoutf8.icn
	#
	# Subject: An Icon tool to convert UTF-16LE to UTF-8
	#
	# Author: Arthur Eschenlauer (https://orcid.org/0000-0002-2882-0508)
	#
	# Date: 5 May, 2020
	#
	# URL: https://gist.github.com/eschen42/ed5d727e21a42a9b675e38186018fa47
	#
	############################################################################
	#
	# This program provides a native Icon implementation of a UTF-16LE to UTF-8
	# converter. This provides an alternative to calling out to iconv, which
	# may be available on some platforms (Unix) but not others (Windows).
	#
	# It provides two procedures that may be adapted to taste:
	#
	# codepoint2utf8(codepoint) - compute utf8 from Unicode codepoint
	#
	# utf16le2utf8(Lproducer) - compute utf8 from a list of producer
	# co-expressions that produce strings that are
	# in fact even-numbered byte lengths of sequential
	# substrings of a little-endian UTF-16 string.
	#
	# main(args) - demonstrate "programmer-defined control structures" style
	# invocation for a built-in UTF-16LE string or, if args are
	# supplied naming input files, for those UTF-16LE files
	#
	# This code was adapted from the descriptions and examples at:
	# - https://en.wikipedia.org/wiki/UTF-16#Examples
	# - UTF-16LE to Unicode codepoint
	# - https://stackoverflow.com/a/42013433
	# - Unicode codepoint to UTF-8
	#
	# Credit for all flaws belongs to the author of this file.
	#
	############################################################################
	#
	# Requires: co-expressions
	#
	############################################################################
	#
	# Links: printf (for tracing only)
	#
	############################################################################
	#
	# This file is in the public domain. Art Eschenlauer has waived all
	# copyright and related or neighboring rights to:
	# utf16letoutf8.icn - An Icon tool to convert UTF-16LE to UTF-8
	# For details, see:
	# https://creativecommons.org/publicdomain/zero/1.0/
	#
	# If you require a specific license and public domain status is not
	# sufficient for your needs, please substitute the MIT license, bearing
	# in mind that the copyright "claim" is solely to meet your requirements
	# and does not imply any restriction on use or copying by the author:
	#
	# Copyright (c) 2020, Arthur Eschenlauer
	#
	# Permission is hereby granted, free of charge, to any person obtaining
	# a copy of this software and associated documentation files (the
	# "Software"), to deal in the Software without restriction, including
	# without limitation the rights to use, copy, modify, merge, publish,
	# distribute, sublicense, and/or sell copies of the Software, and to
	# permit persons to whom the Software is furnished to do so, subject
	# to the following conditions:
	#
	# The above copyright notice and this permission notice shall be
	# included in all copies or substantial portions of the Software.
	#
	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
	# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
	# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
	# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
	# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
	# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	# SOFTWARE.
	#
	############################################################################

	# ref: https://en.wikipedia.org/wiki/UTF-16#Examples
	# - UTF-16LE to Unicode codepoint
	# ref: https://stackoverflow.com/a/42013433
	# - Unicode codepoint to UTF-8

	link printf # for tracing only

	global our_trace

	procedure codepoint2utf8(
	  codepoint # Unicode code point, an integer in 0 .. 16r10FFFF
	  )
	  # Encode a single Unicode code point as a UTF-8 string.
	  #
	  # Returns a string of one to four bytes holding the UTF-8 encoding of
	  # codepoint.  Fails (produces no result) when codepoint is negative or
	  # greater than 16r10FFFF.  Surrogate values (16rD800 .. 16rDFFF) are not
	  # rejected here; they are encoded with the ordinary three-byte pattern.
	  #
	  # When the global our_trace is non-null, progress messages are written
	  # to &errout.
	  #
	  # Code adapted from https://stackoverflow.com/a/42013433
	  local my_trace
	  my_trace := our_trace # set to &null to suppress tracing
	  fprintf(\my_trace & &errout,"codepoint2utf8: codepoint 16r%08x\n",codepoint)

	  # char() raises a run-time error for a negative argument, so reject
	  #   negative values up front; they then fail exactly as values above
	  #   16r10FFFF do.
	  if codepoint < 0 then fail

	  # One byte:    0xxxxxxx
	  if codepoint <= 16r7F then return char(codepoint)
	  write(\my_trace & &errout,"codepoint2utf8: not one-byte\n")

	  # Two bytes:   110xxxxx 10xxxxxx
	  if codepoint <= 16r7FF then return (
	    char(ior(16rC0, ishift(codepoint,-6))) ||
	    char(ior(16r80,iand(codepoint,16r3F)))
	    )
	  write(\my_trace & &errout,"codepoint2utf8: not two-bytes\n")

	  # Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
	  if codepoint <= 16rFFFF then return (
	    char(ior(16rE0, ishift(codepoint,-12))) ||
	    char(ior(16r80,iand(ishift(codepoint,-6),16r3F))) ||
	    char(ior(16r80,iand(codepoint,16r3F)))
	    )
	  write(\my_trace & &errout,"codepoint2utf8: not three-bytes\n")

	  # Four bytes:  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	  if codepoint <= 16r10FFFF then return (
	    char(ior(16rF0, ishift(codepoint,-18))) ||
	    char(ior(16r80,iand(ishift(codepoint,-12),16r3F))) ||
	    char(ior(16r80,iand(ishift(codepoint,-6),16r3F))) ||
	    char(ior(16r80,iand(codepoint,16r3F)))
	    )
	  write(\my_trace & &errout,"codepoint2utf8: not four-bytes\n")

	  # Anything larger is outside the Unicode range: flowing off the end of
	  #   the procedure makes the call fail.
	end

	procedure utf16le2utf8(
	  Lproducer # list of producer co-expressions that produce strings
	            #   that are in fact even-numbered byte lengths of a
	            #   little-endian UTF16 string
	  )
	  # Generate the UTF-8 encoding of little-endian UTF-16 text.
	  #
	  # Each co-expression in Lproducer is activated repeatedly until it fails;
	  # every string it produces is scanned two bytes at a time as UTF-16LE
	  # words.  One UTF-8 string is suspended per decoded code point.
	  #
	  # - A little-endian byte-order mark (word 16rFEFF) at the start of a
	  #   producer's first string is skipped.
	  # - A big-endian byte-order mark (word 16rFFFE) in that position stops
	  #   conversion for that producer.  Its remaining strings are still
	  #   consumed, so the producer runs to completion, but nothing is
	  #   generated from them.
	  # - A lead surrogate is remembered across string boundaries, so a
	  #   surrogate pair may be split between two consecutive strings.
	  # - An unpaired surrogate is replaced by U+FFFD (REPLACEMENT CHARACTER).
	  # - A trailing odd byte in a string is ignored.
	  #
	  # When the global our_trace is non-null on the first call, progress
	  # messages are written to &errout.
	  #
	  # Code adapted from https://en.wikipedia.org/wiki/UTF-16#Examples
	  #
	  # N.B.: ishift is left-shift for positive values of arg 2
	  # and right-shift for negative values of arg 2

	  local utf16, utf16lo, utf16hi, producer, line, hiword, codepoint, utf8, big_endian
	  static my_trace
	  initial my_trace := our_trace # set to &null to turn off tracing

	  hiword := 0 # pending lead surrogate; 0 means none
	  every producer := !Lproducer do {
	    big_endian := &null
	    while line := @producer do {
	      # Once a big-endian byte-order mark has been seen, drain the
	      #   producer without converting anything.
	      if \big_endian then next
	      # While it is possible to read the little-endian words from the input string
	      line ? while (utf16lo := ord(move(1)), utf16hi := ord(move(1))) do {
	        # Construct a word from the bytes read
	        utf16 := ishift(utf16hi,8) + utf16lo
	        # The byte-order mark is only looked for in the first word of
	        #   the first string produced by the producer
	        if *producer = 1 & &pos = 3 then {
	          if utf16 = 16rFEFF then next
	          # This code is not designed for big-endian UTF-16.  A plain
	          #   "break" only leaves the scan of this string, so record the
	          #   fact for the enclosing loop as well.
	          if utf16 = 16rFFFE then {
	            big_endian := 1
	            break
	            }
	          }

	        codepoint := &null
	        # A pending lead surrogate (16rD800 .. 16rDBFF) is completed only by
	        #   a trail surrogate (16rDC00 .. 16rDFFF); together they encode a
	        #   code point above 16rFFFF.
	        # - See: https://en.wikipedia.org/wiki/UTF-16#Examples
	        if hiword > 0 then {
	          if 16rDC00 <= utf16 <= 16rDFFF then {
	            # These statements are only for tracing
	            fprintf(\my_trace & &errout,"utf16 %4x\n",utf16)
	            fprintf(\my_trace & &errout," hiword - 16rD800 16r%04x\n", hiword - 16rD800 )
	            fprintf(\my_trace & &errout,"ishift((hiword) - 16rD800,10) 16r%04x\n",ishift((hiword) - 16rD800,10))
	            fprintf(\my_trace & &errout,"utf16 - 16rDC00 16r%04x\n",utf16 - 16rDC00)
	            codepoint := ishift(hiword - 16rD800,10) + utf16 + 16r10000 - 16rDC00
	            }
	          else {
	            # Unpaired lead surrogate: substitute U+FFFD for it, then
	            #   treat the current word as the start of something new.
	            suspend codepoint2utf8(16rFFFD)
	            }
	          hiword := 0
	          }

	        if /codepoint then {
	          if 16rD800 <= utf16 <= 16rDBFF then {
	            # Save lead surrogate
	            hiword := utf16
	            fprintf(\my_trace & &errout,"hiword %4x\n",hiword)
	            # Next get the trail surrogate
	            next
	            }
	          fprintf(\my_trace & &errout,"utf16 %4x\n",utf16)
	          # A trail surrogate with no lead before it is unpaired as well;
	          #   any other word is itself the code point.
	          codepoint := if 16rDC00 <= utf16 <= 16rDFFF then 16rFFFD else utf16
	          }

	        fprintf(\my_trace & &errout,"codepoint 16r%08x\n",codepoint)
	        # Convert the codepoint to utf8
	        if utf8 := codepoint2utf8(codepoint) then {
	          write(\my_trace & &errout,"image of utf8 is ",image(utf8))
	          suspend utf8
	          }
	        }
	      }
	    }
	  # A lead surrogate still pending when the input ends is unpaired too.
	  if hiword > 0 then suspend codepoint2utf8(16rFFFD)
	end

	procedure main(args)
	  # Demonstrate utf16le2utf8.
	  #
	  # With no arguments, convert a built-in UTF-16LE sample (byte-order mark,
	  # U+10437, U+24B62, U+0032) and write the UTF-8 result, followed by a
	  # newline, to the file "foo.txt".
	  #
	  # Otherwise treat each argument as the name of a UTF-16LE file and write
	  # its UTF-8 conversion to standard output.  A file that cannot be opened
	  # is reported on &errout and skipped.
	  local line, f, fname, foo, chunk, result
	  our_trace := &null # set to non-&null to turn on tracing statements
	  #every write(&features)
	  if *args = 0 then {
	    line := char(16rFF)||char(16rFE)||
	      char(16r01)||char(16rD8)||char(16r37)||char(16rDC)|| # Unicode codepoint 10437
	      char(16r52)||char(16rD8)||char(16r62)||char(16rDF)|| # Unicode codepoint 24b62
	      char(16r32)||char(16r00) # Unicode codepoint 32
	    result := ""
	    # utf16le2utf8{line} is the "programmer-defined control structure"
	    #   form of utf16le2utf8([create line]).
	    # The original author observed that accumulating the result with
	    #   repeated concatenation is notably slow.
	    every result ||:= utf16le2utf8{line}
	    foo := open("foo.txt", "wu") |
	      stop("utf16letoutf8: cannot open foo.txt for writing")
	    write(foo, result)
	    close(foo)
	    }
	  else {
	    every fname := !args do {
	      # open() fails rather than raising an error, so check it here; an
	      #   unopened name must never reach reads().
	      f := open(fname,"ru") | {
	        write(&errout, "utf16letoutf8: cannot open ", fname)
	        next
	        }
	      # The following is much more complex than the otherwise more intuitive
	      #   chunk := reads(f,2) do {
	      #     if chunk ~== char(16rFF)||char(16rFE)
	      #       then writes(utf16le2utf8{chunk})
	      #     }
	      #   while chunk := reads(f,512) do {
	      #     every writes(utf16le2utf8{chunk})
	      #     }
	      # A chunk boundary may split a pair of UTF-16LE "surrogates" (i.e.,
	      #   two words that together represent a Unicode codepoint needing
	      #   more than 16 bits).  Feeding every chunk of the file through one
	      #   producer to a single utf16le2utf8 call lets that procedure carry
	      #   the first surrogate over to the next chunk, which separate calls
	      #   per chunk could not do.
	      every writes(
	        utf16le2utf8([
	          create {
	            # An empty file yields no first chunk at all; comparing a null
	            #   chunk with ~== would be a run-time error, so guard the read.
	            if chunk := reads(f,2) then {
	              # Skip byte-order mark when found
	              if chunk ~== char(16rFF)||char(16rFE) then chunk @ &source
	              # Produce chunks of the rest of the file
	              # - Probably reading 4096 bytes would be more appropriate for
	              #   purposes other than demonstration.
	              while chunk := reads(f,512) do chunk @ &source
	              }
	            close(f)
	            write(\our_trace & &errout,"argument to utf16le2utf8 complete")
	            &fail
	            }
	          ])
	        )
	      }
	    }
	end
	# vim: sw=2 ts=2 et ai nu ru syntax=icon :