bogdanRada · November 22, 2018 05:49
diff --git a/gistfile1.rb b/gistfile1.rb
 # encoding:  UTF-8

 # Shows how the same text is represented in UTF-8, ISO-8859-1 (Latin-1) and
 # ISO-8859-15 (Latin-9) by printing its bytes and its codepoints in hex.

 # Returns the integers in +values+ formatted as uppercase hex, each one
 # followed by a single space, e.g. [0x52, 0xE9] => "52 E9 ".
 def hex_list(values)
   values.map { |value| format('%X ', value) }.join
 end

 # Prints one report line: the hex dump of +values+, then +text+ transcoded
 # to UTF-8, then the +label+ naming the encoding that was dumped.
 def report(values, text, label)
   puts "#{hex_list(values)}= #{text.encode('UTF-8')} encoded as #{label}"
 end

 utf8_resume = "Résumé"
 latin1_resume = utf8_resume.encode("ISO-8859-1")
 latin9_resume = utf8_resume.encode("ISO-8859-15")

 # String literals built from \u escapes are already UTF-8.
 lower_spanish = "\u00E1 \u00E9 \u00ED \u00F3 \u00FA \u00F1"
 puts lower_spanish
 upper_spanish = "\u00C1 \u00C9 \u00CD \u00D3 \u00DA \u00D1"
 puts upper_spanish

 puts 'Display each byte (hex)'
 report(utf8_resume.bytes, utf8_resume, 'UTF-8')
 report(latin1_resume.bytes, latin1_resume, 'ISO-8859-1')
 report(latin9_resume.bytes, latin9_resume, 'ISO-8859-15')
 puts

 # For the ISO-8859-x strings the codepoints are ordinals in that string's own
 # encoding, which is why the dumps below differ from the UTF-8 one.
 puts 'Display each character as a Unicode codepoint (hex)'
 report(utf8_resume.codepoints, utf8_resume, 'UTF-8')
 report(latin1_resume.codepoints, latin1_resume, 'ISO-8859-1')
 report(latin9_resume.codepoints, latin9_resume, 'ISO-8859-15')
 puts

 # Euro sign (20AC) exists only in Latin-9; the generic currency sign (A4)
 # exists only in Latin-1, so each legacy string carries just what it can hold.
 utf8_money = "\u{20AC A4 A3 A5}"
 latin1_money = "\u{A4 A3 A5}".encode("ISO-8859-1")
 latin9_money = "\u{20AC A3 A5}".encode("ISO-8859-15")

 puts 'Display each byte (hex)'
 report(utf8_money.bytes, utf8_money, 'UTF-8')
 report(latin1_money.bytes, latin1_money, 'ISO-8859-1 (20AC is invalid)')
 report(latin9_money.bytes, latin9_money,
        'ISO-8859-15 (A4 is invalid, 20AC gets converted into A4)')
 puts

 puts 'Display each character as a Unicode codepoint (hex)'
 report(utf8_money.codepoints, utf8_money, 'UTF-8')
 report(latin1_money.codepoints, latin1_money, 'ISO-8859-1 (20AC is invalid)')
 report(latin9_money.codepoints, latin9_money,
        'ISO-8859-15 (A4 is invalid, 20AC gets converted into A4)')
 puts

 # Squiggly heredoc: the terminator may be indented and the common leading
 # whitespace is stripped from the text.
 puts <<~EOT
 NOTE:
 \u00A4 (called currency symbol) in Latin-1 changed to \u20AC in Latin-9
 http://en.wikipedia.org/wiki/ISO/IEC_8859-15
 EOT
	# encoding: UTF-8

	# Shows how the same text is represented in UTF-8, ISO-8859-1 (Latin-1) and
	# ISO-8859-15 (Latin-9) by printing its bytes and its codepoints in hex.

	# Returns the integers in +values+ formatted as uppercase hex, each one
	# followed by a single space, e.g. [0x52, 0xE9] => "52 E9 ".
	def hex_list(values)
	  values.map { |value| format('%X ', value) }.join
	end

	# Prints one report line: the hex dump of +values+, then +text+ transcoded
	# to UTF-8, then the +label+ naming the encoding that was dumped.
	def report(values, text, label)
	  puts "#{hex_list(values)}= #{text.encode('UTF-8')} encoded as #{label}"
	end

	utf8_resume = "Résumé"
	latin1_resume = utf8_resume.encode("ISO-8859-1")
	latin9_resume = utf8_resume.encode("ISO-8859-15")

	# String literals built from \u escapes are already UTF-8.
	lower_spanish = "\u00E1 \u00E9 \u00ED \u00F3 \u00FA \u00F1"
	puts lower_spanish
	upper_spanish = "\u00C1 \u00C9 \u00CD \u00D3 \u00DA \u00D1"
	puts upper_spanish

	puts 'Display each byte (hex)'
	report(utf8_resume.bytes, utf8_resume, 'UTF-8')
	report(latin1_resume.bytes, latin1_resume, 'ISO-8859-1')
	report(latin9_resume.bytes, latin9_resume, 'ISO-8859-15')
	puts

	# For the ISO-8859-x strings the codepoints are ordinals in that string's own
	# encoding, which is why the dumps below differ from the UTF-8 one.
	puts 'Display each character as a Unicode codepoint (hex)'
	report(utf8_resume.codepoints, utf8_resume, 'UTF-8')
	report(latin1_resume.codepoints, latin1_resume, 'ISO-8859-1')
	report(latin9_resume.codepoints, latin9_resume, 'ISO-8859-15')
	puts

	# Euro sign (20AC) exists only in Latin-9; the generic currency sign (A4)
	# exists only in Latin-1, so each legacy string carries just what it can hold.
	utf8_money = "\u{20AC A4 A3 A5}"
	latin1_money = "\u{A4 A3 A5}".encode("ISO-8859-1")
	latin9_money = "\u{20AC A3 A5}".encode("ISO-8859-15")

	puts 'Display each byte (hex)'
	report(utf8_money.bytes, utf8_money, 'UTF-8')
	report(latin1_money.bytes, latin1_money, 'ISO-8859-1 (20AC is invalid)')
	report(latin9_money.bytes, latin9_money,
	       'ISO-8859-15 (A4 is invalid, 20AC gets converted into A4)')
	puts

	puts 'Display each character as a Unicode codepoint (hex)'
	report(utf8_money.codepoints, utf8_money, 'UTF-8')
	report(latin1_money.codepoints, latin1_money, 'ISO-8859-1 (20AC is invalid)')
	report(latin9_money.codepoints, latin9_money,
	       'ISO-8859-15 (A4 is invalid, 20AC gets converted into A4)')
	puts

	# Squiggly heredoc: the terminator may be indented and the common leading
	# whitespace is stripped from the text.
	puts <<~EOT
	NOTE:
	\u00A4 (called currency symbol) in Latin-1 changed to \u20AC in Latin-9
	http://en.wikipedia.org/wiki/ISO/IEC_8859-15
	EOT