michael-simons · November 11, 2020 22:46 · sharmasourabh · May 25, 2018
diff --git a/UTF8.java b/UTF8.java
 public class UTF8 {
  public static void main(String[] args) {
 	  	// See http://www.fileformat.info/info/unicode/char/1f4a9/index.htm
 		final String poo = "A pile of poo: 💩."; 
 		
 		System.out.println(poo);
 		// Length of chars doesn't equals the "real" length, that is: the number of actual codepoints
 		System.out.println(poo.length() + " vs " + poo.codePointCount(0, poo.length()));
 		
 		// Iterating over all chars
 		for(int i=0; i<poo.length();++i) {
 			char c = poo.charAt(i);
 			// If there's a char left, we chan check if the current and the next char 
 			// form a surrogate pair
 			if(i<poo.length()-1 && Character.isSurrogatePair(c, poo.charAt(i+1))) {
 				// if so, the codepoint must be stored on a 32bit int as char is only 16bit
 				int codePoint = poo.codePointAt(i);
 				// show the code point and the char
 				System.out.println(String.format("%6d:%s", codePoint, new String(new int[]{codePoint}, 0, 1)));
 				++i;
 			}
 			// else this can only be a "normal" char
 			else 
 				System.out.println(String.format("%6d:%s", (int)c, c));
 		}
 		
 		// constructing a string constant with two \\u unicode escape sequences
 		System.out.println("\ud83d\udca9".equals("💩"));
 	}
 }
	public class UTF8 {
	public static void main(String[] args) {
	// See http://www.fileformat.info/info/unicode/char/1f4a9/index.htm
	final String poo = "A pile of poo: 💩.";

	System.out.println(poo);
	// Length of chars doesn't equals the "real" length, that is: the number of actual codepoints
	System.out.println(poo.length() + " vs " + poo.codePointCount(0, poo.length()));

	// Iterating over all chars
	for(int i=0; i<poo.length();++i) {
	char c = poo.charAt(i);
	// If there's a char left, we chan check if the current and the next char
	// form a surrogate pair
	if(i<poo.length()-1 && Character.isSurrogatePair(c, poo.charAt(i+1))) {
	// if so, the codepoint must be stored on a 32bit int as char is only 16bit
	int codePoint = poo.codePointAt(i);
	// show the code point and the char
	System.out.println(String.format("%6d:%s", codePoint, new String(new int[]{codePoint}, 0, 1)));
	++i;
	}
	// else this can only be a "normal" char
	else
	System.out.println(String.format("%6d:%s", (int)c, c));
	}

	// constructing a string constant with two \\u unicode escape sequences
	System.out.println("\ud83d\udca9".equals("💩"));
	}
	}
No results found