String编码(2) 证明JAVA的char编码为UTF-16
String编码(二) 证明JAVA的char编码为UTF-16
1.简介
通过程序证明 Java 的 char 在内部采用 UTF-16 编码，且与 Charset.defaultCharset() 的取值无关。
2.程序
package com.siyuan.jdk.test; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.Arrays; public class CharCodeTest { /** * 将byte转换为对应的二进制字符串 * @param src 要转换成二进制字符串的byte值 * @return */ public static String byteToBinary(byte src) { StringBuilder result = new StringBuilder(); for (int i = 0; i < 8; i++) { result.append(src%2 == 0 ? '0' : '1'); src = (byte)(src >>> 1); } return result.reverse().toString(); } public static void main(String[] args) throws UnsupportedEncodingException { String str = "I AM 中国人"; System.out.println(Charset.defaultCharset()); byte[] utf16 = str.getBytes("UTF-16"); System.out.println(Arrays.toString(utf16)); for (int i = 0; i < str.length(); i++) { System.out.println(str.charAt(i)); byte high = (byte)(str.charAt(i) >>> 8); byte low = (byte) str.charAt(i); System.out.println(byteToBinary(high) + byteToBinary(low)); System.out.println(byteToBinary(utf16[2+2*i]) + byteToBinary(utf16[2+2*i+1])); System.out.println((byteToBinary(high) + byteToBinary(low)).equals(byteToBinary(utf16[2+2*i]) + byteToBinary(utf16[2+2*i+1]))); } } }
3.运行结果
GBK
[-2, -1, 0, 73, 0, 32, 0, 65, 0, 77, 0, 32, 78, 45, 86, -3, 78, -70]
I
0000000001001001
0000000001001001
true
 
0000000000100000
0000000000100000
true
A
0000000001000001
0000000001000001
true
M
0000000001001101
0000000001001101
true
 
0000000000100000
0000000000100000
true
中
0100111000101101
0100111000101101
true
国
0101011011111101
0101011011111101
true
人
0100111010111010
0100111010111010
true