gbk资料转utf-8 Demo
gbk文件转utf-8 Demo
读取gbk编码的文件,并将其转为utf-8编码后输出:
import java.io.File;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
/**
 * Demo: reads a slice of a GBK-encoded text file and re-encodes it as UTF-8.
 */
public class Test1 {

    /**
     * Reads up to {@code leng} bytes from {@code D:/test.txt}, starting at byte
     * offset {@code off}, and decodes them as GBK.
     *
     * <p>Only the bytes actually read are decoded, so a file shorter than
     * {@code off + leng} no longer yields trailing NUL characters. If the slice
     * starts or ends in the middle of a double-byte GBK character, that
     * character is decoded as a replacement character.
     *
     * @param off  byte offset in the file at which reading starts
     * @param leng maximum number of bytes to read
     * @return the decoded text, or an empty string if the file cannot be read
     *         (the failure is printed to standard error)
     */
    public static String readTxt(int off, int leng) {
        String str = "";
        // try-with-resources guarantees the file handle is released on every path.
        try (RandomAccessFile file = new RandomAccessFile(new File("D:/test.txt"), "r")) {
            byte[] buf = new byte[leng];
            file.seek(off);
            // A single read() may return fewer bytes than requested; keep
            // reading until the buffer is full or end of file is reached.
            int filled = 0;
            while (filled < leng) {
                int n = file.read(buf, filled, leng - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            // Decode explicitly as GBK instead of relying on the platform default charset.
            str = new String(buf, 0, filled, "GBK");
        } catch (Exception e) {
            // Best-effort demo helper: report the problem and return "".
            e.printStackTrace();
        }
        return str;
    }

    /**
     * Encodes the given text as UTF-8.
     *
     * <p>The standard encoder is used so every character gets the correct
     * sequence length: 1 byte for ASCII, 2 bytes for U+0080..U+07FF, 3 bytes
     * for the rest of the BMP (including Chinese characters) and 4 bytes for
     * surrogate pairs. Forcing every character into the 3-byte template
     * produces illegal overlong sequences for ASCII, which decode as garbage.
     *
     * @param chenese the text to encode; must not be {@code null}
     * @return the UTF-8 bytes of {@code chenese}
     * @throws NullPointerException if {@code chenese} is {@code null}
     */
    public static byte[] gbk2utf8(String chenese) {
        return chenese.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Reads the first 731 bytes of the test file, converts the text to UTF-8
     * bytes, decodes those bytes again and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = readTxt(0, 731);
        byte[] fullByte = gbk2utf8(str);
        String fullStr = new String(fullByte, java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(fullStr);
    }
}
测试文档D:\test.txt内容:
[账号],[日期],[收支种类],[币种],[收入],[支出],[余额],[摘要]
3819660311,2004-2006-01-05,支出,人民币,0,54.20, 6403.89,消费款
逐字符按 3 字节模板手工拼接 UTF-8 时的测试结果:中文可以正确转换;
英文(ASCII)字符则变成乱码——ASCII 字符在 UTF-8 中只占 1 个字节,套用 3 字节模板会得到非法的超长编码。
读取gbk编码的文件,并将其转为utf-8编码后输出:
import java.io.File;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
/**
 * Demo: reads a slice of a GBK-encoded text file and re-encodes it as UTF-8.
 */
public class Test1 {

    /**
     * Reads up to {@code leng} bytes from {@code D:/test.txt}, starting at byte
     * offset {@code off}, and decodes them as GBK.
     *
     * <p>Only the bytes actually read are decoded, so a file shorter than
     * {@code off + leng} no longer yields trailing NUL characters. If the slice
     * starts or ends in the middle of a double-byte GBK character, that
     * character is decoded as a replacement character.
     *
     * @param off  byte offset in the file at which reading starts
     * @param leng maximum number of bytes to read
     * @return the decoded text, or an empty string if the file cannot be read
     *         (the failure is printed to standard error)
     */
    public static String readTxt(int off, int leng) {
        String str = "";
        // try-with-resources guarantees the file handle is released on every path.
        try (RandomAccessFile file = new RandomAccessFile(new File("D:/test.txt"), "r")) {
            byte[] buf = new byte[leng];
            file.seek(off);
            // A single read() may return fewer bytes than requested; keep
            // reading until the buffer is full or end of file is reached.
            int filled = 0;
            while (filled < leng) {
                int n = file.read(buf, filled, leng - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            // Decode explicitly as GBK instead of relying on the platform default charset.
            str = new String(buf, 0, filled, "GBK");
        } catch (Exception e) {
            // Best-effort demo helper: report the problem and return "".
            e.printStackTrace();
        }
        return str;
    }

    /**
     * Encodes the given text as UTF-8.
     *
     * <p>The standard encoder is used so every character gets the correct
     * sequence length: 1 byte for ASCII, 2 bytes for U+0080..U+07FF, 3 bytes
     * for the rest of the BMP (including Chinese characters) and 4 bytes for
     * surrogate pairs. Forcing every character into the 3-byte template
     * produces illegal overlong sequences for ASCII, which decode as garbage.
     *
     * @param chenese the text to encode; must not be {@code null}
     * @return the UTF-8 bytes of {@code chenese}
     * @throws NullPointerException if {@code chenese} is {@code null}
     */
    public static byte[] gbk2utf8(String chenese) {
        return chenese.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Reads the first 731 bytes of the test file, converts the text to UTF-8
     * bytes, decodes those bytes again and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = readTxt(0, 731);
        byte[] fullByte = gbk2utf8(str);
        String fullStr = new String(fullByte, java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(fullStr);
    }
}
测试文档D:\test.txt内容:
[账号],[日期],[收支种类],[币种],[收入],[支出],[余额],[摘要]
3819660311,2004-2006-01-05,支出,人民币,0,54.20, 6403.89,消费款
逐字符按 3 字节模板手工拼接 UTF-8 时的测试结果:中文可以正确转换;
英文(ASCII)字符则变成乱码——ASCII 字符在 UTF-8 中只占 1 个字节,套用 3 字节模板会得到非法的超长编码。