数据库编程之编码变换实例--乱码分析
1问题描述
最近有人问我问题,说在Java中以UTF-8编码读UTF-8编码的文件test.htm,存入oracle数据库后再读出,一切正常。在Java中UTF-8编码读UTF-8编码的文件test.htm,存入kingbase数据库后再读出,文件中出现乱码,不知为何。
2具体现象
具体现象如下:
Java中读UTF-8编码的文件test.htm,存入数据库后再读出,文件中出现乱码
原文件图如下:
3 存入数据库再读出后出现乱码如下:
注:前面两幅图中“ ”前面的那个“”变成了“ ”。
4 相关代码 下面是模拟这个过程的读写文件和写读数据库的java代码:
import java.io.ByteArrayOutputStream; import java.io.FileInputStream; import java.io.FileOutputStream; import java.sql.Clob; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; public class TestEoding { public static void main(String[] args) throws Exception { //create table and insert 1 recode //oracle,kingbase // create table t_test (id int,txt clob); // insert into t_test (id,txt) values(1,''); //pg // create table t_test (id int,txt text); // insert into t_test (id,txt) values(1,''); //kingbase String driver = "com.kingbase.Driver"; String url = "jdbc:kingbase://127.0.0.1:54321/BEIGANG?ClientEncoding=UNICODE"; String usr = "BEIGANG"; String passwd = "beigang"; String fn = "E:/test.htm"; /* //pg String driver = "org.postgresql.Driver"; String url = "jdbc:postgresql://127.0.0.1:5432/beigang"; String usr = "beigang"; String passwd = "beigang"; String fn = "E:/test.htm"; */ if (args.length > 0) { fn = args[0]; } String value = getValue(fn); testDB(driver, url, usr, passwd, fn + ".kb", value); /* //sqlserver driver = "net.sourceforge.jtds.jdbc.Driver"; url = "jdbc:jtds:sqlserver://127.0.0.1:1433/beigang"; usr = "beigang"; passwd = "beigang"; testDB2(driver, url, usr, passwd, fn + ".mssql", value); driver = "oracle.jdbc.driver.OracleDriver"; url = "jdbc:oracle:thin:@127.0.0.1:1521:beigang"; usr = "beigang"; passwd = "beigang"; testDB2(driver, url, usr, passwd, fn + ".oracle", value); */ } protected static void testDB(String driver, String url, String usr, String passwd, String fn, String value) throws SQLException, Exception { Class.forName(driver); Connection conn = DriverManager.getConnection(url, usr, passwd); String sql = "update t_test set txt = ? where id=1"; PreparedStatement stmt = conn.prepareStatement(sql); stmt.setString(1, value); stmt.executeUpdate(); stmt.close(); sql = "select txt from t_test where id=1"; stmt = conn.prepareStatement(sql); ResultSet rs = stmt.executeQuery(); if (rs.next()) { writeFile(fn, rs.getString(1)); } /* * pg String sql = "update t_test set txt = '" +value +"' where id=1"; ResultSet rs; stmt.executeUpdate(sql); sql = "select txt from t_test where id=1"; rs = stmt.executeQuery(sql); if (rs.next()) { writeFile(fn, rs.getString(1)); } */ } protected static void testDB2(String driver, String url, String usr, String passwd, String fn, String value) throws SQLException, Exception { Class.forName(driver); Connection conn = DriverManager.getConnection(url, usr, passwd); String sql = "update t_test set txt = ? where id=1"; PreparedStatement stmt = conn.prepareStatement(sql); stmt.setString(1, value); stmt.executeUpdate(); stmt.close(); sql = "select txt from t_test where id=1"; stmt = conn.prepareStatement(sql); ResultSet rs = stmt.executeQuery(); if (rs.next()) { writeFile(fn, rs.getString(1)); } } private static void writeFile(String fn, String string) throws Exception { FileOutputStream fos = new FileOutputStream(fn); byte[] content = string.getBytes("UTF-8"); fos.write(content); fos.flush(); fos.close(); } private static String getValue(String fn) throws Exception { ByteArrayOutputStream baos = new ByteArrayOutputStream(1024); FileInputStream fins = new FileInputStream(fn); int b = -1; while ((b = fins.read()) != -1) { baos.write(b); } return baos.toString("UTF-8"); //return baos.toString("GBK"); } }
4 结论
结果测试确认,发现oracle、kingbase等数据库中存在同样问题,问题原因是编码"UTF8"的字符0xe383bb(就是“”)在"GBK"没有相对应物。刚开始说的oracle不出现乱码,而kingbase出现乱码是他搞错了数据库服务器端编码,oracle用来UTF-8的编码,kingbase用了GBK的编码导致。
根本原因是"UTF8"编码的字符0xe383bb(就是" ")在"GBK"没有相对应物。
5 数据库字符集和编码转换
参见我的博客http://beigang.iteye.com/admin/blogs/1254738
6
把test.htm传上来,压缩文件使test.rar,有兴趣可以实践下