验证码分辨代码(Java)
验证码识别代码(Java)
此代码用于自动识别12306火车货运验证码。由于该验证码较为简单,本代码没有对图像做过多变换,只做了灰度化处理,识别正确率在90%以上;还可以让它学习更多的验证码样本来进一步提高识别正确率。本代码测试通过后未做过多优化,有验证码识别需求的开发者可以参考。话不多说,上源码:
import java.awt.color.ColorSpace;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.io.File;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.imageio.ImageIO;

/**
 * Glyph-library access helper backed by the SQLite file {@code char_bits.db}.
 *
 * <p>The queries in this class reference two tables: {@code chars} (columns {@code id},
 * {@code char}) and {@code bits} (columns {@code char_id}, {@code char_bits}, in that order).
 * Every public method opens its own connection and closes it before returning, including
 * when an exception is thrown.
 */
class DBOp {
    private static final String JDBC_DRIVER = "org.sqlite.JDBC";
    private static final String JDBC_URL = "jdbc:sqlite:char_bits.db";
    private static final int QUERY_TIMEOUT_SECONDS = 1;

    /** Loads the SQLite driver class and opens a new connection to the glyph library. */
    private static Connection openConnection() throws ClassNotFoundException, SQLException {
        Class.forName(JDBC_DRIVER);
        return DriverManager.getConnection(JDBC_URL);
    }

    /** Prepares {@code psSql} and applies the shared query timeout. */
    private static PreparedStatement prepare(Connection poConn, String psSql)
            throws SQLException {
        PreparedStatement stmt = poConn.prepareStatement(psSql);
        try {
            stmt.setQueryTimeout(QUERY_TIMEOUT_SECONDS);
        } catch (SQLException e) {
            // Do not leak the statement when the timeout cannot be applied.
            stmt.close();
            throw e;
        }
        return stmt;
    }

    /** Executes an already-bound query and collects one string column from every row. */
    private static ArrayList<String> readStrings(PreparedStatement poStmt, String psColumn)
            throws SQLException {
        ArrayList<String> values = new ArrayList<String>();
        try (ResultSet rows = poStmt.executeQuery()) {
            while (rows.next()) {
                values.add(rows.getString(psColumn));
            }
        }
        return values;
    }

    /** Returns the {@code chars.id} of {@code psChar}, or {@code null} when it has no row. */
    private static Integer findCharId(Connection poConn, String psChar) throws SQLException {
        try (PreparedStatement stmt = prepare(poConn, "select id from chars where char = ?")) {
            stmt.setString(1, psChar);
            try (ResultSet rows = stmt.executeQuery()) {
                if (rows.next()) {
                    return Integer.valueOf(rows.getInt("id"));
                }
                return null;
            }
        }
    }

    /**
     * Adds a glyph sample for a character unless an identical sample is already stored.
     *
     * @param psChar the character the sample represents
     * @param psBits the encoded glyph
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if {@code psChar} has no row in {@code chars}, or on any database
     *     error
     */
    public static void insertRec(String psChar, String psBits)
            throws ClassNotFoundException, SQLException {
        try (Connection conn = openConnection()) {
            Integer charId = findCharId(conn, psChar);
            if (charId == null) {
                throw new SQLException("数据库格式错误");
            }

            // Skip the insert when this exact sample is already stored for the character.
            try (PreparedStatement lookup = prepare(conn,
                    "select char_bits from bits where char_id = ? and char_bits = ?")) {
                lookup.setInt(1, charId.intValue());
                lookup.setString(2, psBits);
                try (ResultSet rows = lookup.executeQuery()) {
                    if (rows.next()) {
                        return;
                    }
                }
            }

            try (PreparedStatement insert = prepare(conn, "insert into bits values(?, ?)")) {
                insert.setInt(1, charId.intValue());
                insert.setString(2, psBits);
                insert.executeUpdate();
            }
        }
    }

    /**
     * Returns one page of stored glyph samples.
     *
     * @param piOffset number of rows to skip
     * @param piLimit maximum number of rows to return
     * @return the {@code char_bits} values of the selected rows (possibly empty)
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException on any database error
     */
    public static ArrayList<String> getAllBits(int piOffset, int piLimit)
            throws ClassNotFoundException, SQLException {
        try (Connection conn = openConnection();
                PreparedStatement stmt =
                        prepare(conn, "select char_bits from bits limit ? offset ?")) {
            stmt.setInt(1, piLimit);
            stmt.setInt(2, piOffset);
            return readStrings(stmt, "char_bits");
        }
    }

    /**
     * Looks up the character that a stored glyph sample belongs to.
     *
     * @param psBits the encoded glyph, exactly as stored
     * @return the character, or {@code null} when the sample or its character row is missing
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException on any database error
     */
    public static String getCharByBits(String psBits)
            throws ClassNotFoundException, SQLException {
        try (Connection conn = openConnection()) {
            int charId;
            try (PreparedStatement byBits =
                    prepare(conn, "select char_id from bits where char_bits = ?")) {
                byBits.setString(1, psBits);
                try (ResultSet rows = byBits.executeQuery()) {
                    if (!rows.next()) {
                        return null;
                    }
                    charId = rows.getInt("char_id");
                }
            }

            try (PreparedStatement byId = prepare(conn, "select char from chars where id = ?")) {
                byId.setInt(1, charId);
                try (ResultSet rows = byId.executeQuery()) {
                    return rows.next() ? rows.getString("char") : null;
                }
            }
        }
    }

    /**
     * Returns every character known to the glyph library.
     *
     * @return the {@code char} value of each row in {@code chars}
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException on any database error
     */
    public static ArrayList<String> getAllChars() throws ClassNotFoundException, SQLException {
        try (Connection conn = openConnection();
                PreparedStatement stmt = prepare(conn, "select char from chars")) {
            return readStrings(stmt, "char");
        }
    }

    /**
     * Returns all glyph samples stored for one character.
     *
     * @param psChar the character to look up
     * @return the stored {@code char_bits} values for that character (possibly empty)
     * @throws ClassNotFoundException if the SQLite JDBC driver is not on the classpath
     * @throws SQLException if {@code psChar} has no row in {@code chars}, or on any database
     *     error
     */
    public static ArrayList<String> getAllBitsByChar(String psChar)
            throws ClassNotFoundException, SQLException {
        try (Connection conn = openConnection()) {
            Integer charId = findCharId(conn, psChar);
            if (charId == null) {
                throw new SQLException("数据库中找不到" + psChar + "的记录");
            }
            try (PreparedStatement stmt =
                    prepare(conn, "select char_bits from bits where char_id = ?")) {
                stmt.setInt(1, charId.intValue());
                return readStrings(stmt, "char_bits");
            }
        }
    }
}

/**
 * Simple captcha recogniser: splits an image into glyphs at blank pixel columns and matches
 * each glyph against stored samples by cosine similarity.
 *
 * <p>A glyph is encoded as its 0/1 pixels in column-major order, cropped vertically to the
 * rows that contain ink, followed by {@code "|width,height"}.
 */
public class AuthRecg {
    private static final Logger LOG = Logger.getLogger(AuthRecg.class.getName());

    private static final String PATH_IMGS = "F:\\Projects\\Test\\yanzhengma";
    /** A pixel is ink when {@code 0x100 - grayLevel} exceeds this value. */
    private static final int THRESHOLD_BIT_EFFECT = 80;
    /** Minimum cosine similarity for two glyphs to be treated as the same character. */
    private static final float THRESHOLD_COS_SAME_CHAR = 0.87f;
    /** Number of stored samples fetched per database round trip while matching. */
    private static final int PAGE_SIZE = 50;

    /**
     * Recognises the characters in a captcha image.
     *
     * @param poImg the captcha image
     * @return one character per detected glyph, with {@code "?"} for every glyph that could
     *     not be matched (including when the glyph library is unavailable); empty when the
     *     image contains no ink
     */
    public static String recg(BufferedImage poImg) {
        StringBuilder text = new StringBuilder();
        for (String glyph : scan(poImg)) {
            String match = recgChar(glyph);
            text.append(match == null ? "?" : match);
        }
        return text.toString();
    }

    /** Returns a grayscale rendering of {@code poImg} as a new {@code TYPE_3BYTE_BGR} image. */
    private static BufferedImage getGrayPicture(BufferedImage poImg) {
        BufferedImage gray = new BufferedImage(
                poImg.getWidth(), poImg.getHeight(), BufferedImage.TYPE_3BYTE_BGR);
        ColorConvertOp toGray =
                new ColorConvertOp(ColorSpace.getInstance(ColorSpace.CS_GRAY), null);
        toGray.filter(poImg, gray);
        return gray;
    }

    /**
     * Splits the image into glyphs and encodes each one.
     *
     * <p>A glyph is a maximal run of adjacent pixel columns that each contain at least one
     * ink pixel; a column without ink (or the right image edge) ends the glyph.
     *
     * @return the encoded glyphs in left-to-right order
     */
    private static ArrayList<String> scan(BufferedImage poImg) {
        ArrayList<String> glyphs = new ArrayList<String>();
        BufferedImage gray = getGrayPicture(poImg);
        int width = gray.getWidth();
        int height = gray.getHeight();

        ArrayList<int[]> glyphCols = new ArrayList<int[]>();
        int minY = -1;
        int maxY = -1;
        for (int x = 0; x < width; x++) {
            int[] col = new int[height];
            boolean hasInk = false;
            for (int y = 0; y < height; y++) {
                // Mask instead of a signed modulo so a channel value of 0 (pure black)
                // is measured as the darkest level rather than as 0 darkness.
                int level = gray.getRGB(x, y) & 0xFF;
                if (0x100 - level > THRESHOLD_BIT_EFFECT) {
                    col[y] = 1;
                    hasInk = true;
                    if (minY == -1 || y < minY) {
                        minY = y;
                    }
                    if (y > maxY) {
                        maxY = y;
                    }
                }
            }

            if (hasInk) {
                glyphCols.add(col);
            }
            boolean glyphEnds = !glyphCols.isEmpty() && (!hasInk || x == width - 1);
            if (glyphEnds) {
                glyphs.add(encodeGlyph(glyphCols, minY, maxY));
                glyphCols = new ArrayList<int[]>();
                minY = -1;
                maxY = -1;
            }
        }
        return glyphs;
    }

    /** Encodes the rows {@code piMinY..piMaxY} of the given columns as bits + "|w,h". */
    private static String encodeGlyph(ArrayList<int[]> poCols, int piMinY, int piMaxY) {
        int glyphHeight = piMaxY - piMinY + 1;
        StringBuilder bits = new StringBuilder(poCols.size() * glyphHeight + 8);
        for (int[] col : poCols) {
            for (int y = piMinY; y <= piMaxY; y++) {
                bits.append(col[y]);
            }
        }
        bits.append('|').append(poCols.size()).append(',').append(glyphHeight);
        return bits.toString();
    }

    /**
     * Stores the glyphs of a labelled captcha as training samples.
     *
     * <p>The sample is skipped when the number of detected glyphs differs from the number of
     * characters in {@code psChars}. A database failure for one glyph is logged and does not
     * stop the remaining glyphs from being stored.
     *
     * @param poImg the captcha image
     * @param psChars the text shown in the image, one character per glyph
     */
    public static void study(BufferedImage poImg, String psChars) {
        ArrayList<String> glyphs = scan(poImg);
        if (glyphs.size() != psChars.length()) {
            LOG.log(Level.INFO, "Skipping sample \"{0}\": found {1} glyph(s)",
                    new Object[] {psChars, Integer.valueOf(glyphs.size())});
            return;
        }
        for (int i = 0; i < glyphs.size(); i++) {
            try {
                DBOp.insertRec(String.valueOf(psChars.charAt(i)), glyphs.get(i));
            } catch (ClassNotFoundException | SQLException e) {
                LOG.log(Level.WARNING, "Could not store glyph for '" + psChars.charAt(i) + "'", e);
            }
        }
    }

    /**
     * Computes the cosine similarity of two 0/1 vectors given as {@code '0'}/{@code '1'} chars.
     *
     * @return the similarity in [0, 1]; 0 when the lengths differ or either vector has no
     *     {@code '1'}
     */
    private static float calcCos(char[] paBits1, char[] paBits2) {
        if (paBits1.length != paBits2.length) {
            return 0;
        }
        int dot = 0;
        int ones1 = 0;
        int ones2 = 0;
        for (int i = 0; i < paBits1.length; i++) {
            int bit1 = paBits1[i] == '1' ? 1 : 0;
            int bit2 = paBits2[i] == '1' ? 1 : 0;
            dot += bit1 & bit2;
            ones1 += bit1;
            ones2 += bit2;
        }
        // For 0/1 vectors the squared norm equals the number of ones.
        float denominator = (float) Math.sqrt(ones1) * (float) Math.sqrt(ones2);
        return denominator == 0 ? 0 : dot / denominator;
    }

    /**
     * Matches one encoded glyph against the stored samples, one page at a time.
     *
     * @param psBits the encoded glyph
     * @return the character of the first stored sample with identical dimensions and a cosine
     *     similarity of at least {@link #THRESHOLD_COS_SAME_CHAR}; {@code null} when nothing
     *     matches or the glyph library cannot be read
     */
    private static String recgChar(String psBits) {
        String[] target = psBits.split("\\|");
        if (target.length < 2) {
            return null;
        }
        char[] targetBits = target[0].toCharArray();

        for (int offset = 0;; offset += PAGE_SIZE) {
            ArrayList<String> page;
            try {
                page = DBOp.getAllBits(offset, PAGE_SIZE);
            } catch (ClassNotFoundException | SQLException e) {
                // Without the library nothing can match; report the glyph as unrecognised.
                LOG.log(Level.WARNING, "Could not read glyph library", e);
                return null;
            }

            for (String candidate : page) {
                if (candidate == null) {
                    continue;
                }
                String[] parts = candidate.split("\\|");
                // Only glyphs with the same "width,height" suffix are comparable.
                if (parts.length < 2 || !target[1].equals(parts[1])) {
                    continue;
                }
                if (calcCos(targetBits, parts[0].toCharArray()) < THRESHOLD_COS_SAME_CHAR) {
                    continue;
                }
                try {
                    return DBOp.getCharByBits(candidate);
                } catch (ClassNotFoundException | SQLException e) {
                    // Keep looking: another stored sample may still resolve.
                    LOG.log(Level.WARNING, "Could not resolve matched glyph", e);
                }
            }

            if (page.size() < PAGE_SIZE) {
                return null;
            }
        }
    }

    /** Reads an image file, failing with a clear message when no decoder accepts it. */
    private static BufferedImage readImage(File poFile) throws IOException {
        BufferedImage img = ImageIO.read(poFile);
        if (img == null) {
            throw new IOException("Unsupported or unreadable image: " + poFile);
        }
        return img;
    }

    /**
     * Trains the glyph library from the labelled sample images, then prints the recognition
     * result for 11 test images.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException {
        // 1. Labelled samples: loStrs[i] is the text shown in untitled<i>.png.
        String[] loStrs = new String[] {
            "piade", "eusts", "tafe", "snund", "baips", "gike", "fenes", "lein", "caws",
            "falo", "abys", "aift", "wiin", "hoeds", "pise", "hiker", "opes", "nain",
            "tene", "nased", "loop", "eaips", "camp", "traly", "hate", "noped", "coll",
            "rirs", "bolk", "modid", "thre", "knder", "bots", "safer", "thrds", "flins",
            "coory", "ilper", "juyer", "goll", "soar", "foen", "dewls", "slme", "flows",
            "baoks", "plugh", "tames", "lorm", "boler", "wift", "feded", "knfer", "lark",
            "chat", "tots", "barst", "ouard", "sots", "ouing", "neory", "bave", "buab",
            "baces", "mebar", "baof", "badly", "baast", "bages", "baed", "baue", "bager",
            "bogs", "baled", "beree", "balds", "baamp", "baugh", "bater", "beart", "bere",
            "caed", "daick", "cuual", "icer", "muack", "cave", "coeed", "ferce", "costs",
            "ceam", "clen", "caer", "cast", "wace", "coep", "paced", "half", "bofts",
            "loft", "fimy", "maaf", "foeam", "falys", "bafes", "soght", "yege", "args",
            "kiged", "geman", "stge", "geugh", "baing", "goner", "gocer", "gird", "gorns",
            "thger", "cluth", "hern", "thts", "derch", "hook", "kixed", "haown", "seach",
            "sath", "geve", "soaly", "wames", "yaows", "vies", "marm", "trows", "yased",
            "zoink", "liys", "evss", "soys", "tayed", "clove", "woows", "jute", "voled",
            "fewns", "saow", "roxes", "wiyal", "roves", "fiyed", "wigs", "waxes", "twter",
            "evrk", "evee", "piys", "voes", "zoced", "werry", "velps", "eyns", "evce",
            "vikes", "ovat", "guve", "slfe", "taes", "usice", "jaade", "knva", "saar",
            "cazac", "bems", "hafe", "trons", "uslp", "sexed", "faed", "tild", "toow",
            "toar", "drall", "hazes", "yaad", "seubt", "strns", "buap", "redly", "shgs",
            "sapt", "kins", "buee", "famed", "coack", "hamy", "puder", "puod", "jaons",
            "jarks", "poate", "thlks"
        };
        for (int i = 0; i < loStrs.length; i++) {
            System.out.println(i);
            study(readImage(new File(PATH_IMGS + "\\untitled" + i + ".png")), loStrs[i]);
        }

        // 2. Recognise 11 test captchas.
        for (int i = 0; i < 11; i++) {
            System.out.println(recg(readImage(
                    new File("F:\\Projects\\Test\\tests\\untitled" + i + ".png"))));
        }
    }
}
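字库采用SQLite数据库(文件 char_bits.db)存储,表结构如下(原文此处为截图;由上面的代码可知至少包含以下字段):chars 表含 id、char 两列,bits 表含 char_id、char_bits 两列,其中 bits.char_id 对应 chars.id。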
训练学习样本201个验证码图片:
测试图片:
执行结果: