简单图形验证码的识别
2007-12-23 12:28:29 来源:WEB开发网 核心提示:文章来源:http://zzzhc.spaces.MSN.com/blog/cns!3905D34B10C3C381!129.entry 对于简单的图形验证码(字体规则,没有杂点或杂点容易过滤掉),用模板匹配的方式可以比较容易地识别出来。0. 图片黑白化,用1表示有字的像素,0表示无字的像素;1. 字块分割,将图片分割成只包含单个字符的最小块……
文章来源:
http://zzzhc.spaces.MSN.com/blog/cns!3905D34B10C3C381!129.entry
对于简单的图形验证码(字体规则,没有杂点或杂点容易过滤掉),
用模板匹配的方式可以比较容易地识别出来.
0.图片黑白化,用1表示有字的像素,0表示无字的像素
1.字块分割,将图片分割成只包含单个字符的最小块
2.生成模板,将字块与字符关联
3.识别,将新图片分块并与模板匹配
java(jdk1.5)实现:
//先运行TemplateCreator创建模板,再运行Recognize2识别
// ImageData.java -- image data representation; also used to represent a single character block
package pay365;

import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/**
 * A rectangular grid of pixel values plus the character it is labelled with.
 *
 * <p>The constructors that take a {@link BufferedImage} copy the raw RGB values and then run
 * them through a {@link Filter}. The splitting, printing, hashing and encoding code treats the
 * value 1 as "ink" and 0 as "background", so it assumes the filter reduced every pixel to 0 or 1.
 *
 * <p>{@link #equals(Object)} and {@link #hashCode()} look only at the dimensions and the pixel
 * grid, never at {@link #code}; that is what lets a freshly split block be used as a lookup key
 * against labelled templates.
 */
public class ImageData {
    /** Pixel grid indexed as {@code data[row][column]}. */
    public int[][] data;

    /** Width in pixels (number of columns). */
    public int w;

    /** Height in pixels (number of rows). */
    public int h;

    /** Character this block is labelled with; {@code '\0'} when unlabelled. */
    public char code;

    /** Creates an empty instance whose fields are filled in by the caller. */
    public ImageData() {
    }

    /**
     * Reads an image using a {@link WhiteFilter}.
     *
     * @param bi the source image
     */
    public ImageData(BufferedImage bi) {
        this(bi, new WhiteFilter());
    }

    /**
     * Copies every pixel of {@code bi} and then applies {@code filter} to the copy in place.
     *
     * @param bi the source image
     * @param filter the filter that converts raw RGB values into grid values
     */
    public ImageData(BufferedImage bi, Filter filter) {
        h = bi.getHeight();
        w = bi.getWidth();
        data = new int[h][w];
        for (int row = 0; row < h; row++) {
            for (int col = 0; col < w; col++) {
                data[row][col] = bi.getRGB(col, row);
            }
        }
        filter.doFilter(data);
    }

    /**
     * Cuts this image into character blocks, scanning from left to right.
     *
     * @return the blocks in the order they were found; empty when the image has no ink
     */
    public ImageData[] split() {
        List<ImageData> blocks = new ArrayList<ImageData>();
        Iterator<ImageData> it = new ImageIterator(this);
        while (it.hasNext()) {
            blocks.add(it.next());
        }
        return blocks.toArray(new ImageData[blocks.size()]);
    }

    /**
     * Finds the first column (or row) at or after {@code begin} that contains at least one
     * pixel different from {@code value}.
     *
     * @param begin index to start scanning from
     * @param isX {@code true} to scan columns, {@code false} to scan rows
     * @param value the pixel value that counts as empty
     * @return the index found, or -1 when every remaining line holds only {@code value}
     */
    int skipEmpty(int begin, boolean isX, int value) {
        int lineCount = isX ? w : h;
        int cellCount = isX ? h : w;
        for (int line = begin; line < lineCount; line++) {
            for (int k = 0; k < cellCount; k++) {
                if (cell(line, k, isX) != value) {
                    return line;
                }
            }
        }
        return -1;
    }

    /**
     * Finds the first column (or row) at or after {@code begin} that contains no pixel equal
     * to {@code value}.
     *
     * @param begin index to start scanning from
     * @param isX {@code true} to scan columns, {@code false} to scan rows
     * @param value the pixel value that marks a line as occupied
     * @return the index found, or -1 when every remaining line contains {@code value}
     */
    int skipEntity(int begin, boolean isX, int value) {
        int lineCount = isX ? w : h;
        int cellCount = isX ? h : w;
        for (int line = begin; line < lineCount; line++) {
            boolean occupied = false;
            for (int k = 0; k < cellCount; k++) {
                if (cell(line, k, isX) == value) {
                    occupied = true;
                    break;
                }
            }
            // A line with zero cells never qualifies, matching the -1 result for empty images.
            if (!occupied && cellCount > 0) {
                return line;
            }
        }
        return -1;
    }

    /** Reads the k-th pixel of a column (isX) or of a row (!isX). */
    private int cell(int line, int k, boolean isX) {
        return isX ? data[k][line] : data[line][k];
    }

    /**
     * Walks an image from left to right and yields one block per run of non-empty columns.
     *
     * <p>The vertical extent of every block is computed over the whole image (first non-empty
     * row to the first row without ink after it), not over the block's own columns.
     */
    class ImageIterator implements Iterator<ImageData> {
        /** Column at which the next scan starts. */
        int x;

        /** Image being split. */
        ImageData ia;

        /** Block fetched by {@link #hasNext()} but not yet handed out. */
        ImageData next;

        public ImageIterator(ImageData ia) {
            this.ia = ia;
        }

        public boolean hasNext() {
            if (next == null) {
                next = getNext();
            }
            return next != null;
        }

        /**
         * Locates the next block and advances the scan position.
         *
         * @return the next block, or {@code null} when no ink is left to the right
         */
        ImageData getNext() {
            int x1 = ia.skipEmpty(x, true, 0);
            if (x1 == -1) {
                return null;
            }
            int x2 = ia.skipEntity(x1, true, 1);
            if (x2 == -1) {
                // The ink runs up to the right edge.
                x2 = ia.w;
            }
            x = x2;
            int y1 = ia.skipEmpty(0, false, 0);
            if (y1 == -1) {
                return null;
            }
            int y2 = ia.skipEntity(y1, false, 1);
            if (y2 == -1) {
                // The ink runs down to the bottom edge.
                y2 = ia.h;
            }
            return ia.clone(x1, y1, x2 - x1, y2 - y1);
        }

        /**
         * Returns the next block; works whether or not {@link #hasNext()} was called first.
         *
         * @throws NoSuchElementException when no block is left
         */
        public ImageData next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            ImageData result = next;
            next = null;
            return result;
        }

        /**
         * Removal is not supported.
         *
         * @throws UnsupportedOperationException always
         */
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Copies a rectangular region into a new, unlabelled instance.
     *
     * @param x left column of the region
     * @param y top row of the region
     * @param w0 width of the region
     * @param h0 height of the region
     * @return a new instance holding a copy of the region
     */
    ImageData clone(int x, int y, int w0, int h0) {
        ImageData region = new ImageData();
        region.w = w0;
        region.h = h0;
        region.data = new int[h0][w0];
        for (int row = 0; row < h0; row++) {
            System.arraycopy(data[row + y], x, region.data[row], 0, w0);
        }
        return region;
    }

    /** Prints the grid to standard output, drawing 1 as "1" and anything else as a space. */
    public void show() {
        System.out.println();
        for (int row = 0; row < h; row++) {
            StringBuilder line = new StringBuilder(w);
            for (int col = 0; col < w; col++) {
                line.append(data[row][col] == 1 ? '1' : ' ');
            }
            System.out.println(line);
        }
        System.out.println();
    }

    /** Hashes the width, the height and the number of pixels equal to 1. */
    public int hashCode() {
        int ink = 0;
        for (int row = 0; row < h; row++) {
            for (int col = 0; col < w; col++) {
                if (data[row][col] == 1) {
                    ink++;
                }
            }
        }
        return w ^ h ^ ink;
    }

    /** Two instances are equal when their dimensions and every pixel match. */
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof ImageData)) {
            return false;
        }
        ImageData other = (ImageData) obj;
        if (other.h != h || other.w != w) {
            return false;
        }
        for (int row = 0; row < h; row++) {
            for (int col = 0; col < w; col++) {
                if (other.data[row][col] != data[row][col]) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Reads one template per line from a file, skipping lines that {@link #decode} rejects.
     *
     * @param path path of the template file
     * @return the decoded templates in file order
     * @throws IOException if the file cannot be opened or read
     */
    public static ImageData[] decodeFromFile(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(new File(path)));
        try {
            List<ImageData> templates = new ArrayList<ImageData>();
            String line;
            while ((line = reader.readLine()) != null) {
                ImageData template = decode(line);
                if (template != null) {
                    templates.add(template);
                }
            }
            return templates.toArray(new ImageData[templates.size()]);
        } finally {
            // Close the file even when reading fails part-way.
            reader.close();
        }
    }

    /**
     * Parses a line of the form {@code code,width,height,bits}, where {@code bits} is a
     * row-major string of {@code width * height} characters and '1' marks ink.
     *
     * @param s the line to parse
     * @return the decoded template, or {@code null} when the line is malformed (wrong field
     *     count, code not exactly one character, non-numeric or negative dimensions, or a bit
     *     string of the wrong length)
     */
    public static ImageData decode(String s) {
        String[] fields = s.split("\\,", 4);
        if (fields.length != 4 || fields[0].length() != 1) {
            return null;
        }
        int width;
        int height;
        try {
            width = Integer.parseInt(fields[1]);
            height = Integer.parseInt(fields[2]);
        } catch (NumberFormatException e) {
            // Malformed dimensions are treated like any other malformed line.
            return null;
        }
        if (width < 0 || height < 0) {
            return null;
        }
        String bits = fields[3];
        // Multiply as long so that huge dimensions cannot overflow into a matching length.
        if ((long) width * height != bits.length()) {
            return null;
        }
        ImageData template = new ImageData();
        template.code = fields[0].charAt(0);
        template.w = width;
        template.h = height;
        template.data = new int[height][width];
        for (int row = 0; row < height; row++) {
            for (int col = 0; col < width; col++) {
                template.data[row][col] = bits.charAt(row * width + col) == '1' ? 1 : 0;
            }
        }
        return template;
    }

    /**
     * Serialises this block as {@code code,width,height,bits}; the inverse of {@link #decode}.
     *
     * @return the encoded line, without a line terminator
     */
    public String encode() {
        StringBuilder sb = new StringBuilder();
        sb.append(code).append(",");
        sb.append(w).append(",");
        sb.append(h).append(",");
        for (int row = 0; row < h; row++) {
            for (int col = 0; col < w; col++) {
                sb.append(data[row][col] == 1 ? '1' : '0');
            }
        }
        return sb.toString();
    }
}

// Filter.java -- pixel filter interface
package pay365;

/** Converts a grid of raw pixel values in place. */
public interface Filter {
    /**
     * Rewrites every entry of {@code data}.
     *
     * @param data the pixel grid, indexed as {@code data[row][column]}
     */
    void doFilter(int[][] data);
}

// AbstractFilter.java -- base class for filters that map one pixel at a time
package pay365;

/** Applies {@link #filter(int)} to every pixel of the grid. */
public abstract class AbstractFilter implements Filter {
    public void doFilter(int[][] data) {
        for (int row = 0; row < data.length; row++) {
            int[] pixels = data[row];
            for (int col = 0; col < pixels.length; col++) {
                pixels[col] = filter(pixels[col]);
            }
        }
    }

    /**
     * Maps one raw pixel value to its filtered value.
     *
     * @param p the raw pixel value
     * @return the filtered value
     */
    protected abstract int filter(int p);
}

// CsdnFilter.java -- filter for the CSDN captcha
package pay365;

/** Marks every pixel that is not pure white as 1 and pure white as 0. */
public class CsdnFilter extends AbstractFilter {
    protected int filter(int p) {
        return isNotWhite(p) ? 1 : 0;
    }

    /** A pixel is pure white when each of its three low-order bytes is 255. */
    private boolean isNotWhite(int p) {
        return (p & 0xffffff) != 0xffffff;
    }
}

// WhiteFilter.java -- filter for images whose foreground colour is white
package pay365;

/** Marks near-white pixels as 1 and everything else as 0. */
public class WhiteFilter extends AbstractFilter {
    protected int filter(int p) {
        return isWhite(p) ? 1 : 0;
    }

    /** A pixel is near-white when each of its three low-order bytes is above 240. */
    private boolean isWhite(int p) {
        return (p & 0x0ff) > 240 && (p >> 8 & 0x0ff) > 240 && (p >> 16 & 0xff) > 240;
    }
}

// TemplateCreator.java -- template creation class
package pay365;

import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import javax.imageio.ImageIO;

/**
 * Downloads several captcha images, splits them into character blocks, asks the user on
 * standard input which character each distinct block shows, and writes the labelled blocks
 * to {@code template.data}.
 */
public class TemplateCreator {
    /** Number of captcha images downloaded to collect blocks from. */
    private static final int SAMPLE_COUNT = 9;

    private static final BufferedReader reader = new BufferedReader(
            new InputStreamReader(System.in));

    /**
     * @param args optional: {@code args[0]} is the captcha URL, {@code args[1]} the fully
     *     qualified name of the {@link Filter} implementation to use
     */
    public static void main(String[] args) throws Exception {
        String url = "http://passport.csdn.net/member/ShowExPwd.aspx";
        String filterClazz = "pay365.CsdnFilter";
        if (args.length >= 1) {
            url = args[0];
        }
        if (args.length >= 2) {
            filterClazz = args[1];
        }
        Filter filter = (Filter) Class.forName(filterClazz).getDeclaredConstructor().newInstance();

        // A set keeps one copy of each distinct block, since equality ignores the label.
        Set<ImageData> blocks = new HashSet<ImageData>();
        URL source = new URL(url);
        for (int i = 0; i < SAMPLE_COUNT; i++) {
            BufferedImage bi = ImageIO.read(source);
            if (bi == null) {
                throw new IOException("Could not decode an image from " + url);
            }
            ImageData[] parts = new ImageData(bi, filter).split();
            for (int k = 0; k < parts.length; k++) {
                blocks.add(parts[k]);
            }
        }
        System.out.println(blocks.size());

        // Setting the label while the block sits in the set is safe: hashCode and equals
        // never read it.
        for (ImageData block : blocks) {
            block.show();
            System.out.print("char:");
            String answer = readLine();
            if (answer.length() == 1) {
                block.code = answer.charAt(0);
            }
        }

        PrintWriter pw = new PrintWriter(new File("template.data"));
        try {
            for (ImageData block : blocks) {
                // Blocks the user did not label are left out instead of being stored as NUL.
                if (block.code != '\0') {
                    pw.println(block.encode());
                }
            }
            pw.flush();
        } finally {
            pw.close();
        }
    }

    /** Reads one line from standard input; returns "" at end of input or on a read error. */
    private static String readLine() {
        try {
            String line = reader.readLine();
            return line == null ? "" : line;
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
    }
}

// Recognize2.java -- recognition class
package pay365;

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import javax.imageio.ImageIO;

/**
 * Downloads captcha images, splits them into blocks and looks each block up among the
 * templates stored in {@code template.data}.
 */
public class Recognize2 {
    /** Blocks wider than this many pixels are skipped without a lookup. */
    private static final int MAX_CHAR_WIDTH = 15;

    static String url = "http://passport.csdn.net/member/ShowExPwd.aspx";
    static Filter filter;

    /** Templates keyed by pixel content, loaded once in {@link #main}. */
    private static Map<ImageData, Character> templates;

    /**
     * @param args optional: {@code args[0]} is the captcha URL, {@code args[1]} the fully
     *     qualified name of the {@link Filter} implementation to use
     */
    public static void main(String[] args) throws Exception {
        if (args.length >= 1) {
            url = args[0];
        }
        String filterClazz = "pay365.CsdnFilter";
        if (args.length >= 2) {
            filterClazz = args[1];
        }
        filter = (Filter) Class.forName(filterClazz).getDeclaredConstructor().newInstance();
        templates = loadTemplates("template.data");

        int total = 10;
        int count = 0;
        for (int i = 0; i < total; i++) {
            if (recognize(i)) {
                count++;
            }
        }
        System.out.println("rate:" + (count * 100.0 / total) + "%");
    }

    /** Reads the template file into a lookup map from block to character. */
    private static Map<ImageData, Character> loadTemplates(String path) throws IOException {
        ImageData[] loaded = ImageData.decodeFromFile(path);
        Map<ImageData, Character> map = new HashMap<ImageData, Character>();
        for (int i = 0; i < loaded.length; i++) {
            map.put(loaded[i], Character.valueOf(loaded[i].code));
        }
        return map;
    }

    /**
     * Downloads one captcha, saves it as {@code <num>.png}, and prints the characters of
     * every block that matches a template.
     *
     * @param num sequence number used for the saved file name and the printed prefix
     * @return {@code true} when at least one block matched a template
     * @throws IOException if the image cannot be downloaded, decoded or saved
     */
    private static boolean recognize(int num) throws IOException {
        BufferedImage bi = ImageIO.read(new URL(url));
        if (bi == null) {
            throw new IOException("Could not decode an image from " + url);
        }
        ImageIO.write(bi, "png", new File(num + ".png"));

        ImageData[] blocks = new ImageData(bi, filter).split();
        StringBuilder text = new StringBuilder();
        for (int i = 0; i < blocks.length; i++) {
            if (blocks[i].w > MAX_CHAR_WIDTH) {
                continue;
            }
            Character c = templates.get(blocks[i]);
            if (c != null) {
                text.append(c.charValue());
            }
        }
        System.out.println(num + ":" + text);
        return text.length() != 0;
    }
}
(出处:http://www.cncms.com)
[]
更多精彩
赞助商链接