PHP图形(验证码)识别

因为项目需要识别验证码,而我以前并没有接触过这方面,于是研究了一下,写了个简单的PHP脚本(文中的代码只做了简单整理,不是很严谨),基本满足了目前的需要,在这里分享一下,适合初学者参考。

验证码是这样的:没有背景色,只有在深色背景下才能看清,而且一共只有4个数字。

看了部分资料知道了思路,就很好办了。

1、将图形按像素坐标转换为由0和1组成的二维数组

2、去除干扰(这个图片里没有)

3、切割

4、对比(识别效果主要取决于字典是否足够丰富,用到的主要函数是similar_text)

有了这四步就可以开工了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/**
 * Dictionary-based recognizer for a four-digit captcha image.
 *
 * Intended call order: getImage() -> imgTobin() -> getArea() -> pExplode() -> getNum().
 * The class body is closed by the brace that follows getNum().
 */
class code{
/** @var array List of array(digit, pattern) pairs; pattern is a string of '0'/'1' characters. */
public $keyvalues;
/** @var array Lookup table filled by the constructor: 0..255 => unpadded hexadecimal string. */
public $hexarray = array();
/** @var int|float Width of one digit slot in pixels; written by getArea(), read by pExplode(). */
public $pwidth;
/**
 * Fills the byte-to-hex lookup table and loads the digit dictionary.
 *
 * The dictionary ends up in $this->keyvalues as a flat list of
 * array(digit, pattern) pairs, ordered by digit; each pattern is a
 * string of '0'/'1' characters describing one sample glyph.
 */
public function __construct(){
    // dechex() yields unpadded lowercase hex: 10 => 'a', 255 => 'ff'.
    $this->hexarray = array_map('dechex', range(0, 255));

    // Sample glyphs grouped by the digit they represent.
    $samples = array(
        0 => array(
            '001110001100100100001010000101000010100001010000101100100011100',
            '111000010010000000100000010000001000000100000010010010001110000',
            '011100011001001000010100001010000101000010100001011001000111000',
            '111000100100000010000010000010000010000010100100111000',
        ),
        1 => array(
            '000010000011000010100000010000001000000100000010000001000000100',
            '000001000011000101000001000001000001000001000001000001',
            '100000100000110000101000000110000011000001100000110000010000001',
            '001000001100001010001001000100100010010001001000100100010010000',
        ),
        2 => array(
            '011110000001100000010000001000001000011000011000001000001111110',
            '011110000000110000000100000001000000100000110000011000000100000011111100',
            '011110000011000001000001000010001100011000010000111111',
            '001111000011001100110000100000000100000001100000001000000010000000100000001000000011000000111111100',
        ),
        3 => array(
            '011100010001000000100001100000011000000100000010010010000111000',
            '001110001000100000010000110000001100000010000001001001000011100',
            '000111000100010000001000011000000110000001000000100100100001110',
            '111000000100000100011000001100000100000100001000111000',
            '011100001000100100001001001100010001100100001001000010011001000101110000',
        ),
        4 => array(
            '000010000011000010100001010001001001000100111111000001000000100',
            '000010000110001010001010010010100010111111000010000010',
        ),
        5 => array(
            '011111001000000100000111110010001100000010000001011001100111100',
            '000111100010000001000001111100100010000000000000000110010001111',
            '011111010000010000111110100011000001000001110011011110',
        ),
        6 => array(
            '000001100010000100000100000011111101000001100001001001000010000',
            '000111000100110100000010111001100110100001010000101100100011110',
            '011100010011000000000011100010011000000100000010010010001111000',
            '0001110000110010011000110100000001011100011000110100000101000001010000010010001000011100',
        ),
        7 => array(
            '111111100000010000010000010000010000001000001000001000001000000',
            '011111100000100000010000010000001000000100000100000010000001000',
            '111111000010000010000100000100000100001000001000001000',
        ),
        8 => array(
            '011110010000101000010011110011001101000010100001011001100111100',
        ),
        9 => array(
            '001110001100100100001010000101100110011101000000101000100011100',
            '0111100011000100100001101000001010000010110001100111101000000010100001101100110001111000',
            '001111000011000100010000110010000010010000010011000110001111010000000010010000110011001100001111000',
        ),
    );

    // Flatten to the list-of-pairs shape that getNum() iterates over.
    $this->keyvalues = array();
    foreach ($samples as $digit => $patterns) {
        foreach ($patterns as $pattern) {
            $this->keyvalues[] = array($digit, $pattern);
        }
    }
}
/**
 * Loads an image, choosing the GD loader from the file extension.
 *
 * The extension is the text after the last dot, matched case-insensitively;
 * "gif", "png", "jpg" and "jpeg" are accepted.
 *
 * @param string $url Path or URL of the image file.
 * @return mixed The GD image handle, or false when $url is empty, the
 *               extension is not supported, or the GD loader itself fails.
 */
public function getImage($url){
    if (!$url) {
        return false;
    }

    // Lower-case the extension so "1.GIF" is handled like "1.gif".
    $parts = explode('.', $url);
    $extension = strtolower(end($parts));

    switch ($extension) {
        case 'gif':
            return imagecreatefromgif($url);
        case 'png':
            return imagecreatefrompng($url);
        case 'jpg':
        case 'jpeg':
            return imagecreatefromjpeg($url);
        default:
            return false;
    }
}
/**
 * Converts an image into a grid of 0/1 values indexed as $data[$y][$x].
 *
 * Each pixel's colour is written as a six-digit lowercase hex string
 * (rrggbb). The colour "abacad" always maps to 0; any other colour maps
 * to 1 when its hex string sorts after "969696", otherwise to 0.
 *
 * @param mixed $image GD image handle as returned by getImage().
 * @return array Rows of 0/1 integers, one row per image line.
 */
public function imgTobin($image){
    $width = imagesx($image);
    $height = imagesy($image);
    $data = array();

    for ($y = 0; $y < $height; $y++) {
        for ($x = 0; $x < $width; $x++) {
            $rgb = imagecolorsforindex($image, imagecolorat($image, $x, $y));

            // Zero-pad every channel so the string is always six characters;
            // without padding (0x0a, 0x0a, 0x0a) would read "aaa" and sort
            // after "969696" even though it is almost black.
            $hex = sprintf('%02x%02x%02x', $rgb['red'], $rgb['green'], $rgb['blue']);

            // strcmp() forces a plain string comparison. The ">" operator would
            // compare numeric-looking strings such as "1e0500" as numbers.
            if ($hex !== 'abacad' && strcmp($hex, '969696') > 0) {
                $data[$y][$x] = 1;
            } else {
                $data[$y][$x] = 0;
            }
        }
    }

    return $data;
}
/**
 * Crops the 0/1 grid to the region that contains the glyphs and stores the
 * width of one digit slot in $this->pwidth.
 *
 * The region is derived from horizontal transitions: every cell whose value
 * differs from its left neighbour contributes its x and y coordinate, and
 * the box spans the smallest to the largest of those coordinates. A 1 -> 0
 * transition is recorded at the 0 cell, so the box can include one column
 * to the right of the last 1 cell. The slot width is (maxX - minX) / 4.
 *
 * @param array $data Grid of 0/1 integers indexed as $data[$y][$x].
 * @return array Cropped grid with zero-based row and column keys, or an
 *               empty array when the grid contains no transition at all.
 */
public function getArea($data){
    $xs = array();
    $ys = array();
    foreach ($data as $y => $row) {
        foreach ($row as $x => $cell) {
            // A missing left neighbour (first column) counts as 0.
            $left = isset($row[$x - 1]) ? $row[$x - 1] : 0;
            if ($left != $cell) {
                $xs[] = $x;
                $ys[] = $y;
            }
        }
    }

    // Blank grid: nothing to crop, and no slot width can be derived.
    if (!$xs) {
        $this->pwidth = 0;
        return array();
    }

    $minX = min($xs);
    $maxX = max($xs);
    $minY = min($ys);
    $maxY = max($ys);
    $this->pwidth = ($maxX - $minX) / 4;

    $ndata = array();
    foreach ($data as $y => $row) {
        if ($y < $minY || $y > $maxY) {
            continue;
        }
        $cropped = array();
        foreach ($row as $x => $cell) {
            if ($x >= $minX && $x <= $maxX) {
                $cropped[] = $cell;
            }
        }
        $ndata[] = $cropped;
    }

    return $ndata;
}
/**
 * Cuts the cropped grid into four vertical slices of $this->pwidth columns
 * each and serialises every slice row by row into one string.
 *
 * Columns are counted by position within the row (not by array key), and a
 * column at position x belongs to slice i when pwidth*i <= x < pwidth*(i+1).
 * Columns at or beyond pwidth*4 are ignored.
 *
 * @param array $ndata Cropped grid as returned by getArea().
 * @return array Slice strings keyed by slice index (0..3); empty when no
 *               cell fell into any slice.
 */
public function pExplode($ndata){
    $word = array();
    for ($i = 0; $i < 4; $i++) {
        $from = $this->pwidth * $i;
        $to = $this->pwidth * ($i + 1);
        foreach ($ndata as $line) {
            $x = 0;
            foreach ($line as $cell) {
                if ($x >= $from && $x < $to) {
                    // Create the slot on first use instead of appending to an undefined key.
                    if (!isset($word[$i])) {
                        $word[$i] = '';
                    }
                    $word[$i] .= $cell;
                }
                $x++;
            }
        }
    }
    return $word;
}
/**
 * Recognises each slice string by comparing it with every dictionary
 * pattern via similar_text() and concatenates the best-matching digits.
 *
 * The similarity percentage is truncated to an integer before comparison,
 * and only a strictly higher score replaces the current best, so on a tie
 * the earlier dictionary entry wins. A slice whose best score is 0 yields
 * the digit 0.
 *
 * @param array $word Slice strings as returned by pExplode().
 * @return string One digit per slice, in slice order; '' for an empty input.
 */
public function getNum($word){
    $result = '';
    foreach ($word as $numString) {
        $bestDigit = 0;
        $bestPercent = 0;
        foreach ($this->keyvalues as $value) {
            $percent = 0.0;
            similar_text($value[1], $numString, $percent);
            $score = intval($percent);
            if ($score > $bestPercent) {
                $bestPercent = $score;
                $bestDigit = $value[0];
            }
        }
        $result .= $bestDigit;
    }
    return $result;
}
}
// Demo driver: recognise "1.gif" and print every intermediate stage.
$code = new code();
$img = $code->getImage("1.gif");
if (!$img) {
    echo "img type error!" ;
} else {
    $bin = $code->imgTobin($img);

    // Stage 1: the whole image as a 0/1 matrix.
    foreach ($bin as $row) {
        foreach ($row as $cell) {
            if ($cell == 1) {
                echo $cell;
            } else {
                echo "0";
            }
        }
        echo "\n";
    }
    echo "\n";

    if (!$bin) {
        echo "get bin error!" ;
    } else {
        $are = $code->getArea($bin);
        if (!$are) {
            echo "get area error!" ;
        } else {
            // Stage 2: the cropped region, printed twice - first with "0"
            // for empty cells, then with a space so the glyphs stand out.
            foreach (array("0", " ") as $blank) {
                foreach ($are as $row) {
                    foreach ($row as $cell) {
                        echo $cell == 1 ? $cell : $blank;
                    }
                    echo "\n";
                }
                echo "\n";
            }

            // Stage 3: slice into digits and look each one up.
            $exp = $code->pExplode($are);
            if (!$exp) {
                echo "Explode area error!";
            } else {
                $num = $code->getNum($exp);
                echo $num;
            }
        }
    }
}

最后的效果

这个脚本只是针对我遇到的这种验证码写的,如果需要识别其他验证码,还需要在图形的01转换和切割上做些调整。经过几天的字典填充,现在的识别率还不错。