Clover
03-24-2009, 03:01 PM
I just posted this in NS&H; it's a proof of concept showing that it doesn't take much to create your own captcha cracker, for use in bots and whatnot. I'm not sure whether I had completely secured the captcha it targets (slpctrl.freehostia.com/captcha.php), but I suspect I hadn't. I don't feel like testing it right now, but here is the code:
<?php
/*
not the best way to do it but a very straightforward PoC.
even for this approach of 'ocr' there are still many optimizations
that could be done.. like skipping whiteblocks or doing some
preprocessing to see which rows have the most pixels and test
rows adjacent to that first...etc
*/
//note: fetching over http requires the fopen URL wrappers (allow_url_fopen) to be enabled
$captcha = imagecreatefrompng("http://slpctrl.freehostia.com/captcha.php");
if($captcha === false) {
    //without the image nothing below can work, so stop with one clear message
    //instead of a cascade of GD warnings/errors
    die("could not load captcha image<br>");
}
//keep a copy of the fetched captcha on disk for inspection; a forward slash
//is accepted on Windows too, whereas ".\\tmp.png" only works there
imagepng($captcha, "./tmp.png");
//load all possible text combos (filled in by the template loop further down)
$kewlImages = array();
//different letters can compare equally.. most 'full' letters go first
$possibilities = "08bf74de123569ac";
//compares to bl's of images.. each letter - no matter the size - takes a fixed amount of iterations.
/**
 * Tests whether a glyph template fits at a given position inside an image.
 *
 * The test is one-directional: every black pixel of $compare must also be
 * black in $image at the corresponding offset. Additional black pixels in
 * $image are ignored, so a sparse glyph can match on top of a denser one.
 * "Black" means red, green, blue and alpha are all 0.
 *
 * @param resource $image   GD image being searched (palette or truecolor)
 * @param int      $startx  left edge of the candidate block inside $image
 * @param int      $starty  top edge of the candidate block inside $image
 * @param resource $compare GD image holding the glyph template; its full
 *                          width and height define the block size
 * @return bool TRUE when the whole block lies inside $image and all template
 *              ink is covered, FALSE otherwise
 */
function block_compare($image, $startx, $starty, $compare) {
    $blockw = imagesx($compare);
    $blockh = imagesy($compare);
    //a block that starts outside the image or overhangs it can never match;
    //a block that ends exactly on the border is still fully inside
    if($startx < 0 || $starty < 0) return FALSE;
    if($startx + $blockw > imagesx($image)) return FALSE;
    if($starty + $blockh > imagesy($image)) return FALSE;
    for($dy = 0; $dy < $blockh; $dy++) {
        for($dx = 0; $dx < $blockw; $dx++) {
            //decode both pixels to colour components so palette and truecolor
            //images are judged the same way, and nothing is allocated in $image
            $c = imagecolorsforindex($compare, imagecolorat($compare, $dx, $dy));
            //only the template's ink constrains the match
            if(($c['red'] | $c['green'] | $c['blue'] | $c['alpha']) != 0) continue;
            $i = imagecolorsforindex($image, imagecolorat($image, $startx + $dx, $starty + $dy));
            if(($i['red'] | $i['green'] | $i['blue'] | $i['alpha']) != 0) return FALSE;
        }
    }
    return TRUE;
}
//render one 10x15 black-on-white template per (built-in font, character) pair,
//walking the fonts from 5 down to 1 so the largest font's templates come first
$charCount = strlen($possibilities);
for($font = 5; $font >= 1; $font--) {
    /*cheap hack fix kept from the original: fonts 4 and 5 are drawn one
    pixel to the right, fonts 1-3 start at the left edge*/
    $offsetX = ($font < 4) ? 0 : 1;
    for($idx = 0; $idx < $charCount; $idx++) {
        $capture = imagecreatetruecolor(10, 15);
        $white = imagecolorallocate($capture, 255, 255, 255);
        imagefill($capture, 0, 0, $white);
        $black = imagecolorallocate($capture, 0, 0, 0);
        imagestring($capture, $font, $offsetX, 0, $possibilities[$idx], $black);
        $kewlImages[] = $capture;
    }
}
$maxx = imagesx($captcha);
$maxy = imagesy($captcha);
$maxz = count($kewlImages);
//templates are stored font by font, one per character of $possibilities,
//so the alphabet length is the stride between fonts
$perfont = strlen($possibilities);
//number of characters to read before stopping
$wanted = 5;
$output = "";
$starty = 0;
$startz = 0;
$charfound = false;
$len = 0;
//we iterate through every (x,y) pixel coordinate and through each letter in our database
//and compare the image blocks to see if they are a match.. largest font sizes compared first
for($x = 0; $x < $maxx && $len < $wanted; $x++) {
    $charfound = false;
    for($y = $starty; $y < $maxy; $y++) {
        for($z = $startz; $z < $maxz; $z++) {
            if(!block_compare($captcha, $x, $y, $kewlImages[$z])) continue;
            $output .= $possibilities[$z % $perfont];
            //update limits: from now on only the row of the first hit is scanned
            $starty = $y;
            $maxy = $y + 1;
            //update fonts: from now on only the templates of the matched font are tried
            $startz = $z - ($z % $perfont);
            $maxz = $startz + $perfont;
            $len += 1;
            $charfound = true;
            //one character per column: leave both inner loops and move to the next x
            break 2;
        }
    }
}
echo $output . "<br>";
?>
<?php
/*
not the best way to do it but a very straightforward PoC.
even for this approach of 'ocr' there are still many optimizations
that could be done.. like skipping whiteblocks or doing some
preprocessing to see which rows have the most pixels and test
rows adjacent to that first...etc
*/
//note: fetching over http requires the fopen URL wrappers (allow_url_fopen) to be enabled
$captcha = imagecreatefrompng("http://slpctrl.freehostia.com/captcha.php");
if($captcha === false) {
    //without the image nothing below can work, so stop with one clear message
    //instead of a cascade of GD warnings/errors
    die("could not load captcha image<br>");
}
//keep a copy of the fetched captcha on disk for inspection; a forward slash
//is accepted on Windows too, whereas ".\\tmp.png" only works there
imagepng($captcha, "./tmp.png");
//load all possible text combos (filled in by the template loop further down)
$kewlImages = array();
//different letters can compare equally.. most 'full' letters go first
$possibilities = "08bf74de123569ac";
//compares to bl's of images.. each letter - no matter the size - takes a fixed amount of iterations.
/**
 * Tests whether a glyph template fits at a given position inside an image.
 *
 * The test is one-directional: every black pixel of $compare must also be
 * black in $image at the corresponding offset. Additional black pixels in
 * $image are ignored, so a sparse glyph can match on top of a denser one.
 * "Black" means red, green, blue and alpha are all 0.
 *
 * @param resource $image   GD image being searched (palette or truecolor)
 * @param int      $startx  left edge of the candidate block inside $image
 * @param int      $starty  top edge of the candidate block inside $image
 * @param resource $compare GD image holding the glyph template; its full
 *                          width and height define the block size
 * @return bool TRUE when the whole block lies inside $image and all template
 *              ink is covered, FALSE otherwise
 */
function block_compare($image, $startx, $starty, $compare) {
    $blockw = imagesx($compare);
    $blockh = imagesy($compare);
    //a block that starts outside the image or overhangs it can never match;
    //a block that ends exactly on the border is still fully inside
    if($startx < 0 || $starty < 0) return FALSE;
    if($startx + $blockw > imagesx($image)) return FALSE;
    if($starty + $blockh > imagesy($image)) return FALSE;
    for($dy = 0; $dy < $blockh; $dy++) {
        for($dx = 0; $dx < $blockw; $dx++) {
            //decode both pixels to colour components so palette and truecolor
            //images are judged the same way, and nothing is allocated in $image
            $c = imagecolorsforindex($compare, imagecolorat($compare, $dx, $dy));
            //only the template's ink constrains the match
            if(($c['red'] | $c['green'] | $c['blue'] | $c['alpha']) != 0) continue;
            $i = imagecolorsforindex($image, imagecolorat($image, $startx + $dx, $starty + $dy));
            if(($i['red'] | $i['green'] | $i['blue'] | $i['alpha']) != 0) return FALSE;
        }
    }
    return TRUE;
}
//render one 10x15 black-on-white template per (built-in font, character) pair,
//walking the fonts from 5 down to 1 so the largest font's templates come first
$charCount = strlen($possibilities);
for($font = 5; $font >= 1; $font--) {
    /*cheap hack fix kept from the original: fonts 4 and 5 are drawn one
    pixel to the right, fonts 1-3 start at the left edge*/
    $offsetX = ($font < 4) ? 0 : 1;
    for($idx = 0; $idx < $charCount; $idx++) {
        $capture = imagecreatetruecolor(10, 15);
        $white = imagecolorallocate($capture, 255, 255, 255);
        imagefill($capture, 0, 0, $white);
        $black = imagecolorallocate($capture, 0, 0, 0);
        imagestring($capture, $font, $offsetX, 0, $possibilities[$idx], $black);
        $kewlImages[] = $capture;
    }
}
$maxx = imagesx($captcha);
$maxy = imagesy($captcha);
$maxz = count($kewlImages);
//templates are stored font by font, one per character of $possibilities,
//so the alphabet length is the stride between fonts
$perfont = strlen($possibilities);
//number of characters to read before stopping
$wanted = 5;
$output = "";
$starty = 0;
$startz = 0;
$charfound = false;
$len = 0;
//we iterate through every (x,y) pixel coordinate and through each letter in our database
//and compare the image blocks to see if they are a match.. largest font sizes compared first
for($x = 0; $x < $maxx && $len < $wanted; $x++) {
    $charfound = false;
    for($y = $starty; $y < $maxy; $y++) {
        for($z = $startz; $z < $maxz; $z++) {
            if(!block_compare($captcha, $x, $y, $kewlImages[$z])) continue;
            $output .= $possibilities[$z % $perfont];
            //update limits: from now on only the row of the first hit is scanned
            $starty = $y;
            $maxy = $y + 1;
            //update fonts: from now on only the templates of the matched font are tried
            $startz = $z - ($z % $perfont);
            $maxz = $startz + $perfont;
            $len += 1;
            $charfound = true;
            //one character per column: leave both inner loops and move to the next x
            break 2;
        }
    }
}
echo $output . "<br>";
?>