`
lanlansnss
  • 浏览: 44458 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

php OCR图片

    博客分类:
  • php
 
阅读更多
之前做过一个数据采集的项目,其中有一个需求是把图片转化为字符串,这时就需要用 PHP 对图片做 OCR。原理很简单:先把图片分解成由 0 和 1 组成的矩阵,再根据每个字符的特征,把矩阵转化成相应的字符串。
// Demo driver: run the recogniser over one sample image, show the image,
// report the outcome, and dump the extracted bitmap for inspection.
$src        = 'shop_addr/944846_21.gif';
$recogniser = new ocr;
$text       = $recogniser->fontOcr($src);

echo "图片文件:<img src='{$src}'><br>";

// Recognition succeeded only when some text came back and no glyph was
// reported as the 'null' placeholder.
$recognised = ($text != '') && (strpos($text, 'null') === false);

if ($recognised) {
	echo "<b style='color:#009900'>识别成功:{$text}</b>";
} else {
	echo "<b style='color:#ea0000'>识别失败:{$text}</b>";
}

print_r($recogniser->bitMap);

/**
 * Minimal template-matching OCR for small GIF images.
 *
 * The image is scanned column by column into '0'/'1' strings, columns are
 * grouped into glyphs wherever a fully blank column separates them, and each
 * glyph is looked up verbatim in a fixed pattern table.
 */
class ocr
{
	/**
	 * Glyph columns found by the most recent fontOcr() call, shaped as
	 * 'f_<glyph>' => 'x_<column>' => 'y_<row>' => '0'|'1'.
	 */
	public  $bitMap       = array();

	/** Topmost and bottommost image rows that contain a set pixel. */
	private $marginTop    = 0;
	private $marginBottom = 0;

	/**
	 * Pattern table: array(column bit strings joined by ',', recognised text).
	 * Each column string is read top to bottom, already trimmed of the rows
	 * that are blank across the whole glyph.
	 */
	private $OCR          = array(
		array('1,1,1', '-'),
		array('011111110,100000001,100000001,100000001,011111110', '0'),
		array('010000001,111111111,000000001',                     '1'),
		array('011000001,100000011,100000101,100011001,011100011', '2'),
		array('000000001,010000001,100010001,100110010,011001100', '3'),
		array('000001100,000110100,011000100,111111111,000000100', '4'),
		array('000000001,111100001,100100001,100100010,100011100', '5'),
		array('001111110,010100001,100100001,100100001,100011110', '6'),
		array('110000000,100000000,100000111,100111000,111000000', '7'),
		array('011101110,100010001,100010001,100010001,011101110', '8'),
		array('011110001,100001001,100001001,100001010,011111100', '9'),
		array('11101011001,10101110001,10101011111,11101000001,00111111110,00100000001,00101001110,11111110000,10101011111,10101010001,00110000011', '號'),
		array('00100110000,11111111111,00101000000,00010001001,01111101001,01010101101,01010111011,11111101010,01010101101,01111101001,00010001000', '樓'),
		array('00000000011,00100001100,00100000010,00100000101,10100001001,01100010001,00100100001,00101000001,00110000001,00100000010,00000000010', '之'),
		array('00001001000,01001001000,01001010000,11111111111,01001010101,01001010101,01001010101,11111011101,01001100001,01001010111,00001001000', '巷'),
		array('00001001001,10001001001,10101001010,10101111100,10101001000,11111001000,10101001000,10101111111,10101001000,10001001000,00001001000', '弄'),
		array('00101001001,01101110110,00110011000,11111011100,00110010100,01101111111,00100010100,11111111111,10001000000,10110100100,11000011100', '鄰'),
		array('01100000001,01010010101,01010010101,01010110101,01011010101,11010011111,01010110101,01010100101,01011100101,01010110101,01100000001', '室'),
	);

	/**
	 * Recognise the text contained in a GIF image.
	 *
	 * @param string $src Path of the GIF file to read.
	 * @return string One entry per detected glyph, concatenated; a glyph with
	 *                no matching pattern contributes the literal 'null'. An
	 *                empty string means the image could not be read or holds
	 *                no set pixels.
	 */
	public function fontOcr($src)
	{
		$this->getBitMap($src);
		return $this->getRes();
	}

	/**
	 * Load $src and fill $this->bitMap plus the vertical margins.
	 *
	 * @param string $src Path of the GIF file to read.
	 * @return void
	 */
	private function getBitMap($src)
	{
		// Always start from a clean slate: without this a second fontOcr()
		// call would append to the previous image's glyphs, and a failed load
		// would silently return text decoded from stale data.
		$this->bitMap       = array();
		$this->marginTop    = 0;
		$this->marginBottom = 0;

		$info = getimagesize($src);
		if($info === false) { return; }
		$width  = $info[0];
		$height = $info[1];

		// Failure is reported to the caller through the empty result, so the
		// decoder's own warning is suppressed here.
		$handle = @imagecreatefromgif($src);
		if(!$handle) { return; }

		$blank = false;
		$num   = 0;
		$this->marginTop    = $height;
		$this->marginBottom = 0;

		// NOTE(review): the scan starts at x = 1, so the leftmost pixel column
		// is never read - presumably the sample images carry a border there;
		// confirm before changing.
		for($x=1; $x<$width; $x++) {
			$bits = array();
			for($y=0; $y<$height; $y++) {
				// NOTE(review): a pixel value of 255 is treated as background
				// and everything else as ink; this looks tuned to the palette
				// of the sample images - confirm for other inputs.
				$bits['y_'.$y] = (imagecolorat($handle, $x, $y) === 255) ? '0' : '1';
				if($bits['y_'.$y] === '1') {
					if($y<$this->marginTop)    { $this->marginTop    = $y; }
					if($y>$this->marginBottom) { $this->marginBottom = $y; }
				}
			}

			if(in_array('1', $bits, true)) {
				// The first inked column after a blank run opens a new glyph.
				if($blank) { $num++; }
				$this->bitMap['f_'.$num]['x_'.$x] = $bits;
				$blank = false;
			} else {
				$blank = true;
			}
		}
	}

	/**
	 * Translate every glyph in $this->bitMap through the pattern table.
	 *
	 * @return string Recognised text; 'null' stands in for an unknown glyph.
	 */
	private function getRes()
	{
		$res = array();
		foreach($this->bitMap as $font) {
			// Serialise each column, restricted to the rows that contain ink
			// somewhere in the image.
			$temp = array();
			foreach($font as $line) {
				$s = '';
				for($i=$this->marginTop; $i<=$this->marginBottom; $i++) {
					$s .= $line['y_'.$i];
				}
				$temp[] = $s;
			}

			$bitStr = $this->clean(implode(',', $temp));
			$value  = 'null';
			foreach($this->OCR as $o) {
				// Strict comparison: these are bit patterns, not numbers.
				if($bitStr === $o[0]) {
					$value = $o[1];
					break;
				}
			}

			$res[] = $value;
		}
		return implode('', $res);
	}

	/**
	 * Strip the runs of '0' that every column of a glyph shares at its start
	 * (rows above the glyph) and at its end (rows below it).
	 *
	 * @param string $string Comma-separated column bit strings.
	 * @return string The same columns with the shared blank rows removed.
	 */
	private function clean($string)
	{
		// Wrapping in commas gives every column a leading and a trailing
		// delimiter, so "column starts/ends with $c" becomes a plain
		// substring test.
		$temp  = ','.$string.',';
		$times = substr_count($temp, ',')-1; // number of columns
		for($i=20; $i>0; $i--) {
			$c = str_repeat('0', $i);

			// Remove $i leading zeros only when every column has them.
			if(substr_count($temp, ','.$c) === $times) {
				$temp = str_replace(','.$c, ',', $temp);
			}

			// Likewise for $i trailing zeros.
			if(substr_count($temp, $c.',') === $times) {
				$temp = str_replace($c.',', ',', $temp);
			}
		}

		return substr($temp, 1, -1);
	}
}
分享到:
评论
1 楼 ycdyx 2015-09-01  
试了,没用~

相关推荐

Global site tag (gtag.js) - Google Analytics