titaneric · July 1, 2019 06:33
diff --git a/OCR.py b/OCR.py
 from selenium import webdriver
 from selenium.webdriver.common.keys import Keys
 import urllib.request
 from PIL import Image
 from selenium.webdriver.support.select import Select
 from PIL import ImageEnhance
 import matplotlib.pyplot as plt
 import cv2
 import numpy as np
 import urllib.request
 from lxml import etree
 import os
 import shutil
 import sqlite3
















 ## get the image source
 #img = driver.find_element_by_xpath('/html/body/img')
 # Project directory that is scanned for PNG files below.
 # NOTE(review): the images further down are saved under relative names, so
 # this presumably expects the current working directory to be dir_path -- confirm.
 dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')

 # Delete every PNG found in dir_path before a new run.
 for file in os.listdir(dir_path):
    if file.endswith('.png'):
        os.remove(dir_path + "\\" + file)

 #dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')

 #for file in os.listdir(dir_path):
 #    if file.endswith('.png'):
 #        os.remove(dir_path + "\\" + file)

 src = 'https://isdna1.yzu.edu.tw/CnStdSel/SelRandomImage.aspx'
 #download the image
 urllib.request.urlretrieve(src, "captcha.png")
 #driver.close()
 Im = Image.open('captcha.png')
 width, height = Im.size
 # Enlarge the captcha six times in both directions (the variable and file
 # names say "quarter"/"four", but the factor used is 6).
 quartersizedIm = Im.resize((int(width * 6), int(height * 6)))
 quartersizedIm.save('foursized.png')
 im = Image.open('foursized.png')

 xsize, ysize = im.size

 # Raise the contrast (factor 3.0) and then the brightness (factor 10.0);
 # the result is saved as enhance.png.
 enhancer = ImageEnhance.Contrast(im)
 im = enhancer.enhance(3.0)
 enhancer = ImageEnhance.Brightness(im)
 im = enhancer.enhance(10.0)
 xsize, ysize = im.size
 im.save('enhance.png')


 # Binarise in place: a pixel becomes opaque black when channel 1 is not 255
 # while channels 2 and 3 are 255; every other pixel becomes opaque white.
 # Index 3 is read, so the pixels must have four channels
 # (presumably RGBA -- TODO confirm the mode of the downloaded image).
 pix = im.load()
 for y in range(0, ysize):
    for x in range(0, xsize):
        if pix[x , y][1] != 255 and pix[x , y][2] == 255 and pix[x , y][3] == 255:
            pix[x , y] = (0 , 0 , 0 , 255)
        else:
            pix[x , y] = (255 , 255 , 255 , 255)

 im.save('new.png')

 # Edge pass: erode, blur, Canny, dilate. The structuring element is 1x1.
 # NOTE(review): a 1x1 kernel presumably makes erode/dilate no-ops -- confirm.
 kernel = np.ones((1, 1), np.uint16)
 image = cv2.imread('new.png')
 erosion = cv2.erode(image, kernel, iterations = 1)
 blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
 edged = cv2.Canny(blurred, 30, 150)
 plt.imshow(edged)
 #plt.show()
 dilation = cv2.dilate(edged, kernel, iterations = 1)
 plt.imshow(dilation)
 #plt.show()
 # The edge map is written to disk and read back with cv2.imread before it
 # is added to the inverted binary image.
 cv2.imwrite('process.png', dilation)
 img1 = cv2.imread('new.png')
 img2 = cv2.imread('process.png')
 img1 = cv2.bitwise_not(img1)
 final = cv2.add(img1, img2)

 #erosion = cv2.erode(final, kernel, iterations = 1)
 #blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
 # Invert back, convert to grayscale and apply a morphological opening with
 # a 3x3 elliptical kernel; the result is saved as perfect.png.
 final = cv2.bitwise_not(final)
 plt.imshow(final)
 #plt.show()
 #cv2.imwrite('final.png', final)
 gray = cv2.cvtColor(final,cv2.COLOR_BGR2GRAY)
 kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
 res = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
 plt.imshow(res)
 #plt.show()
 cv2.imwrite('perfect.png', res)

 def mse(imgA, imgB):
    """Return the mean squared error between two equally shaped images.

    The squared element-wise differences are summed and divided by the
    number of pixels (rows * columns) of ``imgA``.
    """
    # The difference must be parenthesised before squaring: ``**`` binds
    # tighter than ``-``, so the previous expression squared only ``imgB``.
    diff = imgA.astype("float") - imgB.astype("float")
    err = np.sum(diff ** 2)
    err /= float(imgA.shape[0] * imgA.shape[1])
    return err
 def findBorder(axis, color, leng, img):
    """Return the index of the first scan line, starting at ``axis``, that matches.

    When ``leng`` equals ``img.shape[0]`` the scan lines are columns (pixels
    ``(i, axis)``); otherwise they are rows (pixels ``(axis, i)``), with
    ``i`` running over ``range(leng)``.

    * ``color == [0, 0, 0]``: a line matches as soon as one of its pixels
      has that colour.
    * any other colour: a line matches only when all ``leng`` pixels have it.

    The search keeps advancing ``axis`` until a line matches; nothing stops
    it at the image edge, so ``img.item`` raises IndexError when no line
    matches.
    """
    scan_columns = leng == img.shape[0]
    stop_on_first = color == [0, 0, 0]
    while True:
        matches = 0
        for i in range(leng):
            row, col = (i, axis) if scan_columns else (axis, i)
            if all(img.item(row, col, ch) == color[ch] for ch in range(3)):
                if stop_on_first:
                    return axis
                matches += 1
        if matches == leng:
            return axis
        axis += 1
 def splitWord(img, currentCol):
    """Cut the next glyph to the right of ``currentCol`` out of ``img``.

    Returns ``(charImg, rightSide)``: the cropped slice and the column at
    which the glyph ends (the first all-white column after it). The row
    range is searched over the full image width, starting at row 0.
    """
    rows, cols = img.shape[:2]
    ink = [0, 0, 0]
    paper = [255, 255, 255]

    # Columns: first one holding ink, then the first completely white one.
    leftSide = findBorder(currentCol + 1, ink, rows, img)
    rightSide = findBorder(leftSide, paper, rows, img)
    # Rows: same search, from the top of the image.
    topSide = findBorder(0, ink, cols, img)
    bottomSide = findBorder(topSide, paper, cols, img)

    return img[topSide:bottomSide, leftSide:rightSide], rightSide
 def rotateImage(image, angle):
    """Rotate ``image`` by ``angle`` degrees about its centre.

    The output keeps the input's width and height; areas uncovered by the
    rotation are filled with white (255, 255, 255).
    """
    height, width = image.shape[:2]
    pivot = (width / 2, height / 2)
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        image,
        matrix,
        (width, height),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )
 def makeInnerBorder(img, left, right, top, bottom, color):
    """Paint a frame of ``color`` along the inside edges of ``img``.

    ``left``/``right`` are the frame widths in columns and ``top``/``bottom``
    the frame heights in rows. ``img`` is modified in place and returned.
    Negative sizes are treated as 0; sizes larger than the image are
    clipped to it.
    """
    height, width = img.shape[:2]
    left, right, top, bottom = (max(n, 0) for n in (left, right, top, bottom))
    img[:, :left] = color
    # The right and bottom strips are measured from the far edges; the
    # previous loops repainted the first columns/rows instead.
    img[:, max(width - right, 0):] = color
    img[:top, :] = color
    img[max(height - bottom, 0):, :] = color
    return img
 def verticalProjection(img, min_col=45):
    """Return the column at which to cut, based on black-pixel counts.

    For every column the number of pure-black pixels (first three channels
    all 0) is counted. Among the columns with index >= ``min_col`` the one
    with the fewest black pixels is returned; ties go to the leftmost.

    When the image has no column at or beyond ``min_col`` the function
    returns ``width - 1``. The previous implementation raised KeyError in
    that case because it kept popping from a stale candidate list.
    """
    width = img.shape[1]
    if width <= min_col:
        return width - 1
    black = np.all(img[:, :, :3] == 0, axis=2)
    counts = black.sum(axis=0)
    # argmin reports the first minimum, i.e. the leftmost qualifying column.
    return min_col + int(np.argmin(counts[min_col:]))

 def findRectangleBorder(img):
    """Crop ``img`` to the bounding box of its pure-black pixels.

    A pixel counts as black when its first three channels are all 0. The
    returned slice runs from the first to the last row/column containing
    a black pixel, both inclusive. If the image has no black pixel it is
    returned unchanged.

    The previous loops only broke out of the inner loop, so each border
    ended up at the opposite extreme and the final slice used swapped
    names to compensate. That dropped the last black row/column, ignored
    rows/columns 0 and 1, and produced an empty array for an all-white
    image.
    """
    black = np.all(img[:, :, :3] == 0, axis=2)
    rows = np.flatnonzero(black.any(axis=1))
    cols = np.flatnonzero(black.any(axis=0))
    if rows.size == 0:
        return img
    return img[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]

 def elementaryRevise(img):
    """Normalise a glyph image: crop, pad 5 px of white, resize to 60x80."""
    cropped = findRectangleBorder(img)
    padded = cv2.copyMakeBorder(cropped, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=(255, 255, 255))
    return cv2.resize(padded, (60, 80))

 def countRate(file):
    """Delete ``file`` when it contains too little black.

    The image is read with ``cv2.imread``; pure-black pixels (first three
    channels all 0) are counted and the file is removed when that count
    divided by ``img.size`` is below 0.05. ``img.size`` is the total number
    of array elements, not the number of pixels.
    """
    picture = cv2.imread(file)
    rows, cols = picture.shape[:2]
    dark = sum(
        1
        for r in range(rows)
        for c in range(cols)
        if picture.item(r, c, 0) == 0
        and picture.item(r, c, 1) == 0
        and picture.item(r, c, 2) == 0
    )
    if (dark / picture.size) < 0.05:
        os.remove(file)
    
 def segmentation(img):
    """Split the captcha image into glyphs and save each one as ``<n>.png``.

    Segments are cut off from left to right with ``splitWord``. A segment
    narrower than 200 px after 50 px of padding on each side is treated as
    a single glyph; a wider one is rotated, cut at the column reported by
    ``verticalProjection`` and processed piece by piece. Every saved glyph
    goes through ``elementaryRevise`` and is written to the current working
    directory under a running index. Nothing is returned.
    """
    # Running index used in the output file names.
    cntChar = 0
    currentCol = 0
    # Find the right end of the text: rotate by 180 degrees, locate the
    # first column containing black and map it back to ``img`` coordinates.
    rotateImg = rotateImage(img, 180)
    #cv2.imwrite('rotate.png', rotateImg)
    rotateImg = makeInnerBorder(rotateImg, 1, 1, 1, 1, [255, 255, 255])


    right = findBorder(0, [0,0,0], rotateImg.shape[0], rotateImg)
    right = img.shape[1] - right + 2
    #print(right)
    # Continue until the last cut lies within 2 columns of that right end.
    while abs(currentCol - right) > 2:
        #print(currentCol)
        image, currentCol = splitWord(img, currentCol)
        image = cv2.copyMakeBorder(image, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
        #image = elementaryRevise(image)
        if image.shape[1] < 200:
            # Narrow segment: save it as one glyph.
            image = elementaryRevise(image)
            cv2.imwrite('{cntChar}.png'.format(**locals()), image)
            cntChar += 1

        else:
            # Wide segment: try rotations of 1..20 degrees and record the
            # cut column verticalProjection reports for each.
            cntDict = dict()
            for i in range(1, 21):
                rotateChar = rotateImage(image, i)
                rotateChar = findRectangleBorder(rotateChar)
                #cv2.imwrite('revised{i}.png'.format(**locals()), rotateChar)

                cntDict[i] = verticalProjection(rotateChar)
            # The chosen angle is the one with the smallest recorded value,
            # i.e. the smallest cut column (not the smallest black count).
            bestAngle = min(cntDict, key = cntDict.get)
            rotatedImg = rotateImage(image, bestAngle)
            rotatedImg = findRectangleBorder(rotatedImg)
            min_cut = cntDict[bestAngle]
            # Saved before drawing: new_cut aliases rotatedImg and
            # cv2.rectangle paints in place, so the untouched image is
            # reloaded from this file for the remainder below.
            cv2.imwrite('rotated_{bestAngle}_degree.png'.format(**locals()), rotatedImg)
            new_cut = rotatedImg
            # White out everything right of the cut to keep the left piece.
            cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)

            #new_cut = rotatedImg[0:rotatedImg.shape[0], 0:min_cut]

            # Rotate back by the same angle, pad, normalise and save.
            new_cut = rotateImage(new_cut, 360 - bestAngle)
            new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
            new_cut = elementaryRevise(new_cut)
            cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
            cntChar += 1
            rotatedImg = cv2.imread('rotated_{bestAngle}_degree.png'.format(**locals()))
            remainImg = rotatedImg
            # White out everything up to the cut to keep the right piece.
            cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)
            #remainImg = rotatedImg[0:rotatedImg.shape[0], (min_cut + 1):rotatedImg.shape[1]]
            #plt.imshow(remainImg)
            #plt.show()

            #the real angle to rotate back need to estimate
            remainImg = rotateImage(remainImg, 360 - bestAngle)
            remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
            #deal the remaining character
            # NOTE(review): a remainder exactly 200 px wide is neither saved
            # here nor handled by the loop below -- confirm this is intended.
            if remainImg.shape[1] < 200:

                remainImg = elementaryRevise(remainImg)
                cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
                cntChar += 1
            # Still wider than one glyph: keep cutting with the same angle.
            while remainImg.shape[1] > 200:
                #cv2.imwrite('remain.png', remainImg)
                #for i in range(1, 21):
                    #rotateChar = rotateImage(remainImg, i)
                    #rotateChar = findRectangleBorder(rotateChar)
                    #cv2.imwrite('revised{i}.png'.format(**locals()), rotateChar)
                rotateChar = rotateImage(remainImg, bestAngle)
                rotateChar = findRectangleBorder(rotateChar)
                # Note the different file name (no "_degree" suffix) here.
                cv2.imwrite('rotated_{bestAngle}.png'.format(**locals()), rotateChar)
                cntDict[bestAngle] = verticalProjection(rotateChar)
                #bestAngle = min(cntDict, key = cntDict.get)
                #rotatedImg = rotateImage(image, bestAngle)
                #rotatedImg = findRectangleBorder(rotatedImg)
                min_cut = cntDict[bestAngle]
                new_cut = rotateChar
                cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)

                #new_cut = rotatedImg[0:rotatedImg.shape[0], 0:min_cut]

                new_cut = rotateImage(new_cut, 360 - bestAngle)
                new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

                new_cut = elementaryRevise(new_cut)
                cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
                cntChar += 1
                rotatedImg = cv2.imread('rotated_{bestAngle}.png'.format(**locals()))
                remainImg = rotatedImg
                #plt.imshow(remainImg)
                #plt.show()
                cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)
                #remainImg = rotatedImg[0:rotatedImg.shape[0], (min_cut + 1):rotatedImg.shape[1]]
                #plt.imshow(remainImg)
                #plt.show()


                remainImg = rotateImage(remainImg, 360 - bestAngle)
                remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

                # Saving the last piece resizes it to 60 px wide, which also
                # ends the enclosing while loop.
                if remainImg.shape[1] < 200:
                    remainImg = elementaryRevise(remainImg)
                    cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
                    cntChar += 1

            
            #print('rotated_{bestAngle}_degree.png'.format(**locals()), bestAngle, min_cut)
        


    
    
    
        #find the least number of text color of column
        #if cntNum < textColorNum:
        #    textColorNum = cntNum




 # Load the cleaned captcha, surround it with 30 px of white and split it
 # into glyph images.
 image = cv2.imread("perfect.png")

 image = cv2.copyMakeBorder(image, 30, 30, 30, 30,cv2.BORDER_CONSTANT, value = (255, 255, 255))
 segmentation(image)
 #dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')



 # Drop PNGs with too little black (countRate) and move the surviving
 # five-character file names (e.g. "0.png") into the data_set folder.
 # NOTE(review): countRate and os.path.isfile receive the bare file name, so
 # this presumably relies on the working directory being dir_path -- confirm.
 for file in os.listdir(dir_path):
    if file.endswith('.png'):
        countRate(file)
        if len(file) == 5 and os.path.isfile(file) :
            shutil.move('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\' + file,'C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set\\' + file)

 # From here on dir_path refers to the data_set folder.
 dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')



 def trainData(img):
    """Return the label of the stored sample that is closest to ``img``.

    Every row of table ``Data`` in ``dataset.sqlite`` is read; column 0 (the
    image blob) is written to ``test.png`` and decoded with ``cv2.imread``,
    then compared with ``img`` through ``mse``. The value of column 1 of the
    row with the smallest error is returned.

    Raises:
        ValueError: if the table holds no rows (this used to surface as an
            UnboundLocalError on ``char``).
    """
    best_err = float('inf')  # no longer shadows the builtin ``min``
    char = None
    found = False
    conn = sqlite3.connect('dataset.sqlite')
    try:
        cur = conn.cursor()
        cur.execute('SELECT * FROM Data')
        for row in cur:
            with open('test.png', 'wb') as output_file:
                output_file.write(row[0])
            dataImg = cv2.imread('test.png')
            err = mse(dataImg, img)
            if err < best_err:
                best_err = err
                char = row[1]
                found = True
    finally:
        # The connection was previously left open on every call.
        conn.close()

    if not found:
        raise ValueError('no comparable sample found in table Data')
    return char

 def mse(img1, img2):
    """Return the mean squared error between two equally shaped images.

    The sum of squared element-wise differences is divided by the number
    of pixels (rows * columns) of ``img1``.
    """
    delta = img1.astype('float') - img2.astype('float')
    pixel_count = float(img1.shape[0] * img1.shape[1])
    total = np.sum(delta ** 2)
    total /= pixel_count
    return total

 # Show every file in data_set, print the label guessed by trainData and,
 # when the user answers 'y', store the image with that label in the
 # Data table of dataset.sqlite.
 for file in os.listdir(dir_path):
    imgFile = cv2.imread(dir_path + "\\" + file)
    plt.imshow(imgFile)
    plt.show()
    char = trainData(imgFile)
    print(char)
    judge = input()
    if judge == 'y':
        # ``with`` and ``finally`` release the file handle and the
        # connection even when the insert fails.
        with open(dir_path + "\\" + file, 'rb') as f:
            ablob = f.read()
        conn = sqlite3.connect('dataset.sqlite')
        try:
            cur = conn.cursor()
            cur.execute('''
        INSERT INTO Data(img, label)
        VALUES(?, ?)''',(sqlite3.Binary(ablob), char.upper()))
            conn.commit()
        finally:
            conn.close()

 # Remove the processed PNGs from data_set.
 for file in os.listdir(dir_path):
    if file.endswith('.png'):
        os.remove(dir_path + "\\" + file)



 def productTestData():
    """Label every file in ``dir_path`` by hand and store it in the database.

    Each file is displayed and the user is asked for its label. Unless the
    answer is empty or whitespace only, the file's bytes and the upper-cased
    label are inserted into table ``Data`` of ``dataset.sqlite``.
    """
    for file in os.listdir(dir_path):
        imgFile = cv2.imread(dir_path + "\\" + file)
        plt.imshow(imgFile)
        plt.show()
        target = input("The target is ")
        # ``not target.isspace()`` let an empty answer through because
        # ``"".isspace()`` is False; strip() rejects both cases.
        if not target.strip():
            continue
        # ``with`` and ``finally`` release the file handle and the
        # connection even when the insert fails.
        with open(dir_path + "\\" + file, 'rb') as f:
            ablob = f.read()
        conn = sqlite3.connect('dataset.sqlite')
        try:
            cur = conn.cursor()
            cur.execute('''
            INSERT INTO Data(img, label)
            VALUES(?, ?)''',(sqlite3.Binary(ablob), target.upper()))
            conn.commit()
        finally:
            conn.close()

       
        


























 #sm = browser.find_element_by_name('ibnSubmit').click()
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys
	import urllib.request
	from PIL import Image
	from selenium.webdriver.support.select import Select
	from PIL import ImageEnhance
	import matplotlib.pyplot as plt
	import cv2
	import numpy as np
	import urllib.request
	from lxml import etree
	import os
	import shutil
	import sqlite3
















	## get the image source
	#img = driver.find_element_by_xpath('/html/body/img')
	# Project directory that is scanned for PNG files below.
	dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')

	# Delete every PNG found in dir_path before a new run.
	# (The nesting of this loop had been flattened; it is restored here.)
	for file in os.listdir(dir_path):
	    if file.endswith('.png'):
	        os.remove(dir_path + "\\" + file)

	src = 'https://isdna1.yzu.edu.tw/CnStdSel/SelRandomImage.aspx'
	#download the image
	urllib.request.urlretrieve(src, "captcha.png")
	#driver.close()
	Im = Image.open('captcha.png')
	width, height = Im.size
	# Enlarge the captcha six times in both directions.
	quartersizedIm = Im.resize((int(width * 6), int(height * 6)))
	quartersizedIm.save('foursized.png')
	im = Image.open('foursized.png')

	xsize, ysize = im.size

	# Raise the contrast (factor 3.0) and then the brightness (factor 10.0).
	enhancer = ImageEnhance.Contrast(im)
	im = enhancer.enhance(3.0)
	enhancer = ImageEnhance.Brightness(im)
	im = enhancer.enhance(10.0)
	xsize, ysize = im.size
	im.save('enhance.png')

	# Binarise in place: a pixel becomes opaque black when channel 1 is not
	# 255 while channels 2 and 3 are 255; every other pixel becomes white.
	# Index 3 is read, so the pixels must have four channels.
	pix = im.load()
	for y in range(0, ysize):
	    for x in range(0, xsize):
	        if pix[x , y][1] != 255 and pix[x , y][2] == 255 and pix[x , y][3] == 255:
	            pix[x , y] = (0 , 0 , 0 , 255)
	        else:
	            pix[x , y] = (255 , 255 , 255 , 255)

	im.save('new.png')

	# Edge pass: erode, blur, Canny, dilate (1x1 structuring element).
	kernel = np.ones((1, 1), np.uint16)
	image = cv2.imread('new.png')
	erosion = cv2.erode(image, kernel, iterations = 1)
	blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
	edged = cv2.Canny(blurred, 30, 150)
	plt.imshow(edged)
	#plt.show()
	dilation = cv2.dilate(edged, kernel, iterations = 1)
	plt.imshow(dilation)
	#plt.show()
	# The edge map is written to disk and read back before it is added to
	# the inverted binary image.
	cv2.imwrite('process.png', dilation)
	img1 = cv2.imread('new.png')
	img2 = cv2.imread('process.png')
	img1 = cv2.bitwise_not(img1)
	final = cv2.add(img1, img2)

	# Invert back, convert to grayscale and apply a morphological opening
	# with a 3x3 elliptical kernel; the result is saved as perfect.png.
	final = cv2.bitwise_not(final)
	plt.imshow(final)
	#plt.show()
	#cv2.imwrite('final.png', final)
	gray = cv2.cvtColor(final,cv2.COLOR_BGR2GRAY)
	kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
	res = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
	plt.imshow(res)
	#plt.show()
	cv2.imwrite('perfect.png', res)

	def mse(imgA, imgB):
	    """Return the mean squared error between two equally shaped images.

	    The squared element-wise differences are summed and divided by the
	    number of pixels (rows * columns) of ``imgA``.
	    """
	    # The difference must be parenthesised before squaring: ``**`` binds
	    # tighter than ``-``, so the previous expression squared only ``imgB``.
	    diff = imgA.astype("float") - imgB.astype("float")
	    err = np.sum(diff ** 2)
	    err /= float(imgA.shape[0] * imgA.shape[1])
	    return err
	def findBorder(axis, color, leng, img):
	    """Return the index of the first scan line, starting at ``axis``, that matches.

	    When ``leng`` equals ``img.shape[0]`` the scan lines are columns
	    (pixels ``(i, axis)``); otherwise they are rows (pixels ``(axis, i)``),
	    with ``i`` running over ``range(leng)``.

	    * ``color == [0, 0, 0]``: a line matches as soon as one of its pixels
	      has that colour.
	    * any other colour: a line matches only when all ``leng`` pixels have it.

	    Nothing stops the search at the image edge, so ``img.item`` raises
	    IndexError when no line matches. (The block's nesting had been
	    flattened; it is restored here and the unused locals are dropped.)
	    """
	    scan_columns = leng == img.shape[0]
	    stop_on_first = color == [0, 0, 0]
	    while True:
	        matches = 0
	        for i in range(leng):
	            row, col = (i, axis) if scan_columns else (axis, i)
	            if all(img.item(row, col, ch) == color[ch] for ch in range(3)):
	                if stop_on_first:
	                    return axis
	                matches += 1
	        if matches == leng:
	            return axis
	        axis += 1
	def splitWord(img, currentCol):
	    """Cut the next glyph to the right of ``currentCol`` out of ``img``.

	    Returns ``(charImg, rightSide)``: the cropped slice and the column at
	    which the glyph ends (the first all-white column after it). The row
	    range is searched over the full image width, starting at row 0.
	    (The block's flattened indentation is restored here.)
	    """
	    height, width = img.shape[:2]
	    black = [0, 0, 0]
	    white = [255, 255, 255]

	    # Columns: first one holding black, then the first completely white one.
	    leftSide = findBorder(currentCol + 1, black, height, img)
	    rightSide = findBorder(leftSide, white, height, img)
	    # Rows: same search, from the top of the image.
	    topSide = findBorder(0, black, width, img)
	    bottomSide = findBorder(topSide, white, width, img)

	    charImg = img[topSide:bottomSide, leftSide:rightSide]
	    return charImg, rightSide
	def rotateImage(image, angle):
	    """Rotate ``image`` by ``angle`` degrees about its centre.

	    The output keeps the input's width and height; areas uncovered by the
	    rotation are filled with white (255, 255, 255).
	    (The block's flattened indentation is restored here.)
	    """
	    (h, w) = image.shape[:2]
	    center = (w / 2, h / 2)
	    M = cv2.getRotationMatrix2D(center, angle, 1.0)
	    rotated = cv2.warpAffine(image, M, (w, h), borderMode = cv2.BORDER_CONSTANT, borderValue = (255, 255, 255))
	    return rotated
	def makeInnerBorder(img, left, right, top, bottom, color):
	    """Paint a frame of ``color`` along the inside edges of ``img``.

	    ``left``/``right`` are the frame widths in columns and ``top``/``bottom``
	    the frame heights in rows. ``img`` is modified in place and returned.
	    Negative sizes are treated as 0; sizes larger than the image are
	    clipped to it.
	    """
	    height, width = img.shape[:2]
	    left, right, top, bottom = (max(n, 0) for n in (left, right, top, bottom))
	    img[:, :left] = color
	    # The right and bottom strips are measured from the far edges; the
	    # previous loops repainted the first columns/rows instead.
	    img[:, max(width - right, 0):] = color
	    img[:top, :] = color
	    img[max(height - bottom, 0):, :] = color
	    return img
	def verticalProjection(img, min_col=45):
	    """Return the column at which to cut, based on black-pixel counts.

	    For every column the number of pure-black pixels (first three channels
	    all 0) is counted. Among the columns with index >= ``min_col`` the one
	    with the fewest black pixels is returned; ties go to the leftmost.

	    When the image has no column at or beyond ``min_col`` the function
	    returns ``width - 1``. The previous implementation raised KeyError in
	    that case because it kept popping from a stale candidate list.
	    """
	    width = img.shape[1]
	    if width <= min_col:
	        return width - 1
	    black = np.all(img[:, :, :3] == 0, axis=2)
	    counts = black.sum(axis=0)
	    # argmin reports the first minimum, i.e. the leftmost qualifying column.
	    return min_col + int(np.argmin(counts[min_col:]))

	def findRectangleBorder(img):
	    """Crop ``img`` to the bounding box of its pure-black pixels.

	    A pixel counts as black when its first three channels are all 0. The
	    returned slice runs from the first to the last row/column containing
	    a black pixel, both inclusive. If the image has no black pixel it is
	    returned unchanged.

	    The previous loops tracked each border at the opposite extreme and
	    sliced with swapped names, which dropped the last black row/column
	    and produced an empty array for an all-white image.
	    """
	    black = np.all(img[:, :, :3] == 0, axis=2)
	    rows = np.flatnonzero(black.any(axis=1))
	    cols = np.flatnonzero(black.any(axis=0))
	    if rows.size == 0:
	        return img
	    return img[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]

	def elementaryRevise(img):
	    """Normalise a glyph image: crop, pad 5 px of white, resize to 60x80.

	    (The block's flattened indentation is restored here.)
	    """
	    img = findRectangleBorder(img)
	    img = cv2.copyMakeBorder(img, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	    img = cv2.resize(img, (60 ,80))
	    return img

	def countRate(file):
	    """Delete ``file`` when it contains too little black.

	    The image is read with ``cv2.imread``; pure-black pixels (first three
	    channels all 0) are counted and the file is removed when that count
	    divided by ``img.size`` is below 0.05. ``img.size`` is the total number
	    of array elements, not the number of pixels.
	    (The block's flattened indentation is restored here.)
	    """
	    img = cv2.imread(file)
	    height, width = img.shape[:2]
	    count = 0
	    for h in range(height):
	        for w in range(width):
	            if img.item(h, w, 0) == 0 and img.item(h, w, 1) == 0 and img.item(h, w, 2) == 0:
	                count += 1
	    if (count / img.size) < 0.05:
	        os.remove(file)

	def segmentation(img):
	    """Split the captcha image into glyphs and save each one as ``<n>.png``.

	    Segments are cut off from left to right with ``splitWord``. A segment
	    narrower than 200 px after 50 px of padding on each side is treated as
	    a single glyph; a wider one is rotated, cut at the column reported by
	    ``verticalProjection`` and processed piece by piece. Every saved glyph
	    goes through ``elementaryRevise``. Nothing is returned.

	    The statements are unchanged; only the nesting, which had been
	    flattened, is restored. Local names must stay as they are because the
	    file names are built with ``format(**locals())``.
	    """
	    # Running index used in the output file names.
	    cntChar = 0
	    currentCol = 0
	    # Find the right end of the text: rotate by 180 degrees, locate the
	    # first column containing black and map it back to ``img`` coordinates.
	    rotateImg = rotateImage(img, 180)
	    rotateImg = makeInnerBorder(rotateImg, 1, 1, 1, 1, [255, 255, 255])

	    right = findBorder(0, [0,0,0], rotateImg.shape[0], rotateImg)
	    right = img.shape[1] - right + 2
	    # Continue until the last cut lies within 2 columns of that right end.
	    while abs(currentCol - right) > 2:
	        image, currentCol = splitWord(img, currentCol)
	        image = cv2.copyMakeBorder(image, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	        if image.shape[1] < 200:
	            # Narrow segment: save it as one glyph.
	            image = elementaryRevise(image)
	            cv2.imwrite('{cntChar}.png'.format(**locals()), image)
	            cntChar += 1

	        else:
	            # Wide segment: try rotations of 1..20 degrees and record the
	            # cut column verticalProjection reports for each.
	            cntDict = dict()
	            for i in range(1, 21):
	                rotateChar = rotateImage(image, i)
	                rotateChar = findRectangleBorder(rotateChar)

	                cntDict[i] = verticalProjection(rotateChar)
	            # The chosen angle is the one with the smallest recorded value.
	            bestAngle = min(cntDict, key = cntDict.get)
	            rotatedImg = rotateImage(image, bestAngle)
	            rotatedImg = findRectangleBorder(rotatedImg)
	            min_cut = cntDict[bestAngle]
	            # Saved before drawing: new_cut aliases rotatedImg and
	            # cv2.rectangle paints in place, so the untouched image is
	            # reloaded from this file for the remainder below.
	            cv2.imwrite('rotated_{bestAngle}_degree.png'.format(**locals()), rotatedImg)
	            new_cut = rotatedImg
	            # White out everything right of the cut to keep the left piece.
	            cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)

	            # Rotate back by the same angle, pad, normalise and save.
	            new_cut = rotateImage(new_cut, 360 - bestAngle)
	            new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	            new_cut = elementaryRevise(new_cut)
	            cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
	            cntChar += 1
	            rotatedImg = cv2.imread('rotated_{bestAngle}_degree.png'.format(**locals()))
	            remainImg = rotatedImg
	            # White out everything up to the cut to keep the right piece.
	            cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)

	            #the real angle to rotate back need to estimate
	            remainImg = rotateImage(remainImg, 360 - bestAngle)
	            remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
	            #deal the remaining character
	            if remainImg.shape[1] < 200:

	                remainImg = elementaryRevise(remainImg)
	                cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
	                cntChar += 1
	            # Still wider than one glyph: keep cutting with the same angle.
	            while remainImg.shape[1] > 200:
	                rotateChar = rotateImage(remainImg, bestAngle)
	                rotateChar = findRectangleBorder(rotateChar)
	                # Note the different file name (no "_degree" suffix) here.
	                cv2.imwrite('rotated_{bestAngle}.png'.format(**locals()), rotateChar)
	                cntDict[bestAngle] = verticalProjection(rotateChar)
	                min_cut = cntDict[bestAngle]
	                new_cut = rotateChar
	                cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)

	                new_cut = rotateImage(new_cut, 360 - bestAngle)
	                new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

	                new_cut = elementaryRevise(new_cut)
	                cv2.imwrite('{cntChar}.png'.format(**locals()), new_cut)
	                cntChar += 1
	                rotatedImg = cv2.imread('rotated_{bestAngle}.png'.format(**locals()))
	                remainImg = rotatedImg
	                cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)

	                remainImg = rotateImage(remainImg, 360 - bestAngle)
	                remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

	                # Saving the last piece resizes it to 60 px wide, which
	                # also ends the enclosing while loop.
	                if remainImg.shape[1] < 200:
	                    remainImg = elementaryRevise(remainImg)
	                    cv2.imwrite('{cntChar}.png'.format(**locals()), remainImg)
	                    cntChar += 1


	#print('rotated_{bestAngle}_degree.png'.format(**locals()), bestAngle, min_cut)






	#find the least number of text color of column
	#if cntNum < textColorNum:
	# textColorNum = cntNum




	# Load the cleaned captcha, surround it with 30 px of white and split it
	# into glyph images.
	image = cv2.imread("perfect.png")

	image = cv2.copyMakeBorder(image, 30, 30, 30, 30,cv2.BORDER_CONSTANT, value = (255, 255, 255))
	segmentation(image)
	#dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')

	# Drop PNGs with too little black (countRate) and move the surviving
	# five-character file names (e.g. "0.png") into the data_set folder.
	# (The nesting of this loop had been flattened; it is restored here.)
	for file in os.listdir(dir_path):
	    if file.endswith('.png'):
	        countRate(file)
	        if len(file) == 5 and os.path.isfile(file) :
	            shutil.move('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\' + file,'C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set\\' + file)

	# From here on dir_path refers to the data_set folder.
	dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')



	def trainData(img):
	    """Return the label of the stored sample that is closest to ``img``.

	    Every row of table ``Data`` in ``dataset.sqlite`` is read; column 0 (the
	    image blob) is written to ``test.png`` and decoded with ``cv2.imread``,
	    then compared with ``img`` through ``mse``. The value of column 1 of the
	    row with the smallest error is returned.

	    Raises:
	        ValueError: if the table holds no rows (this used to surface as an
	            UnboundLocalError on ``char``).
	    """
	    best_err = float('inf')  # no longer shadows the builtin ``min``
	    char = None
	    found = False
	    conn = sqlite3.connect('dataset.sqlite')
	    try:
	        cur = conn.cursor()
	        cur.execute('SELECT * FROM Data')
	        for row in cur:
	            with open('test.png', 'wb') as output_file:
	                output_file.write(row[0])
	            dataImg = cv2.imread('test.png')
	            err = mse(dataImg, img)
	            if err < best_err:
	                best_err = err
	                char = row[1]
	                found = True
	    finally:
	        # The connection was previously left open on every call.
	        conn.close()

	    if not found:
	        raise ValueError('no comparable sample found in table Data')
	    return char

	def mse(img1, img2):
	    """Return the mean squared error between two equally shaped images.

	    The sum of squared element-wise differences is divided by the number
	    of pixels (rows * columns) of ``img1``.
	    (The block's flattened indentation is restored here.)
	    """
	    err = np.sum((img1.astype('float') - img2.astype('float'))**2)
	    err /= float(img1.shape[0] * img1.shape[1])
	    return err

	# Show every file in data_set, print the label guessed by trainData and,
	# when the user answers 'y', store the image with that label in the
	# Data table of dataset.sqlite.
	# (The nesting of these loops had been flattened; it is restored here.)
	for file in os.listdir(dir_path):
	    imgFile = cv2.imread(dir_path + "\\" + file)
	    plt.imshow(imgFile)
	    plt.show()
	    char = trainData(imgFile)
	    print(char)
	    judge = input()
	    if judge == 'y':
	        # ``with`` and ``finally`` release the file handle and the
	        # connection even when the insert fails.
	        with open(dir_path + "\\" + file, 'rb') as f:
	            ablob = f.read()
	        conn = sqlite3.connect('dataset.sqlite')
	        try:
	            cur = conn.cursor()
	            cur.execute('''
	INSERT INTO Data(img, label)
	VALUES(?, ?)''',(sqlite3.Binary(ablob), char.upper()))
	            conn.commit()
	        finally:
	            conn.close()

	# Remove the processed PNGs from data_set.
	for file in os.listdir(dir_path):
	    if file.endswith('.png'):
	        os.remove(dir_path + "\\" + file)



	def productTestData():
	    """Label every file in ``dir_path`` by hand and store it in the database.

	    Each file is displayed and the user is asked for its label. Unless the
	    answer is empty or whitespace only, the file's bytes and the upper-cased
	    label are inserted into table ``Data`` of ``dataset.sqlite``.
	    """
	    for file in os.listdir(dir_path):
	        imgFile = cv2.imread(dir_path + "\\" + file)
	        plt.imshow(imgFile)
	        plt.show()
	        target = input("The target is ")
	        # ``not target.isspace()`` let an empty answer through because
	        # ``"".isspace()`` is False; strip() rejects both cases.
	        if not target.strip():
	            continue
	        # ``with`` and ``finally`` release the file handle and the
	        # connection even when the insert fails.
	        with open(dir_path + "\\" + file, 'rb') as f:
	            ablob = f.read()
	        conn = sqlite3.connect('dataset.sqlite')
	        try:
	            cur = conn.cursor()
	            cur.execute('''
	INSERT INTO Data(img, label)
	VALUES(?, ?)''',(sqlite3.Binary(ablob), target.upper()))
	            conn.commit()
	        finally:
	            conn.close()





























	#sm = browser.find_element_by_name('ibnSubmit').click()