Skip to content

Instantly share code, notes, and snippets.

@titaneric
Last active July 1, 2019 06:33
Show Gist options
  • Save titaneric/6a3e3991521e81e45772559b76c32e3a to your computer and use it in GitHub Desktop.
Save titaneric/6a3e3991521e81e45772559b76c32e3a to your computer and use it in GitHub Desktop.
OCR.py
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import urllib.request
from PIL import Image
from selenium.webdriver.support.select import Select
from PIL import ImageEnhance
import matplotlib.pyplot as plt
import cv2
import numpy as np
import urllib.request
from lxml import etree
import os
import shutil
import sqlite3
# Start from a clean working directory: delete every PNG left in the project
# folder, then download a fresh captcha image to 'captcha.png'.
dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR')
for file in os.listdir(dir_path):
    if file.endswith('.png'):
        os.remove(os.path.join(dir_path, file))

# Endpoint that serves the captcha image.
src = 'https://isdna1.yzu.edu.tw/CnStdSel/SelRandomImage.aspx'
# Download the image into the current working directory.
urllib.request.urlretrieve(src, "captcha.png")
# Upscale the captcha six times, boost contrast and brightness, then reduce it
# to a two-colour (black text / white background) image saved as 'new.png'.
Im = Image.open('captcha.png')
width, height = Im.size
quartersizedIm = Im.resize((int(width * 6), int(height * 6)))
quartersizedIm.save('foursized.png')

# The thresholding loop below reads band index 3 and writes 4-tuples, so make
# sure the image really has four bands before enhancing it.
im = Image.open('foursized.png').convert('RGBA')
enhancer = ImageEnhance.Contrast(im)
im = enhancer.enhance(3.0)
enhancer = ImageEnhance.Brightness(im)
im = enhancer.enhance(10.0)
xsize, ysize = im.size
im.save('enhance.png')

pix = im.load()
for y in range(ysize):
    for x in range(xsize):
        pixel = pix[x, y]
        # NOTE(review): the test uses bands 1, 2 and 3 (green, blue, alpha in
        # RGBA order), not 0, 1 and 2 -- confirm this is the intended rule.
        if pixel[1] != 255 and pixel[2] == 255 and pixel[3] == 255:
            pix[x, y] = (0, 0, 0, 255)
        else:
            pix[x, y] = (255, 255, 255, 255)
im.save('new.png')
# Second clean-up pass over 'new.png': build an edge map, merge it back into
# the binarised image, then apply a morphological opening. The result is
# written to 'perfect.png', which the segmentation step reads later.
# NOTE(review): a 1x1 kernel presumably makes the erode/dilate calls below
# leave their input unchanged -- confirm whether a larger kernel was intended.
kernel = np.ones((1, 1), np.uint16)
image = cv2.imread('new.png')
erosion = cv2.erode(image, kernel, iterations = 1)
blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
# Canny edge detection with thresholds 30 and 150.
edged = cv2.Canny(blurred, 30, 150)
plt.imshow(edged)
#plt.show()
dilation = cv2.dilate(edged, kernel, iterations = 1)
plt.imshow(dilation)
#plt.show()
# The edge map goes through disk and is read back below as 'process.png'.
cv2.imwrite('process.png', dilation)
img1 = cv2.imread('new.png')
img2 = cv2.imread('process.png')
# Invert the binarised image, add the edge map, then invert the sum back.
img1 = cv2.bitwise_not(img1)
final = cv2.add(img1, img2)
#erosion = cv2.erode(final, kernel, iterations = 1)
#blurred = cv2.GaussianBlur(erosion, (5, 5), 0)
final = cv2.bitwise_not(final)
plt.imshow(final)
#plt.show()
#cv2.imwrite('final.png', final)
# Convert to a single channel and apply an opening with a 3x3 elliptical
# structuring element.
gray = cv2.cvtColor(final,cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))
res = cv2.morphologyEx(gray, cv2.MORPH_OPEN, kernel)
plt.imshow(res)
#plt.show()
cv2.imwrite('perfect.png', res)
def mse(imgA, imgB):
    """Return the squared-error score between two equally shaped images.

    Both arrays are converted to float first so that unsigned 8-bit values
    cannot wrap around when subtracted. The summed squared difference is
    divided by ``height * width`` of ``imgA`` (channels are not included in
    the divisor).
    """
    diff = imgA.astype("float") - imgB.astype("float")
    # Square the difference as a whole; squaring only imgB would not be an
    # error measure at all.
    err = np.sum(diff ** 2)
    err /= float(imgA.shape[0] * imgA.shape[1])
    return err
def findBorder(axis, color, leng, img):
    """Scan lines of ``img`` starting at ``axis`` and return the first match.

    Args:
        axis: Index of the first column (or row) to inspect.
        color: Three-component colour to look for. ``[0, 0, 0]`` is treated
            as the text colour; any other colour is treated as background.
        leng: Number of pixels to inspect on each line. When it equals
            ``img.shape[0]`` the scan walks over columns, otherwise over rows.
        img: Three-dimensional image array (rows, columns, channels).

    Returns:
        For black: the first line index containing at least one pixel of that
        colour. For any other colour: the first line index whose inspected
        pixels all have that colour.

    Raises:
        ValueError: If ``leng`` is not positive.
        IndexError: If the scan runs past the edge of the image without
            finding a match.
    """
    if leng <= 0:
        raise ValueError("leng must be positive")

    target = np.asarray(color[:3])
    seeking_text = list(color) == [0, 0, 0]
    # NOTE: for a square image this test cannot tell rows from columns and
    # always selects the column scan.
    scan_columns = leng == img.shape[0]

    while True:
        if scan_columns:
            line = img[:leng, axis, :3]
        else:
            line = img[axis, :leng, :3]
        hits = np.all(line == target, axis=-1)
        if seeking_text:
            # One text pixel is enough to mark the start of a glyph.
            if hits.any():
                return axis
        elif hits.all():
            # A background line must be free of any other colour.
            return axis
        axis += 1
def splitWord(img, currentCol):
    """Cut the next glyph to the right of ``currentCol`` out of ``img``.

    The left edge is the first column after ``currentCol`` that contains a
    black pixel and the right edge is the first all-white column after it.
    The top and bottom edges are found the same way on rows, scanning whole
    rows of ``img`` from row 0.

    Returns:
        A tuple ``(charImg, rightSide)``: the cropped slice of ``img`` and the
        column index at which the next search should continue.
    """
    height, width = img.shape[:2]
    black = [0, 0, 0]
    white = [255, 255, 255]

    leftSide = findBorder(currentCol + 1, black, height, img)
    rightSide = findBorder(leftSide, white, height, img)
    topSide = findBorder(0, black, width, img)
    bottomSide = findBorder(topSide, white, width, img)

    charImg = img[topSide:bottomSide, leftSide:rightSide]
    return charImg, rightSide
def rotateImage(image, angle):
    """Return ``image`` rotated by ``angle`` degrees about its centre.

    The output keeps the input's width and height; areas uncovered by the
    rotation are filled with white.
    """
    (h, w) = image.shape[:2]
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(
        image,
        M,
        (w, h),
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )
    return rotated
def makeInnerBorder(img, left, right, top, bottom, color):
    """Paint a border of ``color`` along the inside edges of ``img``.

    Args:
        img: Image array, modified in place.
        left, right, top, bottom: Border thickness in pixels on each side.
            Values below one leave that side untouched.
        color: Pixel value to write.

    Returns:
        The same ``img`` object, for convenient chaining.
    """
    height, width = img.shape[:2]
    left, right = max(left, 0), max(right, 0)
    top, bottom = max(top, 0), max(bottom, 0)

    img[:, :left] = color
    # The right and bottom strips are measured from the far edges.
    img[:, max(width - right, 0):] = color
    img[:top, :] = color
    img[max(height - bottom, 0):, :] = color
    return img
def verticalProjection(img, min_col=45):
    """Return the column at which two touching glyphs are best separated.

    The number of black pixels is counted for every column. Columns left of
    ``min_col`` are never chosen, so a cut cannot fall inside the first glyph.

    Args:
        img: Three-dimensional image array (rows, columns, channels).
        min_col: First column index that may be returned.

    Returns:
        The left-most column at or beyond ``min_col`` with the fewest black
        pixels, or ``width - 1`` when the image has no such column.
    """
    width = img.shape[1]
    if width <= min_col:
        # Nothing to the right of the protected zone: signal "no cut".
        return width - 1

    black = np.all(img[:, :, :3] == 0, axis=2)
    counts = black.sum(axis=0)
    # argmin returns the first minimum, i.e. the left-most candidate.
    return min_col + int(np.argmin(counts[min_col:]))
def findRectangleBorder(img):
    """Crop ``img`` to the bounding box of its black pixels.

    A pixel counts as black when its first three channels are all zero. The
    first and last black row and column are all included in the result.

    Returns:
        A slice (view) of ``img`` covering the bounding box, or ``img``
        itself when it contains no black pixel.
    """
    black = np.all(img[:, :, :3] == 0, axis=2)
    rows = np.flatnonzero(black.any(axis=1))
    cols = np.flatnonzero(black.any(axis=0))
    if rows.size == 0:
        # Nothing to crop to; hand the image back unchanged rather than
        # producing an empty array.
        return img

    topBorder, bottomBorder = int(rows[0]), int(rows[-1])
    leftBorder, rightBorder = int(cols[0]), int(cols[-1])
    cutImg = img[topBorder:bottomBorder + 1, leftBorder:rightBorder + 1]
    return cutImg
def elementaryRevise(img):
    """Normalise a glyph image to a fixed size.

    The image is cropped with ``findRectangleBorder``, padded with a 5-pixel
    white margin on every side and resized with ``cv2.resize(img, (60, 80))``.
    """
    img = findRectangleBorder(img)
    img = cv2.copyMakeBorder(
        img, 5, 5, 5, 5, cv2.BORDER_CONSTANT, value=(255, 255, 255)
    )
    img = cv2.resize(img, (60, 80))
    return img
def countRate(file):
    """Delete the image ``file`` when it contains too little black.

    The number of pixels whose first three channels are all zero is divided
    by ``img.size`` (the total number of array elements, channels included);
    the file is removed when that ratio is below 0.05. A path that cannot be
    read as an image is left alone.
    """
    img = cv2.imread(file)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        return

    black = np.all(img[:, :, :3] == 0, axis=2)
    count = int(np.count_nonzero(black))
    if (count / img.size) < 0.05:
        os.remove(file)
def segmentation(img):
    """Split the captcha ``img`` into glyphs and write each one to disk.

    Glyphs are written to the current working directory as ``0.png``,
    ``1.png``, ... in left-to-right order. A crop narrower than 200 pixels
    (after a 50-pixel white margin is added) is treated as one glyph. A wider
    crop is assumed to hold touching glyphs: it is rotated by 1 to 20 degrees,
    the angle giving the smallest ``verticalProjection`` result is kept, and
    the crop is cut at that column repeatedly until the remainder is narrow.

    Intermediate images ``rotated_<angle>_degree.png`` and
    ``rotated_<angle>.png`` are also written to the working directory.
    """
    cntChar = 0
    currentCol = 0

    # Locate the right-most text column by scanning a copy rotated by 180
    # degrees from its left side.
    rotateImg = rotateImage(img, 180)
    rotateImg = makeInnerBorder(rotateImg, 1, 1, 1, 1, [255, 255, 255])
    right = findBorder(0, [0, 0, 0], rotateImg.shape[0], rotateImg)
    right = img.shape[1] - right + 2

    while abs(currentCol - right) > 2:
        image, currentCol = splitWord(img, currentCol)
        image = cv2.copyMakeBorder(image, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
        if image.shape[1] < 200:
            # Narrow crop: a single glyph.
            image = elementaryRevise(image)
            cv2.imwrite('{cntChar}.png'.format(cntChar=cntChar), image)
            cntChar += 1
        else:
            # Wide crop: try every angle and keep the one with the smallest
            # projection result.
            cntDict = dict()
            for i in range(1, 21):
                rotateChar = rotateImage(image, i)
                rotateChar = findRectangleBorder(rotateChar)
                cntDict[i] = verticalProjection(rotateChar)
            bestAngle = min(cntDict, key = cntDict.get)
            rotatedImg = rotateImage(image, bestAngle)
            rotatedImg = findRectangleBorder(rotatedImg)
            min_cut = cntDict[bestAngle]
            # Saved before drawing so the untouched version can be re-read
            # below; cv2.rectangle paints on the array in place.
            cv2.imwrite('rotated_{bestAngle}_degree.png'.format(bestAngle=bestAngle), rotatedImg)

            # Left part: blank out everything right of the cut.
            new_cut = rotatedImg
            cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)
            new_cut = rotateImage(new_cut, 360 - bestAngle)
            new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
            new_cut = elementaryRevise(new_cut)
            cv2.imwrite('{cntChar}.png'.format(cntChar=cntChar), new_cut)
            cntChar += 1

            # Right part: blank out everything left of the cut.
            rotatedImg = cv2.imread('rotated_{bestAngle}_degree.png'.format(bestAngle=bestAngle))
            remainImg = rotatedImg
            cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)
            #the real angle to rotate back need to estimate
            remainImg = rotateImage(remainImg, 360 - bestAngle)
            remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))

            #deal the remaining character
            # NOTE(review): a remainder exactly 200 pixels wide matches
            # neither test below and is dropped -- confirm this is intended.
            if remainImg.shape[1] < 200:
                remainImg = elementaryRevise(remainImg)
                cv2.imwrite('{cntChar}.png'.format(cntChar=cntChar), remainImg)
                cntChar += 1
            while remainImg.shape[1] > 200:
                # Same cut as above, reusing the angle already chosen.
                rotateChar = rotateImage(remainImg, bestAngle)
                rotateChar = findRectangleBorder(rotateChar)
                cv2.imwrite('rotated_{bestAngle}.png'.format(bestAngle=bestAngle), rotateChar)
                cntDict[bestAngle] = verticalProjection(rotateChar)
                min_cut = cntDict[bestAngle]

                new_cut = rotateChar
                cv2.rectangle(new_cut, (min_cut + 1, 0), (new_cut.shape[1], new_cut.shape[0]), (255, 255, 255), -1)
                new_cut = rotateImage(new_cut, 360 - bestAngle)
                new_cut = cv2.copyMakeBorder(new_cut, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
                new_cut = elementaryRevise(new_cut)
                cv2.imwrite('{cntChar}.png'.format(cntChar=cntChar), new_cut)
                cntChar += 1

                rotatedImg = cv2.imread('rotated_{bestAngle}.png'.format(bestAngle=bestAngle))
                remainImg = rotatedImg
                cv2.rectangle(remainImg, (0, 0), (min_cut, remainImg.shape[0]), (255, 255, 255), -1)
                remainImg = rotateImage(remainImg, 360 - bestAngle)
                remainImg = cv2.copyMakeBorder(remainImg, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
                if remainImg.shape[1] < 200:
                    remainImg = elementaryRevise(remainImg)
                    cv2.imwrite('{cntChar}.png'.format(cntChar=cntChar), remainImg)
                    cntChar += 1
# Segment the cleaned captcha, discard crops with too little black, and move
# the surviving single-character files ('0.png' ... '9.png') into data_set.
image = cv2.imread("perfect.png")
image = cv2.copyMakeBorder(image, 30, 30, 30, 30,cv2.BORDER_CONSTANT, value = (255, 255, 255))
segmentation(image)

os.makedirs(os.path.join(dir_path, 'data_set'), exist_ok=True)
for file in os.listdir(dir_path):
    if file.endswith('.png'):
        # Use the full path so the check does not depend on the current
        # working directory being dir_path.
        png_path = os.path.join(dir_path, file)
        countRate(png_path)
        # A five-character name is '<digit>.png'; countRate may just have
        # deleted the file, hence the isfile test.
        if len(file) == 5 and os.path.isfile(png_path):
            shutil.move(png_path, os.path.join(dir_path, 'data_set', file))

# Everything below works on the data_set folder.
dir_path = os.path.realpath('C:\\Users\\cheny_000\\Documents\\Visual Studio 2015\\Projects\\OCR\\OCR\\data_set')
def trainData(img):
    """Return the label of the stored sample that is closest to ``img``.

    Every row of table ``Data`` in 'dataset.sqlite' is read; column 0 holds
    the image bytes and column 1 the label. Each stored image is written to
    'test.png', decoded with OpenCV and compared to ``img`` with ``mse``.

    Returns:
        The label of the row with the smallest error, or ``None`` when the
        table has no rows.
    """
    char = None
    best_err = float('inf')
    conn = sqlite3.connect('dataset.sqlite')
    try:
        cur = conn.cursor()
        cur.execute('SELECT * FROM Data')
        for row in cur:
            ablob = row[0]
            # OpenCV reads from a path here, so the blob goes through a
            # scratch file.
            with open('test.png', 'wb') as output_file:
                output_file.write(ablob)
            dataImg = cv2.imread('test.png')
            err = mse(dataImg, img)
            if err < best_err:
                best_err = err
                char = row[1]
    finally:
        conn.close()
    return char
def mse(img1, img2):
    """Return the squared-error score between two equally shaped images.

    The arrays are converted to float before subtracting so unsigned pixel
    values cannot wrap around. The summed squared difference is divided by
    ``height * width`` of ``img1``.
    """
    diff = img1.astype('float') - img2.astype('float')
    err = np.sum(diff ** 2)
    err /= float(img1.shape[0] * img1.shape[1])
    return err
# Show every glyph in data_set together with the predicted label; when the
# user answers 'y' the image and the upper-cased label are stored in the
# database. Afterwards all PNG files in data_set are deleted.
for file in os.listdir(dir_path):
    if not file.endswith('.png'):
        # Only image files can be displayed and classified.
        continue
    glyph_path = os.path.join(dir_path, file)
    imgFile = cv2.imread(glyph_path)
    plt.imshow(imgFile)
    plt.show()
    char = trainData(imgFile)
    print(char)
    judge = input()
    if judge == 'y' and char is not None:
        with open(glyph_path, 'rb') as f:
            ablob = f.read()
        conn = sqlite3.connect('dataset.sqlite')
        try:
            cur = conn.cursor()
            cur.execute(
                '\nINSERT INTO Data(img, label)\nVALUES(?, ?)',
                (sqlite3.Binary(ablob), char.upper()),
            )
            conn.commit()
        finally:
            conn.close()

for file in os.listdir(dir_path):
    if file.endswith('.png'):
        os.remove(os.path.join(dir_path, file))
def productTestData():
    """Label every image in ``dir_path`` by hand and store it in the database.

    Each file is displayed, the user is asked for its label, and the file's
    bytes plus the upper-cased label are inserted into table ``Data`` of
    'dataset.sqlite'. An answer that is empty or only whitespace skips the
    file.
    """
    for file in os.listdir(dir_path):
        sample_path = os.path.join(dir_path, file)
        imgFile = cv2.imread(sample_path)
        plt.imshow(imgFile)
        plt.show()
        target = input("The target is ")
        # str.isspace() is False for an empty string, so test the stripped
        # text instead to reject both "" and "   ".
        if not target.strip():
            continue
        with open(sample_path, 'rb') as f:
            ablob = f.read()
        conn = sqlite3.connect('dataset.sqlite')
        try:
            cur = conn.cursor()
            cur.execute(
                '\nINSERT INTO Data(img, label)\nVALUES(?, ?)',
                (sqlite3.Binary(ablob), target.upper()),
            )
            conn.commit()
        finally:
            conn.close()
#sm = browser.find_element_by_name('ibnSubmit').click()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment