-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathOCR.py
106 lines (85 loc) · 3.46 KB
/
OCR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import cv2
import numpy as np
import pytesseract
import time
def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The corners are sorted by their angle around the centroid. In image
    coordinates (y grows downward) increasing angle walks the quadrilateral
    clockwise, which is exactly the tl -> tr -> br -> bl direction. The
    sequence is then rotated so that it starts at the corner with the
    smallest ``x + y`` (the top-left corner).

    Picking each corner independently from the extremes of ``x + y`` and
    ``y - x`` can select the same point twice when those values tie (for
    example a grid rotated by about 45 degrees), which drops a real corner.
    Sorting by angle always returns every input point exactly once.

    Args:
        pts: Array-like of shape (4, 2) holding (x, y) coordinates.

    Returns:
        A (4, 2) float32 array ordered top-left, top-right, bottom-right,
        bottom-left.

    Raises:
        ValueError: If ``pts`` does not have shape (4, 2).
    """
    pts = np.asarray(pts, dtype="float32")
    if pts.shape != (4, 2):
        raise ValueError(
            f"Expected four (x, y) points with shape (4, 2), got {pts.shape}."
        )

    # Angle of every corner as seen from the centroid of the quadrilateral.
    center = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])

    # Ascending angle == clockwise on screen because the y axis points down.
    clockwise = pts[np.argsort(angles, kind="stable")]

    # Rotate the cycle so the top-left corner (smallest x + y) comes first.
    start = int(np.argmin(clockwise.sum(axis=1)))
    return np.roll(clockwise, -start, axis=0)
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` into an upright rectangle.

    The four points are put into top-left, top-right, bottom-right,
    bottom-left order by ``order_points``. The output size is the longer of
    each pair of opposite edges, truncated to whole pixels.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: The four corner points of the region to straighten.

    Returns:
        The perspective-corrected image.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    def edge_px(start, end):
        # Length of the edge between two corners, truncated to whole pixels.
        return int(np.linalg.norm(start - end))

    out_w = max(edge_px(bottom_right, bottom_left), edge_px(top_right, top_left))
    out_h = max(edge_px(top_right, bottom_right), edge_px(top_left, bottom_left))

    # Destination corners, listed in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype="float32",
    )

    homography = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, homography, (out_w, out_h))
def extract_digit(cell, border_frac=0.15, min_ink_frac=0.02):
    """Read the digit printed in a single Sudoku cell image.

    Args:
        cell: Cell image as a numpy array, either grayscale (2-D) or BGR (3-D).
        border_frac: Fraction of the cell trimmed from every side before
            reading it, so leftover grid lines are not mistaken for strokes.
            Heuristic value; tune for your images.
        min_ink_frac: Minimum fraction of dark pixels required before the
            cell is sent to Tesseract; cells below it are treated as empty.
            Heuristic value; tune for your images.

    Returns:
        The recognised digit (1-9), or 0 if the cell is empty or no digit
        could be recognised.
    """
    if cell is None or cell.size == 0:
        return 0

    gray = cv2.cvtColor(cell, cv2.COLOR_BGR2GRAY) if cell.ndim == 3 else cell

    # Drop the outer band of the cell, where the grid lines live.
    height, width = gray.shape[:2]
    pad_y = int(height * border_frac)
    pad_x = int(width * border_frac)
    roi = gray[pad_y:height - pad_y, pad_x:width - pad_x]
    if roi.size == 0:
        return 0

    # Same blur + adaptive threshold used for grid detection; here it only
    # decides whether the cell contains enough ink to be worth an OCR call.
    blurred = cv2.GaussianBlur(roi, (5, 5), 0)
    binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    ink_pixels = binary.size - cv2.countNonZero(binary)
    if ink_pixels < min_ink_frac * binary.size:
        return 0

    # --psm 10 tells Tesseract to treat the image as a single character; the
    # whitelist restricts the answer to valid Sudoku digits.
    text = pytesseract.image_to_string(
        roi, config="--psm 10 -c tessedit_char_whitelist=123456789"
    )
    for char in text:
        if char in "123456789":
            return int(char)
    return 0


def scan_puzzle(image_path="puzzle.jpg"):
    """Scan a Sudoku puzzle from an image file.

    The largest external contour of the thresholded image is assumed to be
    the puzzle grid. It is straightened with ``four_point_transform``, split
    into a 9x9 array of equally sized cells, and each cell is read with
    ``extract_digit``.

    Args:
        image_path: Path of the image to load.

    Returns:
        A 9x9 integer numpy array of the puzzle, with 0 for empty cells.

    Raises:
        FileNotFoundError: If the image cannot be loaded.
        ValueError: If no contour is found, the largest contour is not a
            4-sided polygon, or the detected grid is too small to split
            into 9x9 cells.
    """
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not load image from {image_path}")

    # Grayscale -> blur -> adaptive threshold, inverted so lines are white
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    thresh = cv2.bitwise_not(thresh)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); pick the contour list in either case.
    found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]
    if not contours:
        raise ValueError("No contours found. Check the image quality.")

    # Assume largest contour is the Sudoku puzzle
    contour = max(contours, key=cv2.contourArea)

    # Approximate the contour to a polygon
    peri = cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
    if len(approx) != 4:
        raise ValueError("Sudoku grid not detected as a 4-sided polygon.")

    # Warp perspective to get a top-down view of the puzzle
    warped = four_point_transform(image, approx.reshape(4, 2))
    warped_gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)

    # Dimensions of each cell
    h, w = warped_gray.shape
    cell_height = h // 9
    cell_width = w // 9
    if cell_height == 0 or cell_width == 0:
        raise ValueError("Detected Sudoku grid is too small to split into cells.")

    # Split into 9x9 cells and read each one
    puzzle = np.zeros((9, 9), dtype=int)
    for row in range(9):
        y_start = row * cell_height
        for col in range(9):
            x_start = col * cell_width
            cell = warped_gray[y_start:y_start + cell_height,
                               x_start:x_start + cell_width]
            puzzle[row, col] = extract_digit(cell)
    return puzzle
if __name__ == "__main__":
    # Example usage:
    #     python OCR.py [path/to/sudoku_image]
    # Without an argument the script loads "sudoku_puzzle1.jpg", resolved
    # relative to the current working directory. The image should contain a
    # sudoku puzzle.
    import sys

    puzzle_path = sys.argv[1] if len(sys.argv) > 1 else "sudoku_puzzle1.jpg"
    puzzle_matrix = scan_puzzle(puzzle_path)
    print("Scanned Puzzle:")
    print(puzzle_matrix)