PYTHON PROJECT
CODE :
# Import required packages
import cv2
import pytesseract

# Mention the installed location of Tesseract-OCR in your system
pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'

# NOTE(review): the original input/output file names were lost when this
# listing was extracted (they appeared only as "[Link]"). The two values
# below are placeholders -- confirm them against the real project files.
IMAGE_PATH = "sample.jpg"
OUTPUT_PATH = "recognized.txt"

# Read image from which text needs to be extracted
img = cv2.imread(IMAGE_PATH)

# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with a clear message instead of an obscure error in cvtColor.
if img is None:
    raise FileNotFoundError("Could not read image: " + IMAGE_PATH)

# Preprocessing the image starts

# Convert the image to gray scale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Performing OTSU threshold
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

# Specify structure shape and kernel size.
# Kernel size increases or decreases the area
# of the rectangle to be detected.
# A smaller value like (10, 10) will detect
# each word instead of a sentence.
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))

# Applying dilation on the threshold image
dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)

# Finding contours
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_NONE)

# Creating a copy of image; the detected text regions are outlined on this copy
im2 = img.copy()

# Looping through the identified contours.
# Each rectangular part is cropped and passed on
# to pytesseract for extracting text from it.
# Extracted text is then written into the text file.
# The file is opened once in write mode (which truncates any previous
# content) and the with-block guarantees it is closed, even on error.
with open(OUTPUT_PATH, "w") as out_file:
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)

        # Drawing a rectangle on copied image
        cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Cropping the text block for giving input to OCR.
        # Crop from the untouched image, not from im2, so the green outline
        # drawn above does not end up inside the region handed to Tesseract.
        cropped = img[y:y + h, x:x + w]

        # Apply OCR on the cropped image
        text = pytesseract.image_to_string(cropped)

        # Appending the text into file
        out_file.write(text)
        out_file.write("\n")
OUTPUT :
IMAGE FILE :
RECOGNIZED TEXT :