deathCertReader

Sleeping

App Files Files Community

deathCertReader / app.py

Alealejandrooo

Fixing the dimensions in extract_detected_entries_pdl()

922a160 over 1 year ago

raw

history blame

10.1 kB

	import re
	import cv2
	import numpy as np
	from paddleocr import PaddleOCR
	from PIL import Image
	import matplotlib.pyplot as plt
	import pandas as pd
	import matplotlib.pyplot as plt
	import onnxruntime
	import gradio as gr

	# Build the PaddleOCR engine once at import time; the OCR helper below reuses it.
	ocr = PaddleOCR(
	    lang='sl',
	    enable_mkldnn=True,
	    cls=False,
	    show_log=False,
	)

	# Load the two ONNX sessions: one is passed to deskew(), the other to autoencode_ONNX().
	model_deskew = onnxruntime.InferenceSession(
	    "./models/CNN_deskew_v0.0.2.onnx"
	)
	model_denoise = onnxruntime.InferenceSession(
	    "./models/autoencoder_denoise_v0.0.2.onnx"
	)

	##### All Functions #####

	def preprocess_image(image):
	    '''
	    Function: shrink the image so the later steps work on a lighter array
	    Input: image (anything np.array() can convert)
	    Output: image array whose width and height are divided by 1.494
	            (truncated to whole pixels)
	    '''
	    pixels = np.array(image)
	    shrink_factor = 1.494
	    # cv2.resize expects the target size as (width, height)
	    new_width = int(pixels.shape[1] / shrink_factor)
	    new_height = int(pixels.shape[0] / shrink_factor)
	    return cv2.resize(pixels, (new_width, new_height), interpolation=cv2.INTER_AREA)


	def deskew(image, model):
	    '''
	    Function: deskew an image using the angle predicted by a classifier model
	    Input: image as an array (converted with cv2.COLOR_BGR2GRAY, so a
	           3-channel image is expected) and a model exposing
	           run(None, {'conv2d_input': batch})
	    Output: tuple (deskewed image, predicted angle in degrees as int,
	            score of the predicted class multiplied by 100)
	    '''
	    # Model class index -> skew angle in degrees. The order is not numeric
	    # (it looks like the labels were sorted as strings -- TODO confirm), so it
	    # has to stay an explicit lookup table. Named class_to_angle so the
	    # builtin map() is not shadowed, and stored as ints to avoid re-parsing.
	    class_to_angle = {
	        0: -1, 1: -10, 2: -11, 3: -12, 4: -13,
	        5: -14, 6: -15, 7: -2, 8: -3, 9: -4,
	        10: -5, 11: -6, 12: -7, 13: -8, 14: -9,
	        15: 0, 16: 1, 17: 10, 18: 11, 19: 12,
	        20: 13, 21: 14, 22: 15, 23: 180, 24: 2,
	        25: 270, 26: 3, 27: 4, 28: 5, 29: 6,
	        30: 7, 31: 8, 32: 9, 33: 90,
	    }

	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    # Shrink to 20% of the original size first, then force the fixed
	    # 200x200 input size the model is fed with.
	    small_dim = (int(gray.shape[1] * 0.2), int(gray.shape[0] * 0.2))
	    small = cv2.resize(gray, small_dim, interpolation=cv2.INTER_AREA)
	    model_input = cv2.resize(small, (200, 200))
	    # add batch and channel dimensions, then normalize to [0, 1]
	    model_input = model_input.astype('float32').reshape(1, 200, 200, 1)
	    model_input = model_input / 255

	    predictions = model.run(None, {'conv2d_input': model_input})
	    # index of the best-scoring class
	    pred = int(predictions[0].argmax())
	    angle = class_to_angle[pred]
	    skew_confidence = predictions[0][0][pred] * 100

	    # Quarter turns are handled with cv2.rotate instead of the
	    # fixed-size affine warp used for every other angle.
	    if angle == 90:
	        deskewed_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
	        return deskewed_image, angle, skew_confidence
	    if angle == 270:
	        deskewed_image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
	        return deskewed_image, angle, skew_confidence

	    # Every other angle (including 0 and 180): rotate about the image
	    # centre by -angle, keeping the original canvas size.
	    (h, w) = image.shape[:2]
	    center = (w // 2, h // 2)
	    M = cv2.getRotationMatrix2D(center, -angle, 1.0)
	    deskewed_image = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC,
	                                    borderMode=cv2.BORDER_REPLICATE)
	    return deskewed_image, angle, skew_confidence


	def prepare_image_to_autoencoder(image):
	    '''
	    Function: crop, resize and normalize the image for the autoencoder.
	    Input: image: deskewed image array (converted with cv2.COLOR_BGR2GRAY)
	    Output: array of shape (600, 600, 1) with pixel values divided by 255.0
	    '''
	    side = 600
	    rows, cols = image.shape[:2]
	    # crop window expressed as fixed fractions of the image size
	    top, bottom = int(rows / 3.6), int(rows / 1.87)
	    left, right = int(cols / 3.67), int(cols / 1.575)
	    crop = image[top:bottom, left:right]
	    # bring the crop to the fixed size, then drop the colour channels
	    crop = cv2.resize(crop, (side, side))
	    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
	    # normalize and add the trailing channel axis
	    return (gray / 255.0).reshape(side, side, 1)


	def autoencode_ONNX(image, model):
	    '''
	    Function: remove noise from image
	    Input: image holding 600*600 values (it is reshaped to (1, 600, 600, 1))
	           and a model exposing run(None, {'input_2': batch})
	    Output: first model output, squeezed, scaled by 255 and returned as uint8
	    '''
	    batch = image.astype(np.float32).reshape(1, 600, 600, 1)
	    denoised = model.run(None, {'input_2': batch})[0]
	    denoised = denoised.squeeze() * 255
	    # Clip before casting: a value outside [0, 255] would otherwise wrap
	    # around in uint8 and turn into an unrelated pixel value.
	    return np.clip(denoised, 0, 255).astype('uint8')

	def extract_detected_entries_pdl(image):
	    """
	    Run OCR on an image and return the detected entries as a DataFrame.

	    The module-level ``ocr`` engine is applied to the image. For every
	    detected entry the text (passed through ``cleanString_basic``), the
	    confidence score and the boundary box are collected.

	    Parameters
	    ----------
	    image : numpy.ndarray
	        The input image to be processed.

	    Returns
	    -------
	    pandas.DataFrame
	        One row per detected entry, with the columns:
	        - "Text": The detected text.
	        - "Score": The confidence score for the detected text.
	        - "Boundary Box": The box corner points as a tuple of tuples.
	        The DataFrame has the same columns and no rows when nothing
	        was detected.
	    """
	    # run the OCR
	    result = ocr.ocr(image)
	    # NOTE(review): PaddleOCR appears to return [None] when no text is found
	    # (confirm for the installed version); treat None/empty as "no entries"
	    # instead of failing while iterating over None.
	    detections = (result[0] if result else None) or []

	    # each detection is read as (box points, (text, score))
	    txt = [cleanString_basic(entry[-1][0]) for entry in detections]
	    scores = [entry[-1][1] for entry in detections]
	    boxes = [tuple(map(tuple, entry[0])) for entry in detections]

	    return pd.DataFrame({"Text": txt, "Score": scores, "Boundary Box": boxes})

	def cleanString_basic(word):
	    '''
	    Function: replace every "$" in the word with "s"
	    Input: word (string)
	    Output: string with the replacements applied
	    '''
	    return word.replace("$", "s")

	def clean_string_start(string: str):
	    '''
	    Function: strip one unwanted character from the start of a string
	    Input: string
	    Output: tuple (string without that leading character, the removed
	            character, or "√" when nothing was removed)
	    '''
	    # '|' replaces the original '\|' entry: that literal is an invalid escape
	    # which evaluates to backslash + pipe, so a lone leading pipe was never
	    # matched. Strings starting with a backslash are still covered by '\\'.
	    chars_to_remove = ('!', "'", '[', ']', '*', '|', '.', ':', '\\', '/')
	    if string.startswith(chars_to_remove):
	        return string[1:], string[0]
	    return string, "√"

	def clean_string_end(string: str):
	    '''
	    Function: strip one unwanted character from the end of a string
	    Input: string
	    Output: tuple (string without that trailing character, the removed
	            character, or "√" when nothing was removed)
	    '''
	    # '|' replaces the original '\|' entry: that literal is an invalid escape
	    # which evaluates to backslash + pipe, so a trailing pipe was only matched
	    # when a backslash happened to precede it.
	    chars_to_remove = ('!', "'", '[', ']', '*', '|', '.', ':', '\\', '/')
	    if string.endswith(chars_to_remove):
	        return string[:-1], string[-1]
	    return string, "√"

	def clean_dates(date: 'str'):
	    '''
	    Function: clean a "datum smrti" value by dropping ASCII letters and the
	              characters "!", "[", backslash and "|".
	    Input: date (string format)
	    Output: tuple (cleaned string, "Y" when nothing was removed, otherwise
	            the list of removed characters in order of appearance)
	    '''
	    unwanted = r'[a-zA-Z!\[\\|]'
	    # collect the offending characters before stripping them out
	    removed = re.findall(unwanted, date)
	    cleaned = re.sub(unwanted, '', date)
	    return cleaned, (removed if removed else "Y")


	##### Main Function #####

	def _entry_with_confidence(df, position):
	    '''
	    Function: read one OCR entry and format its score as a percentage
	    Input: df (first column text, second column score) and the row position
	    Output: tuple (text, "<score * 100 rounded to 3 decimals>%"), or
	            ("", "") when the DataFrame has no row at that position
	    '''
	    if position >= len(df):
	        return "", ""
	    row = df.iloc[position]
	    # .iloc keeps the access positional; plain row[0] on a label-indexed
	    # Series relies on a deprecated integer fallback.
	    confidence = round(float(row.iloc[1]) * 100, 3)
	    return row.iloc[0], f"{confidence}%"


	def pdf_extract_gr(image):
	    '''
	    Function: run the whole pipeline (resize, deskew, crop, denoise, OCR)
	              on one uploaded image
	    Input: image as provided by the Gradio image input
	    Output: tuple (OCR DataFrame, deskewed image, skew angle, skew confidence,
	            denoised image, first name, first name confidence, surname,
	            surname confidence, date of death, date of death confidence).
	            Name/date fields are empty strings when the OCR returned too few
	            entries.
	    '''
	    extractimg = preprocess_image(image)
	    # deskew the image
	    deskewed_image, angle, skew_confidence = deskew(extractimg, model_deskew)
	    # prepare the image for the autoencoder
	    cleanimg = prepare_image_to_autoencoder(deskewed_image)
	    # clean the image
	    img = autoencode_ONNX(cleanimg, model_denoise)
	    # extract the entries from the image
	    df = extract_detected_entries_pdl(img)
	    # The first three OCR entries are taken, in order, as first name,
	    # surname and date of death.
	    firstname, firstnameconfidence = _entry_with_confidence(df, 0)
	    surname, surnameconfidence = _entry_with_confidence(df, 1)
	    dodname, dodconfidence = _entry_with_confidence(df, 2)
	    # return all the results
	    return (df, deskewed_image, angle, skew_confidence, img,
	            firstname, firstnameconfidence,
	            surname, surnameconfidence,
	            dodname, dodconfidence)


	##### Gradio Style #####

	# Custom stylesheet handed to gr.Blocks(css=css). The class names defined here
	# (run_container, run_btn, results_container, results_cell, ...) are attached
	# to components through their elem_classes arguments in the layout below.
	css = """
	.run_container {
	display: flex;
	flex-direction: column;
	align-items: center;
	gap: 10px;
	}
	.run_btn {
	margin: auto;
	width: 50%;
	display: flex;
	}
	.upload_cell {
	margin: auto;
	display: flex;
	}
	.results_container {
	display: flex;
	justify-content: space-evenly;
	}
	.results_cell {
	}
	"""

	##### Gradio Blocks #####

	# Build the Gradio UI: an image input, three text/confidence result pairs,
	# collapsible panels for the full OCR table, the cleaned image and the deskew
	# details, and one button that runs pdf_extract_gr.
	# NOTE(review): the bodies of the nested `with` blocks carry no indentation in
	# this copy of the file, so the intended nesting cannot be read off the code --
	# confirm it against the original layout before running.
	with gr.Blocks(css = css) as demo:
	gr.Markdown("""
	# Death Certificate Extraction
	""", elem_classes = "h1")
	gr.Markdown("Upload a PDF, extract data")
	with gr.Box(elem_classes = "run_container"):
	# ExtractInput = gr.File(label = "Death Certificate", elem_classes="upload_cell")
	# NOTE(review): the button text is passed as `label`; confirm that this (and not
	# `value`) sets the visible caption in the installed gradio version.
	ExtractButton = gr.Button(label = "Extract", elem_classes="run_btn")
	with gr.Row(elem_id = "hide"):
	with gr.Column():
	# image uploaded by the user; passed as the single input to pdf_extract_gr
	ExtractInput = gr.Image()
	with gr.Column():
	# ExtractResult = gr.Image(label = "result")
	# one row per extracted field: value on the left, confidence on the right
	with gr.Row(elem_classes = "results_container"):
	FirstNameBox = gr.Textbox(label = "First Name", elem_classes = "results_cell")
	FirstNameConfidenceBox = gr.Textbox(label = "First Name Confidence", elem_classes = "results_cell")
	with gr.Row(elem_classes = "results_container"):
	SurnameNameBox = gr.Textbox(label = "Surname", elem_classes = "results_cell")
	SurnameNameConfidenceBox = gr.Textbox(label = "Surname Confidence", elem_classes = "results_cell")
	with gr.Row(elem_classes = "results_container"):
	DODBox = gr.Textbox(label = "Date of Death", elem_classes = "results_cell")
	DODConfidenceBox = gr.Textbox(label = "Date of Death Confidence", elem_classes = "results_cell")

	# collapsed by default: full OCR table
	with gr.Accordion("Full Results", open = False):
	ExtractDF = gr.Dataframe(label = "Results")

	# collapsed by default: denoised image returned by the pipeline
	with gr.Accordion("Clean Image", open = False):
	CleanOutput = gr.Image()

	# collapsed by default: deskewed image with the predicted angle and confidence
	with gr.Accordion("Deskew", open = False):
	DeskewOutput = gr.Image()
	with gr.Column():
	DeskewAngle = gr.Number(label = "Angle")
	with gr.Column():
	DeskewConfidence = gr.Number(label = "Confidence")

	# The outputs list follows the order of the tuple returned by pdf_extract_gr:
	# DataFrame, deskewed image, angle, skew confidence, clean image, then the
	# (value, confidence) pairs for first name, surname and date of death.
	ExtractButton.click(fn=pdf_extract_gr,
	inputs = ExtractInput,
	outputs = [ExtractDF, DeskewOutput, DeskewAngle,
	DeskewConfidence, CleanOutput, FirstNameBox,
	FirstNameConfidenceBox, SurnameNameBox,
	SurnameNameConfidenceBox, DODBox, DODConfidenceBox])

	# start the app when the module is executed
	demo.launch(show_api=True, share=False, debug=True)