Alexander Slessor
committed on
Commit
•
01a1c19
1
Parent(s):
7086666
updated readme
Browse files- README.md +8 -2
- handler.py +11 -12
README.md
CHANGED
@@ -1,7 +1,13 @@
|
|
1 |
---
|
2 |
language: en
|
3 |
license: cc-by-nc-sa-4.0
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
---
|
6 |
|
7 |
# LayoutLMv2
|
@@ -24,7 +30,7 @@ Examples & Guides
|
|
24 |
- https://mccormickml.com/2020/03/10/question-answering-with-a-fine-tuned-BERT/
|
25 |
|
26 |
|
27 |
-
#
|
28 |
|
29 |
```
|
30 |
The class LayoutLMv2FeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use LayoutLMv2ImageProcessor instead.
|
|
|
1 |
---
|
2 |
language: en
|
3 |
license: cc-by-nc-sa-4.0
|
4 |
+
tags:
|
5 |
+
- endpoints-template
|
6 |
+
library_name: generic
|
7 |
+
model-index:
|
8 |
+
- name: layoutlmv2-base-uncased
|
9 |
+
results: []
|
10 |
+
pipeline_tag: other
|
11 |
---
|
12 |
|
13 |
# LayoutLMv2
|
|
|
30 |
- https://mccormickml.com/2020/03/10/question-answering-with-a-fine-tuned-BERT/
|
31 |
|
32 |
|
33 |
+
# Warnings
|
34 |
|
35 |
```
|
36 |
The class LayoutLMv2FeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use LayoutLMv2ImageProcessor instead.
|
handler.py
CHANGED
@@ -8,17 +8,17 @@ from transformers import LayoutLMv2TokenizerFast
|
|
8 |
from transformers.tokenization_utils_base import BatchEncoding
|
9 |
from transformers.tokenization_utils_base import TruncationStrategy
|
10 |
from transformers.utils import TensorType
|
11 |
-
from transformers.modeling_outputs import (
|
12 |
-
QuestionAnsweringModelOutput as QuestionAnsweringModelOutputBase
|
13 |
-
)
|
14 |
import numpy as np
|
15 |
-
from PIL import Image, ImageDraw, ImageFont
|
16 |
-
from subprocess import run
|
17 |
import pdf2image
|
18 |
-
from pprint import pprint
|
19 |
import logging
|
20 |
from os import environ
|
21 |
-
from dataclasses import dataclass
|
22 |
|
23 |
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
24 |
# install tesseract-ocr and pytesseract
|
@@ -163,9 +163,8 @@ class EndpointHandler:
|
|
163 |
includes the deserialized image file as PIL.Image
|
164 |
"""
|
165 |
image = data.pop("inputs", data)
|
166 |
-
|
167 |
-
# image = pdf_to_image(image)
|
168 |
images = [x.convert("RGB") for x in pdf2image.convert_from_bytes(image)]
|
|
|
169 |
question = "what is the bill date"
|
170 |
with torch.no_grad():
|
171 |
for image in images:
|
@@ -207,9 +206,9 @@ class EndpointHandler:
|
|
207 |
target_start_index = torch.tensor([7])
|
208 |
target_end_index = torch.tensor([14])
|
209 |
outputs = self.model(**encoding, start_positions=target_start_index, end_positions=target_end_index)
|
210 |
-
predicted_answer_span_start = outputs.start_logits.argmax(-1).item()
|
211 |
-
predicted_answer_span_end = outputs.end_logits.argmax(-1).item()
|
212 |
-
|
213 |
logger.info(f'''
|
214 |
START
|
215 |
predicted_start_idx: {predicted_start_idx}
|
|
|
8 |
from transformers.tokenization_utils_base import BatchEncoding
|
9 |
from transformers.tokenization_utils_base import TruncationStrategy
|
10 |
from transformers.utils import TensorType
|
11 |
+
# from transformers.modeling_outputs import (
|
12 |
+
# QuestionAnsweringModelOutput as QuestionAnsweringModelOutputBase
|
13 |
+
# )
|
14 |
import numpy as np
|
15 |
+
# from PIL import Image, ImageDraw, ImageFont
|
16 |
+
# from subprocess import run
|
17 |
import pdf2image
|
18 |
+
# from pprint import pprint
|
19 |
import logging
|
20 |
from os import environ
|
21 |
+
# from dataclasses import dataclass
|
22 |
|
23 |
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
24 |
# install tesseract-ocr and pytesseract
|
|
|
163 |
includes the deserialized image file as PIL.Image
|
164 |
"""
|
165 |
image = data.pop("inputs", data)
|
|
|
|
|
166 |
images = [x.convert("RGB") for x in pdf2image.convert_from_bytes(image)]
|
167 |
+
|
168 |
question = "what is the bill date"
|
169 |
with torch.no_grad():
|
170 |
for image in images:
|
|
|
206 |
target_start_index = torch.tensor([7])
|
207 |
target_end_index = torch.tensor([14])
|
208 |
outputs = self.model(**encoding, start_positions=target_start_index, end_positions=target_end_index)
|
209 |
+
# predicted_answer_span_start = outputs.start_logits.argmax(-1).item()
|
210 |
+
# predicted_answer_span_end = outputs.end_logits.argmax(-1).item()
|
211 |
+
|
212 |
logger.info(f'''
|
213 |
START
|
214 |
predicted_start_idx: {predicted_start_idx}
|