unstructured-chipper-app-v3

Running

App Files Files Community

ajimeno committed on Sep 6, 2023

Commit

ca53d7c

•

1 Parent(s): 7a740e0

First commit

Browse files

Files changed (5) hide show

README.md +7 -6
app.py +92 -0
document.png +0 -0
requirements.txt +7 -0
rsz_unstructured_logo.png +0 -0

README.md CHANGED Viewed

@@ -1,13 +1,14 @@
 ---
-title: Unstructured Chipper App
-emoji: 📉
 colorFrom: green
-colorTo: purple
 sdk: streamlit
-sdk_version: 1.26.0
 app_file: app.py
 pinned: false
-license: cc-by-nc-4.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Ved Fine Tuned
+emoji: 🦀
 colorFrom: green
+colorTo: blue
 sdk: streamlit
+sdk_version: 1.19.0
 app_file: app.py
 pinned: false
+license: other
+duplicated_from: unstructuredio/ved-pre-trained
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import numpy as np
+import torch
+from torch import nn
+import streamlit as st
+import os
+from PIL import Image
+from io import BytesIO
+from transformers import VisionEncoderDecoderModel, VisionEncoderDecoderConfig, DonutProcessor, DonutImageProcessor, AutoTokenizer
+def run_prediction(sample, model, processor):
+    pixel_values = processor(np.array(
+                    sample,
+                    np.float32,
+                ), return_tensors="pt").pixel_values
+    with torch.no_grad():
+        outputs = model.generate(
+            pixel_values.to(device),
+            decoder_input_ids=processor.tokenizer("<s><s_plain>", add_special_tokens=False, return_tensors="pt").input_ids.to(device),
+            do_sample=True,
+            top_p=0.92,
+            top_k=5,
+            no_repeat_ngram_size=10,
+            num_beams=3
+        )
+    # process output
+    prediction = processor.batch_decode(outputs)[0]
+    print(prediction)
+    return prediction
+logo = Image.open("./rsz_unstructured_logo.png")
+st.image(logo)
+st.markdown('''
+### Chipper
+Chipper is an OCR-free Document Understanding Transformer. It was pre-trained with over 1M documents from public sources and fine-tuned on a large range of documents.
+At [Unstructured.io](https://github.com/Unstructured-IO/unstructured) we are on a mission to build custom preprocessing pipelines for labeling, training, or production ML-ready pipelines.
+Come and join us in our public repos and contribute! Each of your contributions and feedback holds great value and is very significant to the community.
+''')
+image_upload = None
+photo = None
+with st.sidebar:
+    # file upload
+    uploaded_file = st.file_uploader("Upload a document")
+    if uploaded_file is not None:
+        # To read file as bytes:
+        image_bytes_data = uploaded_file.getvalue()
+        image_upload = Image.open(BytesIO(image_bytes_data))
+if image_upload:
+    image = image_upload
+else:
+    image = Image.open(f"./document.png")
+st.image(image, caption='Your target document')
+with st.spinner(f'Processing the document ...'):
+        pre_trained_model = "unstructuredio/chipper-fast-fine-tuning"
+        processor = DonutProcessor.from_pretrained(pre_trained_model)
+        model = VisionEncoderDecoderModel.from_pretrained(pre_trained_model)
+        from huggingface_hub import hf_hub_download
+        lm_head_file = hf_hub_download(
+            repo_id=pre_trained_model, filename="lm_head.pth"
+        )
+        rank = 128
+        model.decoder.lm_head = nn.Sequential(
+            nn.Linear(model.decoder.lm_head.weight.shape[1], rank, bias=False),
+            nn.Linear(rank, rank, bias=False),
+            nn.Linear(rank, model.decoder.lm_head.weight.shape[0], bias=True),
+        )
+        model.decoder.lm_head.load_state_dict(torch.load(lm_head_file))
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        model.eval()
+        model.to(device)
+st.info(f'Parsing document')
+parsed_info = run_prediction(image.convert("RGB"), model, processor)
+st.text(f'\nDocument:')
+st.text_area('Output text', value=parsed_info, height=800)

document.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+altair<5
+huggingface_hub
+numpy
+opencv-python
+streamlit
+torch==1.13.1
+transformers

rsz_unstructured_logo.png ADDED Viewed