Spaces:
Sleeping
Sleeping
srijaydeshpande
committed on
Commit
•
33ac67b
1
Parent(s):
0ff9f5d
Update app.py
Browse files
app.py
CHANGED
@@ -84,6 +84,10 @@ def txt_to_html(text):
|
|
84 |
@spaces.GPU(duration=80)
|
85 |
def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
|
86 |
|
|
|
|
|
|
|
|
|
87 |
llm = Llama(
|
88 |
model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
|
89 |
flash_attn=True,
|
@@ -107,11 +111,19 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
|
|
107 |
)
|
108 |
output = output['choices'][0]['message']['content']
|
109 |
|
|
|
|
|
|
|
|
|
110 |
# Remove starting header string in output
|
111 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
112 |
if find_index != -1:
|
113 |
output = output[find_index:].strip()
|
114 |
|
|
|
|
|
|
|
|
|
115 |
# print('---------------Remove Dates-----------------------')
|
116 |
# print(output)
|
117 |
|
@@ -130,11 +142,19 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
|
|
130 |
)
|
131 |
output = output['choices'][0]['message']['content']
|
132 |
|
|
|
|
|
|
|
|
|
133 |
# Remove starting header string in output
|
134 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
135 |
if find_index != -1:
|
136 |
output = output[find_index:].strip()
|
137 |
|
|
|
|
|
|
|
|
|
138 |
# print('---------------Remove Addresses-----------------------')
|
139 |
# print(output)
|
140 |
|
@@ -153,11 +173,19 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
|
|
153 |
)
|
154 |
output = output['choices'][0]['message']['content']
|
155 |
|
|
|
|
|
|
|
|
|
156 |
# Remove starting header string in output
|
157 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
158 |
if find_index != -1:
|
159 |
output = output[find_index:].strip()
|
160 |
|
|
|
|
|
|
|
|
|
161 |
# print('---------------Remove Names-----------------------')
|
162 |
# print(output)
|
163 |
|
@@ -178,11 +206,19 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
|
|
178 |
)
|
179 |
output = output['choices'][0]['message']['content']
|
180 |
|
|
|
|
|
|
|
|
|
181 |
# Remove starting header string in output
|
182 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
183 |
if find_index != -1:
|
184 |
output = output[find_index:].strip()
|
185 |
|
|
|
|
|
|
|
|
|
186 |
# print('---------------Remove Registration Numbers-----------------------')
|
187 |
# print(output)
|
188 |
|
|
|
84 |
@spaces.GPU(duration=80)
|
85 |
def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
|
86 |
|
87 |
+
print('-----------------------------------------------------------')
|
88 |
+
print(pdftext)
|
89 |
+
print('-----------------------------------------------------------')
|
90 |
+
|
91 |
llm = Llama(
|
92 |
model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
|
93 |
flash_attn=True,
|
|
|
111 |
)
|
112 |
output = output['choices'][0]['message']['content']
|
113 |
|
114 |
+
print('-----------------------------------------------------------')
|
115 |
+
print(output)
|
116 |
+
print('-----------------------------------------------------------')
|
117 |
+
|
118 |
# Remove starting header string in output
|
119 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
120 |
if find_index != -1:
|
121 |
output = output[find_index:].strip()
|
122 |
|
123 |
+
print('-----------------------------------------------------------')
|
124 |
+
print(output)
|
125 |
+
print('-----------------------------------------------------------')
|
126 |
+
|
127 |
# print('---------------Remove Dates-----------------------')
|
128 |
# print(output)
|
129 |
|
|
|
142 |
)
|
143 |
output = output['choices'][0]['message']['content']
|
144 |
|
145 |
+
print('-----------------------------------------------------------')
|
146 |
+
print(output)
|
147 |
+
print('-----------------------------------------------------------')
|
148 |
+
|
149 |
# Remove starting header string in output
|
150 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
151 |
if find_index != -1:
|
152 |
output = output[find_index:].strip()
|
153 |
|
154 |
+
print('-----------------------------------------------------------')
|
155 |
+
print(output)
|
156 |
+
print('-----------------------------------------------------------')
|
157 |
+
|
158 |
# print('---------------Remove Addresses-----------------------')
|
159 |
# print(output)
|
160 |
|
|
|
173 |
)
|
174 |
output = output['choices'][0]['message']['content']
|
175 |
|
176 |
+
print('-----------------------------------------------------------')
|
177 |
+
print(output)
|
178 |
+
print('-----------------------------------------------------------')
|
179 |
+
|
180 |
# Remove starting header string in output
|
181 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
182 |
if find_index != -1:
|
183 |
output = output[find_index:].strip()
|
184 |
|
185 |
+
print('-----------------------------------------------------------')
|
186 |
+
print(output)
|
187 |
+
print('-----------------------------------------------------------')
|
188 |
+
|
189 |
# print('---------------Remove Names-----------------------')
|
190 |
# print(output)
|
191 |
|
|
|
206 |
)
|
207 |
output = output['choices'][0]['message']['content']
|
208 |
|
209 |
+
print('-----------------------------------------------------------')
|
210 |
+
print(output)
|
211 |
+
print('-----------------------------------------------------------')
|
212 |
+
|
213 |
# Remove starting header string in output
|
214 |
find_index = output.find(' '.join(pdftext.split()[:3]))
|
215 |
if find_index != -1:
|
216 |
output = output[find_index:].strip()
|
217 |
|
218 |
+
print('-----------------------------------------------------------')
|
219 |
+
print(output)
|
220 |
+
print('-----------------------------------------------------------')
|
221 |
+
|
222 |
# print('---------------Remove Registration Numbers-----------------------')
|
223 |
# print(output)
|
224 |
|