srijaydeshpande committed on
Commit
33ac67b
1 Parent(s): 0ff9f5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py CHANGED
@@ -84,6 +84,10 @@ def txt_to_html(text):
84
  @spaces.GPU(duration=80)
85
  def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
86
 
 
 
 
 
87
  llm = Llama(
88
  model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
89
  flash_attn=True,
@@ -107,11 +111,19 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
107
  )
108
  output = output['choices'][0]['message']['content']
109
 
 
 
 
 
110
  # Remove starting header string in output
111
  find_index = output.find(' '.join(pdftext.split()[:3]))
112
  if find_index != -1:
113
  output = output[find_index:].strip()
114
 
 
 
 
 
115
  # print('---------------Remove Dates-----------------------')
116
  # print(output)
117
 
@@ -130,11 +142,19 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
130
  )
131
  output = output['choices'][0]['message']['content']
132
 
 
 
 
 
133
  # Remove starting header string in output
134
  find_index = output.find(' '.join(pdftext.split()[:3]))
135
  if find_index != -1:
136
  output = output[find_index:].strip()
137
 
 
 
 
 
138
  # print('---------------Remove Addresses-----------------------')
139
  # print(output)
140
 
@@ -153,11 +173,19 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
153
  )
154
  output = output['choices'][0]['message']['content']
155
 
 
 
 
 
156
  # Remove starting header string in output
157
  find_index = output.find(' '.join(pdftext.split()[:3]))
158
  if find_index != -1:
159
  output = output[find_index:].strip()
160
 
 
 
 
 
161
  # print('---------------Remove Names-----------------------')
162
  # print(output)
163
 
@@ -178,11 +206,19 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
178
  )
179
  output = output['choices'][0]['message']['content']
180
 
 
 
 
 
181
  # Remove starting header string in output
182
  find_index = output.find(' '.join(pdftext.split()[:3]))
183
  if find_index != -1:
184
  output = output[find_index:].strip()
185
 
 
 
 
 
186
  # print('---------------Remove Registration Numbers-----------------------')
187
  # print(output)
188
 
 
84
  @spaces.GPU(duration=80)
85
  def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
86
 
87
+ print('-----------------------------------------------------------')
88
+ print(pdftext)
89
+ print('-----------------------------------------------------------')
90
+
91
  llm = Llama(
92
  model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
93
  flash_attn=True,
 
111
  )
112
  output = output['choices'][0]['message']['content']
113
 
114
+ print('-----------------------------------------------------------')
115
+ print(output)
116
+ print('-----------------------------------------------------------')
117
+
118
  # Remove starting header string in output
119
  find_index = output.find(' '.join(pdftext.split()[:3]))
120
  if find_index != -1:
121
  output = output[find_index:].strip()
122
 
123
+ print('-----------------------------------------------------------')
124
+ print(output)
125
+ print('-----------------------------------------------------------')
126
+
127
  # print('---------------Remove Dates-----------------------')
128
  # print(output)
129
 
 
142
  )
143
  output = output['choices'][0]['message']['content']
144
 
145
+ print('-----------------------------------------------------------')
146
+ print(output)
147
+ print('-----------------------------------------------------------')
148
+
149
  # Remove starting header string in output
150
  find_index = output.find(' '.join(pdftext.split()[:3]))
151
  if find_index != -1:
152
  output = output[find_index:].strip()
153
 
154
+ print('-----------------------------------------------------------')
155
+ print(output)
156
+ print('-----------------------------------------------------------')
157
+
158
  # print('---------------Remove Addresses-----------------------')
159
  # print(output)
160
 
 
173
  )
174
  output = output['choices'][0]['message']['content']
175
 
176
+ print('-----------------------------------------------------------')
177
+ print(output)
178
+ print('-----------------------------------------------------------')
179
+
180
  # Remove starting header string in output
181
  find_index = output.find(' '.join(pdftext.split()[:3]))
182
  if find_index != -1:
183
  output = output[find_index:].strip()
184
 
185
+ print('-----------------------------------------------------------')
186
+ print(output)
187
+ print('-----------------------------------------------------------')
188
+
189
  # print('---------------Remove Names-----------------------')
190
  # print(output)
191
 
 
206
  )
207
  output = output['choices'][0]['message']['content']
208
 
209
+ print('-----------------------------------------------------------')
210
+ print(output)
211
+ print('-----------------------------------------------------------')
212
+
213
  # Remove starting header string in output
214
  find_index = output.find(' '.join(pdftext.split()[:3]))
215
  if find_index != -1:
216
  output = output[find_index:].strip()
217
 
218
+ print('-----------------------------------------------------------')
219
+ print(output)
220
+ print('-----------------------------------------------------------')
221
+
222
  # print('---------------Remove Registration Numbers-----------------------')
223
  # print(output)
224