Tahsin-Mayeesha committed on
Commit
93235fb
•
1 Parent(s): 258816a

add evaluation results with language model

Browse files
.ipynb_checkpoints/add lm decoder-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
.ipynb_checkpoints/preprocessor_config-checkpoint.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
add lm decoder.ipynb ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "db2971a9",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "application/vnd.jupyter.widget-view+json": {
12
+ "model_id": "29f15da8fd9549188347df46955b078d",
13
+ "version_major": 2,
14
+ "version_minor": 0
15
+ },
16
+ "text/plain": [
17
+ "VBox(children=(HTML(value='<center>\\n<img src=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
18
+ ]
19
+ },
20
+ "metadata": {},
21
+ "output_type": "display_data"
22
+ }
23
+ ],
24
+ "source": [
25
+ "from huggingface_hub import notebook_login\n",
26
+ "\n",
27
+ "notebook_login()"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 2,
33
+ "id": "2377a1e5",
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "name": "stderr",
38
+ "output_type": "stream",
39
+ "text": [
40
+ "Cloning https://huggingface.co/Tahsin-Mayeesha/wav2vec2-bn-300m into local empty directory.\n"
41
+ ]
42
+ },
43
+ {
44
+ "data": {
45
+ "application/vnd.jupyter.widget-view+json": {
46
+ "model_id": "9eacbc3325314c0b9e70d738ea655554",
47
+ "version_major": 2,
48
+ "version_minor": 0
49
+ },
50
+ "text/plain": [
51
+ "Download file pytorch_model.bin: 0%| | 594/1.18G [00:00<?, ?B/s]"
52
+ ]
53
+ },
54
+ "metadata": {},
55
+ "output_type": "display_data"
56
+ },
57
+ {
58
+ "data": {
59
+ "application/vnd.jupyter.widget-view+json": {
60
+ "model_id": "abce727d7bdc4291b5b8158ba46ca359",
61
+ "version_major": 2,
62
+ "version_minor": 0
63
+ },
64
+ "text/plain": [
65
+ "Download file runs/Feb02_18-57-15_job-adbfa1a2-412e-4cc9-8438-18b8de11318f/events.out.tfevents.1643828376.job-…"
66
+ ]
67
+ },
68
+ "metadata": {},
69
+ "output_type": "display_data"
70
+ },
71
+ {
72
+ "data": {
73
+ "application/vnd.jupyter.widget-view+json": {
74
+ "model_id": "990958b3a4894a58bbc7b275683aadc5",
75
+ "version_major": 2,
76
+ "version_minor": 0
77
+ },
78
+ "text/plain": [
79
+ "Download file runs/Feb02_18-57-15_job-adbfa1a2-412e-4cc9-8438-18b8de11318f/1643828376.0908198/events.out.tfeve…"
80
+ ]
81
+ },
82
+ "metadata": {},
83
+ "output_type": "display_data"
84
+ },
85
+ {
86
+ "data": {
87
+ "application/vnd.jupyter.widget-view+json": {
88
+ "model_id": "0e7b5cf37e13411f861ab6f97a7086f5",
89
+ "version_major": 2,
90
+ "version_minor": 0
91
+ },
92
+ "text/plain": [
93
+ "Download file training_args.bin: 100%|##########| 2.92k/2.92k [00:00<?, ?B/s]"
94
+ ]
95
+ },
96
+ "metadata": {},
97
+ "output_type": "display_data"
98
+ },
99
+ {
100
+ "data": {
101
+ "application/vnd.jupyter.widget-view+json": {
102
+ "model_id": "083dfac65da1440bbb1b44d0ef9736b2",
103
+ "version_major": 2,
104
+ "version_minor": 0
105
+ },
106
+ "text/plain": [
107
+ "Clean file runs/Feb02_18-57-15_job-adbfa1a2-412e-4cc9-8438-18b8de11318f/1643828376.0908198/events.out.tfevents…"
108
+ ]
109
+ },
110
+ "metadata": {},
111
+ "output_type": "display_data"
112
+ },
113
+ {
114
+ "data": {
115
+ "application/vnd.jupyter.widget-view+json": {
116
+ "model_id": "de458e6cd4844842a9518ab9a6555742",
117
+ "version_major": 2,
118
+ "version_minor": 0
119
+ },
120
+ "text/plain": [
121
+ "Clean file training_args.bin: 34%|###4 | 1.00k/2.92k [00:00<?, ?B/s]"
122
+ ]
123
+ },
124
+ "metadata": {},
125
+ "output_type": "display_data"
126
+ },
127
+ {
128
+ "data": {
129
+ "application/vnd.jupyter.widget-view+json": {
130
+ "model_id": "83b6f9d8c39649e382025e1462d12ee0",
131
+ "version_major": 2,
132
+ "version_minor": 0
133
+ },
134
+ "text/plain": [
135
+ "Clean file runs/Feb02_18-57-15_job-adbfa1a2-412e-4cc9-8438-18b8de11318f/events.out.tfevents.1643828376.job-adb…"
136
+ ]
137
+ },
138
+ "metadata": {},
139
+ "output_type": "display_data"
140
+ },
141
+ {
142
+ "data": {
143
+ "application/vnd.jupyter.widget-view+json": {
144
+ "model_id": "95142f5b1abc4218b5ed797b9edffbbc",
145
+ "version_major": 2,
146
+ "version_minor": 0
147
+ },
148
+ "text/plain": [
149
+ "Clean file pytorch_model.bin: 0%| | 1.00k/1.18G [00:00<?, ?B/s]"
150
+ ]
151
+ },
152
+ "metadata": {},
153
+ "output_type": "display_data"
154
+ }
155
+ ],
156
+ "source": [
157
+ "from huggingface_hub import Repository\n",
158
+ "\n",
159
+ "repo = Repository(local_dir=\"wav2vec2-bn-300m\", clone_from=\"Tahsin-Mayeesha/wav2vec2-bn-300m\")"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 4,
165
+ "id": "091991cd",
166
+ "metadata": {},
167
+ "outputs": [
168
+ {
169
+ "data": {
170
+ "application/vnd.jupyter.widget-view+json": {
171
+ "model_id": "ab4eb980966140b7a6bca34bc51c3fd2",
172
+ "version_major": 2,
173
+ "version_minor": 0
174
+ },
175
+ "text/plain": [
176
+ "Downloading: 0%| | 0.00/212 [00:00<?, ?B/s]"
177
+ ]
178
+ },
179
+ "metadata": {},
180
+ "output_type": "display_data"
181
+ },
182
+ {
183
+ "data": {
184
+ "application/vnd.jupyter.widget-view+json": {
185
+ "model_id": "442c24c800b949238d33dd9ad2100c09",
186
+ "version_major": 2,
187
+ "version_minor": 0
188
+ },
189
+ "text/plain": [
190
+ "Downloading: 0%| | 0.00/260 [00:00<?, ?B/s]"
191
+ ]
192
+ },
193
+ "metadata": {},
194
+ "output_type": "display_data"
195
+ },
196
+ {
197
+ "data": {
198
+ "application/vnd.jupyter.widget-view+json": {
199
+ "model_id": "69504d7b2d994c6789cdc9b27c00bc5b",
200
+ "version_major": 2,
201
+ "version_minor": 0
202
+ },
203
+ "text/plain": [
204
+ "Downloading: 0%| | 0.00/1.99k [00:00<?, ?B/s]"
205
+ ]
206
+ },
207
+ "metadata": {},
208
+ "output_type": "display_data"
209
+ },
210
+ {
211
+ "data": {
212
+ "application/vnd.jupyter.widget-view+json": {
213
+ "model_id": "f8290e53eb4f402e99b6d71bd5836955",
214
+ "version_major": 2,
215
+ "version_minor": 0
216
+ },
217
+ "text/plain": [
218
+ "Downloading: 0%| | 0.00/1.13k [00:00<?, ?B/s]"
219
+ ]
220
+ },
221
+ "metadata": {},
222
+ "output_type": "display_data"
223
+ },
224
+ {
225
+ "data": {
226
+ "application/vnd.jupyter.widget-view+json": {
227
+ "model_id": "abaa5f4a82de4ec8b27a67178addfcb9",
228
+ "version_major": 2,
229
+ "version_minor": 0
230
+ },
231
+ "text/plain": [
232
+ "Downloading: 0%| | 0.00/25.0 [00:00<?, ?B/s]"
233
+ ]
234
+ },
235
+ "metadata": {},
236
+ "output_type": "display_data"
237
+ },
238
+ {
239
+ "data": {
240
+ "application/vnd.jupyter.widget-view+json": {
241
+ "model_id": "9b6186e1a4df4999b71e8ea2e8f9d392",
242
+ "version_major": 2,
243
+ "version_minor": 0
244
+ },
245
+ "text/plain": [
246
+ "Downloading: 0%| | 0.00/309 [00:00<?, ?B/s]"
247
+ ]
248
+ },
249
+ "metadata": {},
250
+ "output_type": "display_data"
251
+ }
252
+ ],
253
+ "source": [
254
+ "from transformers import AutoProcessor\n",
255
+ "processor = AutoProcessor.from_pretrained(\"Tahsin-Mayeesha/wav2vec2-bn-300m\")"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": 5,
261
+ "id": "3507c167",
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": [
265
+ "vocab_dict = processor.tokenizer.get_vocab()\n",
266
+ "sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": 6,
272
+ "id": "15ee83a8",
273
+ "metadata": {},
274
+ "outputs": [
275
+ {
276
+ "name": "stderr",
277
+ "output_type": "stream",
278
+ "text": [
279
+ "Found entries of length > 1 in alphabet. This is unusual unless style is BPE, but the alphabet was not recognized as BPE type. Is this correct?\n"
280
+ ]
281
+ }
282
+ ],
283
+ "source": [
284
+ "from pyctcdecode import build_ctcdecoder\n",
285
+ "\n",
286
+ "decoder = build_ctcdecoder(\n",
287
+ " labels=list(sorted_vocab_dict.keys()),\n",
288
+ " kenlm_model_path=\"5gram.arpa\",\n",
289
+ ")"
290
+ ]
291
+ },
292
+ {
293
+ "cell_type": "code",
294
+ "execution_count": 7,
295
+ "id": "46585ac6",
296
+ "metadata": {},
297
+ "outputs": [],
298
+ "source": [
299
+ "from transformers import Wav2Vec2ProcessorWithLM\n",
300
+ "\n",
301
+ "processor_with_lm = Wav2Vec2ProcessorWithLM(\n",
302
+ " feature_extractor=processor.feature_extractor,\n",
303
+ " tokenizer=processor.tokenizer,\n",
304
+ " decoder=decoder\n",
305
+ ")"
306
+ ]
307
+ },
308
+ {
309
+ "cell_type": "code",
310
+ "execution_count": 8,
311
+ "id": "c17befdc",
312
+ "metadata": {},
313
+ "outputs": [],
314
+ "source": [
315
+ "processor_with_lm.save_pretrained(\"wav2vec2-bn-300m\")"
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": 9,
321
+ "id": "f3ec60c4",
322
+ "metadata": {},
323
+ "outputs": [
324
+ {
325
+ "name": "stderr",
326
+ "output_type": "stream",
327
+ "text": [
328
+ "Adding files tracked by Git LFS: ['language_model/unigrams.txt']. This may take a bit of time if the files are large.\n"
329
+ ]
330
+ },
331
+ {
332
+ "data": {
333
+ "application/vnd.jupyter.widget-view+json": {
334
+ "model_id": "7aa6e28e8a9c49b79b09f5d2884383d7",
335
+ "version_major": 2,
336
+ "version_minor": 0
337
+ },
338
+ "text/plain": [
339
+ "Upload file language_model/unigrams.txt: 0%| | 3.38k/22.3M [00:00<?, ?B/s]"
340
+ ]
341
+ },
342
+ "metadata": {},
343
+ "output_type": "display_data"
344
+ },
345
+ {
346
+ "name": "stderr",
347
+ "output_type": "stream",
348
+ "text": [
349
+ "To https://huggingface.co/Tahsin-Mayeesha/wav2vec2-bn-300m\n",
350
+ " b6e6996..258816a main -> main\n",
351
+ "\n"
352
+ ]
353
+ },
354
+ {
355
+ "data": {
356
+ "text/plain": [
357
+ "'https://huggingface.co/Tahsin-Mayeesha/wav2vec2-bn-300m/commit/258816acfe8e1e49f41b4edcf9f20f812b4bf00d'"
358
+ ]
359
+ },
360
+ "execution_count": 9,
361
+ "metadata": {},
362
+ "output_type": "execute_result"
363
+ }
364
+ ],
365
+ "source": [
366
+ "repo.push_to_hub(commit_message=\"Upload lm-boosted decoder\")"
367
+ ]
368
+ },
369
+ {
370
+ "cell_type": "code",
371
+ "execution_count": null,
372
+ "id": "add2d4ca",
373
+ "metadata": {},
374
+ "outputs": [],
375
+ "source": []
376
+ }
377
+ ],
378
+ "metadata": {
379
+ "kernelspec": {
380
+ "display_name": "Python 3",
381
+ "language": "python",
382
+ "name": "python3"
383
+ },
384
+ "language_info": {
385
+ "codemirror_mode": {
386
+ "name": "ipython",
387
+ "version": 3
388
+ },
389
+ "file_extension": ".py",
390
+ "mimetype": "text/x-python",
391
+ "name": "python",
392
+ "nbconvert_exporter": "python",
393
+ "pygments_lexer": "ipython3",
394
+ "version": "3.8.8"
395
+ }
396
+ },
397
+ "nbformat": 4,
398
+ "nbformat_minor": 5
399
+ }
{.ipynb_checkpoints → evaluations_no_lm/.ipynb_checkpoints}/OPENSLR_bn_test_eval_results-checkpoint.txt RENAMED
File without changes
{.ipynb_checkpoints → evaluations_no_lm/.ipynb_checkpoints}/log_OPENSLR_bn_test_predictions-checkpoint.txt RENAMED
File without changes
OPENSLR_bn_test_eval_results.txt → evaluations_no_lm/OPENSLR_bn_test_eval_results.txt RENAMED
File without changes
log_OPENSLR_bn_test_predictions.txt → evaluations_no_lm/log_OPENSLR_bn_test_predictions.txt RENAMED
File without changes
log_OPENSLR_bn_test_targets.txt → evaluations_no_lm/log_OPENSLR_bn_test_targets.txt RENAMED
File without changes
evaluations_with_lm/.ipynb_checkpoints/openslr_bn_test_eval_results-checkpoint.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.17776164652632478
2
+ CER: 0.04394092712884769
evaluations_with_lm/log_openslr_bn_test_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
evaluations_with_lm/log_openslr_bn_test_targets.txt ADDED
The diff for this file is too large to render. See raw diff
 
evaluations_with_lm/openslr_bn_test_eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.17776164652632478
2
+ CER: 0.04394092712884769