Pranjal12345 committed on
Commit
71f4c16
1 Parent(s): b86f76f
Files changed (2) hide show
  1. main.py +146 -80
  2. requirements.txt +1 -2
main.py CHANGED
@@ -1,57 +1,100 @@
1
- # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
2
 
3
 
4
- # # from fastapi import FastAPI
5
- # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
- # # import librosa
7
- # # import uvicorn
8
 
9
- # # app = FastAPI()
10
 
11
- # # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
12
- # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
13
- # # model.config.forced_decoder_ids = None
14
 
15
- # # audio_file_path = "output.mp3"
16
 
17
- # # audio_data, _ = librosa.load(audio_file_path, sr=16000)
18
 
19
- # # @app.get("/")
20
- # # def transcribe_audio():
21
- # # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
22
 
23
- # # predicted_ids = model.generate(input_features)
24
- # # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
25
 
26
- # # return {"transcription": transcription[0]}
27
 
28
 
29
- # # if __name__ == "__main__":
30
- # # import uvicorn
31
- # # uvicorn.run(app, host="0.0.0.0", port=8000)
32
 
33
 
34
- # # if __name__=='__main__':
35
- # # uvicorn.run('main:app', reload=True)
36
 
37
 
38
 
39
 
40
- # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
41
- # #curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
42
- # #http://localhost:8000/?text=I%20like%20Apples
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
 
 
 
 
 
 
 
 
 
44
 
 
 
 
45
 
46
 
 
 
47
 
48
 
49
 
50
 
51
- # # from fastapi import FastAPI
 
52
  # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
53
  # # import librosa
 
54
  # # import uvicorn
 
55
 
56
  # # app = FastAPI()
57
 
@@ -60,15 +103,30 @@
60
  # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
61
  # # model.config.forced_decoder_ids = None
62
 
63
- # # # Path to your audio file
64
- # # audio_file_path = "/home/pranjal/Downloads/output.mp3"
65
-
66
- # # # Read the audio file
67
- # # audio_data, _ = librosa.load(audio_file_path, sr=16000)
68
-
69
  # # @app.get("/")
70
- # # def transcribe_audio():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # # # Process the audio data using the Whisper processor
 
72
  # # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
73
 
74
  # # # Generate transcription
@@ -76,18 +134,19 @@
76
  # # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
77
 
78
  # # return {"transcription": transcription[0]}
 
 
79
 
80
- # # if __name__ == "__main__":
81
- # # import uvicorn
82
- # # uvicorn.run(app, host="0.0.0.0", port=8000)
83
 
84
 
85
- # # if __name__=='__app__':
86
- # # uvicorn.run('main:app', reload=True)
87
 
88
 
89
 
90
 
 
 
91
 
92
  # from fastapi import FastAPI, UploadFile, File
93
  # from transformers import WhisperProcessor, WhisperForConditionalGeneration
@@ -98,10 +157,14 @@
98
 
99
  # app = FastAPI()
100
 
101
- # # Load model and processor
102
- # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
103
- # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
104
- # model.config.forced_decoder_ids = None
 
 
 
 
105
 
106
  # @app.get("/")
107
  # def read_root():
@@ -126,45 +189,44 @@
126
  # audio_data = await audio_file.read()
127
 
128
  # # Process the audio data using the Whisper processor
129
- # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
130
- # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
131
 
132
- # # Generate transcription
133
- # predicted_ids = model.generate(input_features)
134
- # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 
135
 
136
- # return {"transcription": transcription[0]}
137
  # except Exception as e:
138
  # return {"error": str(e)}
139
 
140
- # if __name__ == "__app__":
141
- # uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
142
-
143
-
144
-
145
-
146
-
147
 
148
  #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
149
 
150
-
151
  from fastapi import FastAPI, UploadFile, File
152
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
153
- import librosa
154
  from fastapi.responses import HTMLResponse
155
- import uvicorn
156
  import io
 
157
 
158
- app = FastAPI()
159
 
160
- # # Load model and processor
161
- # processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
162
- # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
163
- # model.config.forced_decoder_ids = None
 
 
164
 
165
- import whisper
166
- model = whisper.load_model("small")
 
167
 
 
 
168
 
169
  @app.get("/")
170
  def read_root():
@@ -184,23 +246,27 @@ def read_root():
184
 
185
  @app.post("/transcribe")
186
  async def transcribe_audio(audio_file: UploadFile):
187
- try:
188
- # Read the uploaded audio file
189
  audio_data = await audio_file.read()
 
190
 
191
- # Process the audio data using the Whisper processor
192
- # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
193
- # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
194
 
195
- # # Generate transcription
196
- # predicted_ids = model.generate(input_features)
197
- # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
198
- result = model.transcribe("/home/pranjal/Downloads/rt.mp3")
 
 
 
 
 
 
 
 
199
 
200
- return {"transcription": result['text']}
201
- except Exception as e:
202
- return {"error": str(e)}
203
-
204
- # if __name__ == "__app__":
205
- # uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
206
 
 
 
 
 
1
+ # # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
2
 
3
 
4
+ # # # from fastapi import FastAPI
5
+ # # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
+ # # # import librosa
7
+ # # # import uvicorn
8
 
9
+ # # # app = FastAPI()
10
 
11
+ # # # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
12
+ # # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
13
+ # # # model.config.forced_decoder_ids = None
14
 
15
+ # # # audio_file_path = "output.mp3"
16
 
17
+ # # # audio_data, _ = librosa.load(audio_file_path, sr=16000)
18
 
19
+ # # # @app.get("/")
20
+ # # # def transcribe_audio():
21
+ # # # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
22
 
23
+ # # # predicted_ids = model.generate(input_features)
24
+ # # # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
25
 
26
+ # # # return {"transcription": transcription[0]}
27
 
28
 
29
+ # # # if __name__ == "__main__":
30
+ # # # import uvicorn
31
+ # # # uvicorn.run(app, host="0.0.0.0", port=8000)
32
 
33
 
34
+ # # # if __name__=='__main__':
35
+ # # # uvicorn.run('main:app', reload=True)
36
 
37
 
38
 
39
 
40
+ # # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
41
+ # # #curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
42
+ # # #http://localhost:8000/?text=I%20like%20Apples
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+ # # # from fastapi import FastAPI
52
+ # # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
53
+ # # # import librosa
54
+ # # # import uvicorn
55
+
56
+ # # # app = FastAPI()
57
+
58
+ # # # # Load model and processor
59
+ # # # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
60
+ # # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
61
+ # # # model.config.forced_decoder_ids = None
62
+
63
+ # # # # Path to your audio file
64
+ # # # audio_file_path = "/home/pranjal/Downloads/output.mp3"
65
+
66
+ # # # # Read the audio file
67
+ # # # audio_data, _ = librosa.load(audio_file_path, sr=16000)
68
 
69
+ # # # @app.get("/")
70
+ # # # def transcribe_audio():
71
+ # # # # Process the audio data using the Whisper processor
72
+ # # # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
73
+
74
+ # # # # Generate transcription
75
+ # # # predicted_ids = model.generate(input_features)
76
+ # # # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
77
+
78
+ # # # return {"transcription": transcription[0]}
79
 
80
+ # # # if __name__ == "__main__":
81
+ # # # import uvicorn
82
+ # # # uvicorn.run(app, host="0.0.0.0", port=8000)
83
 
84
 
85
+ # # # if __name__=='__app__':
86
+ # # # uvicorn.run('main:app', reload=True)
87
 
88
 
89
 
90
 
91
+
92
+ # # from fastapi import FastAPI, UploadFile, File
93
  # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
94
  # # import librosa
95
+ # # from fastapi.responses import HTMLResponse
96
  # # import uvicorn
97
+ # # import io
98
 
99
  # # app = FastAPI()
100
 
 
103
  # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
104
  # # model.config.forced_decoder_ids = None
105
 
 
 
 
 
 
 
106
  # # @app.get("/")
107
+ # # def read_root():
108
+ # # html_form = """
109
+ # # <html>
110
+ # # <body>
111
+ # # <h2>ASR Transcription</h2>
112
+ # # <form action="/transcribe" method="post" enctype="multipart/form-data">
113
+ # # <label for="audio_file">Upload an audio file (MP3 or WAV):</label>
114
+ # # <input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
115
+ # # <input type="submit" value="Transcribe">
116
+ # # </form>
117
+ # # </body>
118
+ # # </html>
119
+ # # """
120
+ # # return HTMLResponse(content=html_form, status_code=200)
121
+
122
+ # # @app.post("/transcribe")
123
+ # # async def transcribe_audio(audio_file: UploadFile):
124
+ # # try:
125
+ # # # Read the uploaded audio file
126
+ # # audio_data = await audio_file.read()
127
+
128
  # # # Process the audio data using the Whisper processor
129
+ # # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
130
  # # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
131
 
132
  # # # Generate transcription
 
134
  # # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
135
 
136
  # # return {"transcription": transcription[0]}
137
+ # # except Exception as e:
138
+ # # return {"error": str(e)}
139
 
140
+ # # if __name__ == "__app__":
141
+ # # uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
 
142
 
143
 
 
 
144
 
145
 
146
 
147
 
148
+ # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
149
+
150
 
151
  # from fastapi import FastAPI, UploadFile, File
152
  # from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
157
 
158
  # app = FastAPI()
159
 
160
+ # # # Load model and processor
161
+ # # processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
162
+ # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
163
+ # # model.config.forced_decoder_ids = None
164
+
165
+ # import whisper
166
+ # model = whisper.load_model("small")
167
+
168
 
169
  # @app.get("/")
170
  # def read_root():
 
189
  # audio_data = await audio_file.read()
190
 
191
  # # Process the audio data using the Whisper processor
192
+ # # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
193
+ # # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
194
 
195
+ # # # Generate transcription
196
+ # # predicted_ids = model.generate(input_features)
197
+ # # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
198
+ # result = model.transcribe("/home/pranjal/Downloads/rt.mp3")
199
 
200
+ # return {"transcription": result['text']}
201
  # except Exception as e:
202
  # return {"error": str(e)}
203
 
204
+ # # if __name__ == "__app__":
205
+ # # uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
 
 
 
 
 
206
 
207
  #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
208
 
 
209
  from fastapi import FastAPI, UploadFile, File
210
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
 
211
  from fastapi.responses import HTMLResponse
212
+ import librosa
213
  import io
214
+ import re
215
 
 
216
 
217
+ html_tag_remover = re.compile(r'<[^>]+>')
218
+
219
+ def remove_tags(text):
220
+ return html_tag_remover.sub('', text)
221
+
222
+ app = FastAPI()
223
 
224
+ processor = WhisperProcessor.from_pretrained("openai/whisper-small")
225
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
226
+ model.config.forced_decoder_ids = None
227
 
228
+ chunk_duration = 30
229
+ overlap_duration = 5
230
 
231
  @app.get("/")
232
  def read_root():
 
246
 
247
  @app.post("/transcribe")
248
  async def transcribe_audio(audio_file: UploadFile):
 
 
249
  audio_data = await audio_file.read()
250
+ audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
251
 
252
+ transcription = []
 
 
253
 
254
+ start = 0
255
+ while start < len(audio_data):
256
+ end = start + chunk_duration * 16000
257
+ audio_chunk = audio_data[start:end]
258
+
259
+ input_features = processor(audio_chunk.tolist(), return_tensors="pt").input_features
260
+ predicted_ids = model.generate(input_features, max_length=1000)
261
+ chunk_transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
262
+
263
+ transcription.extend(chunk_transcription)
264
+
265
+ start = end - overlap_duration * 16000
266
 
267
+ final_transcription = " ".join(transcription)
268
+ final_transcription = remove_tags(final_transcription)
 
 
 
 
269
 
270
+
271
+ return {"transcription": final_transcription}
272
+
requirements.txt CHANGED
@@ -6,5 +6,4 @@ uvicorn
6
  transformers
7
  Torch
8
  python-multipart
9
- git+https://github.com/openai/whisper.git
10
- ffmpeg
 
6
  transformers
7
  Torch
8
  python-multipart
9
+ re