Pranjal12345 committed on
Commit
3381d4a
1 Parent(s): cbb1092
Files changed (1) hide show
  1. main.py +112 -48
main.py CHANGED
@@ -1,57 +1,100 @@
1
- #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
2
 
3
 
4
- # from fastapi import FastAPI
5
- # from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
- # import librosa
7
- # import uvicorn
8
 
9
- # app = FastAPI()
10
 
11
- # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
12
- # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
13
- # model.config.forced_decoder_ids = None
14
 
15
- # audio_file_path = "output.mp3"
16
 
17
- # audio_data, _ = librosa.load(audio_file_path, sr=16000)
18
 
19
- # @app.get("/")
20
- # def transcribe_audio():
21
- # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
22
 
23
- # predicted_ids = model.generate(input_features)
24
- # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
25
 
26
- # return {"transcription": transcription[0]}
27
 
28
 
29
- # if __name__ == "__main__":
30
- # import uvicorn
31
- # uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
 
 
 
 
 
 
 
 
 
32
 
33
 
34
- # if __name__=='__main__':
35
- # uvicorn.run('main:app', reload=True)
36
 
37
 
38
 
39
 
40
- #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
41
- #curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
42
- #http://localhost:8000/?text=I%20like%20Apples
43
 
44
 
 
 
 
 
45
 
 
46
 
 
 
 
 
47
 
 
 
48
 
 
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- # from fastapi import FastAPI
 
 
 
 
52
  # from transformers import WhisperProcessor, WhisperForConditionalGeneration
53
  # import librosa
 
54
  # import uvicorn
 
55
 
56
  # app = FastAPI()
57
 
@@ -60,15 +103,30 @@
60
  # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
61
  # model.config.forced_decoder_ids = None
62
 
63
- # # Path to your audio file
64
- # audio_file_path = "/home/pranjal/Downloads/output.mp3"
65
-
66
- # # Read the audio file
67
- # audio_data, _ = librosa.load(audio_file_path, sr=16000)
68
-
69
  # @app.get("/")
70
- # def transcribe_audio():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # # Process the audio data using the Whisper processor
 
72
  # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
73
 
74
  # # Generate transcription
@@ -76,17 +134,18 @@
76
  # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
77
 
78
  # return {"transcription": transcription[0]}
 
 
 
 
 
79
 
80
- # if __name__ == "__main__":
81
- # import uvicorn
82
- # uvicorn.run(app, host="0.0.0.0", port=8000)
83
 
84
 
85
- # if __name__=='__app__':
86
- # uvicorn.run('main:app', reload=True)
87
 
88
 
89
 
 
90
 
91
 
92
  from fastapi import FastAPI, UploadFile, File
@@ -98,10 +157,14 @@ import io
98
 
99
  app = FastAPI()
100
 
101
- # Load model and processor
102
- processor = WhisperProcessor.from_pretrained("openai/whisper-small")
103
- model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
104
- model.config.forced_decoder_ids = None
 
 
 
 
105
 
106
  @app.get("/")
107
  def read_root():
@@ -126,14 +189,15 @@ async def transcribe_audio(audio_file: UploadFile):
126
  audio_data = await audio_file.read()
127
 
128
  # Process the audio data using the Whisper processor
129
- audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
130
- input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
131
 
132
- # Generate transcription
133
- predicted_ids = model.generate(input_features)
134
- transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 
135
 
136
- return {"transcription": transcription[0]}
137
  except Exception as e:
138
  return {"error": str(e)}
139
 
 
1
+ # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
2
 
3
 
4
+ # # from fastapi import FastAPI
5
+ # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
+ # # import librosa
7
+ # # import uvicorn
8
 
9
+ # # app = FastAPI()
10
 
11
+ # # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
12
+ # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
13
+ # # model.config.forced_decoder_ids = None
14
 
15
+ # # audio_file_path = "output.mp3"
16
 
17
+ # # audio_data, _ = librosa.load(audio_file_path, sr=16000)
18
 
19
+ # # @app.get("/")
20
+ # # def transcribe_audio():
21
+ # # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
22
 
23
+ # # predicted_ids = model.generate(input_features)
24
+ # # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
25
 
26
+ # # return {"transcription": transcription[0]}
27
 
28
 
29
+ # # if __name__ == "__main__":
30
+ # # import uvicorn
31
+ # # uvicorn.run(app, host="0.0.0.0", port=8000)
32
+
33
+
34
+ # # if __name__=='__main__':
35
+ # # uvicorn.run('main:app', reload=True)
36
+
37
+
38
+
39
+
40
+ # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
41
+ # #curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
42
+ # #http://localhost:8000/?text=I%20like%20Apples
43
 
44
 
 
 
45
 
46
 
47
 
48
 
 
 
 
49
 
50
 
51
+ # # from fastapi import FastAPI
52
+ # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
53
+ # # import librosa
54
+ # # import uvicorn
55
 
56
+ # # app = FastAPI()
57
 
58
+ # # # Load model and processor
59
+ # # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
60
+ # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
61
+ # # model.config.forced_decoder_ids = None
62
 
63
+ # # # Path to your audio file
64
+ # # audio_file_path = "/home/pranjal/Downloads/output.mp3"
65
 
66
+ # # # Read the audio file
67
+ # # audio_data, _ = librosa.load(audio_file_path, sr=16000)
68
 
69
+ # # @app.get("/")
70
+ # # def transcribe_audio():
71
+ # # # Process the audio data using the Whisper processor
72
+ # # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
73
+
74
+ # # # Generate transcription
75
+ # # predicted_ids = model.generate(input_features)
76
+ # # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
77
+
78
+ # # return {"transcription": transcription[0]}
79
+
80
+ # # if __name__ == "__main__":
81
+ # # import uvicorn
82
+ # # uvicorn.run(app, host="0.0.0.0", port=8000)
83
+
84
+
85
+ # # if __name__=='__app__':
86
+ # # uvicorn.run('main:app', reload=True)
87
 
88
+
89
+
90
+
91
+
92
+ # from fastapi import FastAPI, UploadFile, File
93
  # from transformers import WhisperProcessor, WhisperForConditionalGeneration
94
  # import librosa
95
+ # from fastapi.responses import HTMLResponse
96
  # import uvicorn
97
+ # import io
98
 
99
  # app = FastAPI()
100
 
 
103
  # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
104
  # model.config.forced_decoder_ids = None
105
 
 
 
 
 
 
 
106
  # @app.get("/")
107
+ # def read_root():
108
+ # html_form = """
109
+ # <html>
110
+ # <body>
111
+ # <h2>ASR Transcription</h2>
112
+ # <form action="/transcribe" method="post" enctype="multipart/form-data">
113
+ # <label for="audio_file">Upload an audio file (MP3 or WAV):</label>
114
+ # <input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
115
+ # <input type="submit" value="Transcribe">
116
+ # </form>
117
+ # </body>
118
+ # </html>
119
+ # """
120
+ # return HTMLResponse(content=html_form, status_code=200)
121
+
122
+ # @app.post("/transcribe")
123
+ # async def transcribe_audio(audio_file: UploadFile):
124
+ # try:
125
+ # # Read the uploaded audio file
126
+ # audio_data = await audio_file.read()
127
+
128
  # # Process the audio data using the Whisper processor
129
+ # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
130
  # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
131
 
132
  # # Generate transcription
 
134
  # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
135
 
136
  # return {"transcription": transcription[0]}
137
+ # except Exception as e:
138
+ # return {"error": str(e)}
139
+
140
+ # if __name__ == "__app__":
141
+ # uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
142
 
 
 
 
143
 
144
 
 
 
145
 
146
 
147
 
148
+ #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
149
 
150
 
151
  from fastapi import FastAPI, UploadFile, File
 
157
 
158
  app = FastAPI()
159
 
160
+ # # Load model and processor
161
+ # processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
162
+ # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
163
+ # model.config.forced_decoder_ids = None
164
+
165
+ import whisper
166
+ model = whisper.load_model("small")
167
+
168
 
169
  @app.get("/")
170
  def read_root():
 
189
  audio_data = await audio_file.read()
190
 
191
  # Process the audio data using the Whisper processor
192
+ # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
193
+ # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
194
 
195
+ # # Generate transcription
196
+ # predicted_ids = model.generate(input_features)
197
+ # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
198
+ result = model.transcribe("/home/pranjal/Downloads/rt.mp3")
199
 
200
+ return {"transcription": result['text']}
201
  except Exception as e:
202
  return {"error": str(e)}
203