Spaces:

Pranjal12345
/

Whisper_with_FastApi

Runtime error

App Files Community

Pranjal12345 committed on Sep 27, 2023

Commit

3381d4a

•

1 Parent(s): cbb1092

dsds

Browse files

Files changed (1) hide show

main.py +112 -48

main.py CHANGED Viewed

@@ -1,57 +1,100 @@
-#uvicorn app:app --host 0.0.0.0 --port 8000 --reload
-# from fastapi import FastAPI
-# from transformers import WhisperProcessor, WhisperForConditionalGeneration
-# import librosa
-# import uvicorn
-# app = FastAPI()
-# processor = WhisperProcessor.from_pretrained("openai/whisper-small")
-# model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
-# model.config.forced_decoder_ids = None
-# audio_file_path = "output.mp3"
-# audio_data, _ = librosa.load(audio_file_path, sr=16000)
-# @app.get("/")
-# def transcribe_audio():
-#         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
-#         predicted_ids = model.generate(input_features)
-#         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-#         return {"transcription": transcription[0]}
-# if __name__ == "__main__":
-#     import uvicorn
-#     uvicorn.run(app, host="0.0.0.0", port=8000)
-# if __name__=='__main__':
-#     uvicorn.run('main:app', reload=True)
-#uvicorn app:app --host 0.0.0.0 --port 8000 --reload
-#curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
-#http://localhost:8000/?text=I%20like%20Apples
-# from fastapi import FastAPI
 # from transformers import WhisperProcessor, WhisperForConditionalGeneration
 # import librosa
 # import uvicorn
 # app = FastAPI()
@@ -60,15 +103,30 @@
 # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
 # model.config.forced_decoder_ids = None
-# # Path to your audio file
-# audio_file_path = "/home/pranjal/Downloads/output.mp3"
-# # Read the audio file
-# audio_data, _ = librosa.load(audio_file_path, sr=16000)
 # @app.get("/")
-# def transcribe_audio():
 #         # Process the audio data using the Whisper processor
 #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
 #         # Generate transcription
@@ -76,17 +134,18 @@
 #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 #         return {"transcription": transcription[0]}
-# if __name__ == "__main__":
-#     import uvicorn
-#     uvicorn.run(app, host="0.0.0.0", port=8000)
-# if __name__=='__app__':
-#     uvicorn.run('main:app', reload=True)
 from fastapi import FastAPI, UploadFile, File
@@ -98,10 +157,14 @@ import io
 app = FastAPI()
-# Load model and processor
-processor = WhisperProcessor.from_pretrained("openai/whisper-small")
-model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
-model.config.forced_decoder_ids = None
 @app.get("/")
 def read_root():
@@ -126,14 +189,15 @@ async def transcribe_audio(audio_file: UploadFile):
         audio_data = await audio_file.read()
         # Process the audio data using the Whisper processor
-        audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
-        input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
-        # Generate transcription
-        predicted_ids = model.generate(input_features)
-        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-        return {"transcription": transcription[0]}
     except Exception as e:
         return {"error": str(e)}

+# #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
+# # from fastapi import FastAPI
+# # from transformers import WhisperProcessor, WhisperForConditionalGeneration
+# # import librosa
+# # import uvicorn
+# # app = FastAPI()
+# # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
+# # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
+# # model.config.forced_decoder_ids = None
+# # audio_file_path = "output.mp3"
+# # audio_data, _ = librosa.load(audio_file_path, sr=16000)
+# # @app.get("/")
+# # def transcribe_audio():
+# #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
+# #         predicted_ids = model.generate(input_features)
+# #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+# #         return {"transcription": transcription[0]}
+# # if __name__ == "__main__":
+# #     import uvicorn
+# #     uvicorn.run(app, host="0.0.0.0", port=8000)
+# # if __name__=='__main__':
+# #     uvicorn.run('main:app', reload=True)
+# #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
+# #curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
+# #http://localhost:8000/?text=I%20like%20Apples
+# # from fastapi import FastAPI
+# # from transformers import WhisperProcessor, WhisperForConditionalGeneration
+# # import librosa
+# # import uvicorn
+# # app = FastAPI()
+# # # Load model and processor
+# # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
+# # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
+# # model.config.forced_decoder_ids = None
+# # # Path to your audio file
+# # audio_file_path = "/home/pranjal/Downloads/output.mp3"
+# # # Read the audio file
+# # audio_data, _ = librosa.load(audio_file_path, sr=16000)
+# # @app.get("/")
+# # def transcribe_audio():
+# #         # Process the audio data using the Whisper processor
+# #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
+# #         # Generate transcription
+# #         predicted_ids = model.generate(input_features)
+# #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+# #         return {"transcription": transcription[0]}
+# # if __name__ == "__main__":
+# #     import uvicorn
+# #     uvicorn.run(app, host="0.0.0.0", port=8000)
+# # if __name__=='__app__':
+# #     uvicorn.run('main:app', reload=True)
+# from fastapi import FastAPI, UploadFile, File
 # from transformers import WhisperProcessor, WhisperForConditionalGeneration
 # import librosa
+# from fastapi.responses import HTMLResponse
 # import uvicorn
+# import io
 # app = FastAPI()
 # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
 # model.config.forced_decoder_ids = None
 # @app.get("/")
+# def read_root():
+#     html_form = """
+#     <html>
+#         <body>
+#             <h2>ASR Transcription</h2>
+#             <form action="/transcribe" method="post" enctype="multipart/form-data">
+#                 <label for="audio_file">Upload an audio file (MP3 or WAV):</label>
+#                 <input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
+#                 <input type="submit" value="Transcribe">
+#             </form>
+#         </body>
+#     </html>
+#     """
+#     return HTMLResponse(content=html_form, status_code=200)
+# @app.post("/transcribe")
+# async def transcribe_audio(audio_file: UploadFile):
+#     try:
+#         # Read the uploaded audio file
+#         audio_data = await audio_file.read()
 #         # Process the audio data using the Whisper processor
+#         audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
 #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
 #         # Generate transcription
 #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 #         return {"transcription": transcription[0]}
+#     except Exception as e:
+#         return {"error": str(e)}
+# if __name__ == "__app__":
+#     uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
+#uvicorn app:app --host 0.0.0.0 --port 8000 --reload
 from fastapi import FastAPI, UploadFile, File
 app = FastAPI()
+# # Load model and processor
+# processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
+# model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
+# model.config.forced_decoder_ids = None
+import whisper
+model = whisper.load_model("small")
 @app.get("/")
 def read_root():
         audio_data = await audio_file.read()
         # Process the audio data using the Whisper processor
+        # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
+        # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
+        # # Generate transcription
+        # predicted_ids = model.generate(input_features)
+        # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+        result = model.transcribe("/home/pranjal/Downloads/rt.mp3")
+        return {"transcription": result['text']}
     except Exception as e:
         return {"error": str(e)}