Pranjal12345 commited on
Commit
ed343f7
1 Parent(s): a3d265e
Files changed (2) hide show
  1. main.py +120 -14
  2. requirements.txt +2 -1
main.py CHANGED
@@ -1,36 +1,142 @@
1
  #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
2
 
3
 
4
- from fastapi import FastAPI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
  import librosa
 
7
  import uvicorn
 
8
 
9
  app = FastAPI()
10
 
 
11
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
12
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
13
  model.config.forced_decoder_ids = None
14
 
15
- audio_file_path = "output.mp3"
16
-
17
- audio_data, _ = librosa.load(audio_file_path, sr=16000)
18
-
19
  @app.get("/")
20
- def transcribe_audio():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
22
 
 
23
  predicted_ids = model.generate(input_features)
24
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
25
 
26
  return {"transcription": transcription[0]}
 
 
27
 
28
-
29
- # if __name__ == "__main__":
30
- # import uvicorn
31
- # uvicorn.run(app, host="0.0.0.0", port=8000)
32
-
33
-
34
- if __name__=='__main__':
35
- uvicorn.run('main:app', reload=True)
36
 
 
1
  #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
2
 
3
 
4
+ # from fastapi import FastAPI
5
+ # from transformers import WhisperProcessor, WhisperForConditionalGeneration
6
+ # import librosa
7
+ # import uvicorn
8
+
9
+ # app = FastAPI()
10
+
11
+ # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
12
+ # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
13
+ # model.config.forced_decoder_ids = None
14
+
15
+ # audio_file_path = "output.mp3"
16
+
17
+ # audio_data, _ = librosa.load(audio_file_path, sr=16000)
18
+
19
+ # @app.get("/")
20
+ # def transcribe_audio():
21
+ # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
22
+
23
+ # predicted_ids = model.generate(input_features)
24
+ # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
25
+
26
+ # return {"transcription": transcription[0]}
27
+
28
+
29
+ # if __name__ == "__main__":
30
+ # import uvicorn
31
+ # uvicorn.run(app, host="0.0.0.0", port=8000)
32
+
33
+
34
+ # if __name__=='__main__':
35
+ # uvicorn.run('main:app', reload=True)
36
+
37
+
38
+
39
+
40
+ #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
41
+ #curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
42
+ #http://localhost:8000/?text=I%20like%20Apples
43
+
44
+
45
+
46
+
47
+
48
+
49
+
50
+
51
+ # from fastapi import FastAPI
52
+ # from transformers import WhisperProcessor, WhisperForConditionalGeneration
53
+ # import librosa
54
+ # import uvicorn
55
+
56
+ # app = FastAPI()
57
+
58
+ # # Load model and processor
59
+ # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
60
+ # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
61
+ # model.config.forced_decoder_ids = None
62
+
63
+ # # Path to your audio file
64
+ # audio_file_path = "/home/pranjal/Downloads/output.mp3"
65
+
66
+ # # Read the audio file
67
+ # audio_data, _ = librosa.load(audio_file_path, sr=16000)
68
+
69
+ # @app.get("/")
70
+ # def transcribe_audio():
71
+ # # Process the audio data using the Whisper processor
72
+ # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
73
+
74
+ # # Generate transcription
75
+ # predicted_ids = model.generate(input_features)
76
+ # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
77
+
78
+ # return {"transcription": transcription[0]}
79
+
80
+ # if __name__ == "__main__":
81
+ # import uvicorn
82
+ # uvicorn.run(app, host="0.0.0.0", port=8000)
83
+
84
+
85
+ # if __name__=='__app__':
86
+ # uvicorn.run('main:app', reload=True)
87
+
88
+
89
+
90
+
91
+
92
from fastapi import FastAPI, UploadFile, File
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import librosa
from fastapi.responses import HTMLResponse
import uvicorn
import io

# Application instance serving the upload form (GET /) and the
# transcription endpoint (POST /transcribe).
app = FastAPI()

# Load model and processor once at import time so every request reuses
# them instead of re-downloading/re-initialising per call.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
# Clear forced decoder ids so generate() auto-detects language/task
# rather than forcing the checkpoint's default prompt tokens.
model.config.forced_decoder_ids = None
105
 
 
 
 
 
106
@app.get("/")
def read_root():
    """Serve a minimal HTML page with a file-upload form.

    The form posts the chosen MP3/WAV file to /transcribe as
    multipart/form-data under the field name ``audio_file``.
    """
    page = """
    <html>
        <body>
            <h2>ASR Transcription</h2>
            <form action="/transcribe" method="post" enctype="multipart/form-data">
                <label for="audio_file">Upload an audio file (MP3 or WAV):</label>
                <input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
                <input type="submit" value="Transcribe">
            </form>
        </body>
    </html>
    """
    return HTMLResponse(content=page, status_code=200)
121
+
122
@app.post("/transcribe")
async def transcribe_audio(audio_file: UploadFile = File(...)):
    """Transcribe an uploaded audio file (MP3/WAV) with Whisper.

    Parameters:
        audio_file: multipart upload field named ``audio_file``. The
            ``File(...)`` default makes the parameter work on older
            FastAPI versions that do not infer form fields from a bare
            ``UploadFile`` annotation.

    Returns:
        ``{"transcription": <text>}`` on success, or
        ``{"error": <message>}`` on failure (HTTP 200 either way,
        matching the original in-band error contract).
    """
    try:
        # Read the whole upload into memory; librosa decodes from the
        # in-memory buffer and resamples to Whisper's expected 16 kHz.
        raw_bytes = await audio_file.read()
        waveform, _ = librosa.load(io.BytesIO(raw_bytes), sr=16000)

        # Pass the numpy waveform directly (no wasteful .tolist() copy)
        # and state the sampling rate explicitly so the processor does
        # not have to assume it (and does not emit a warning).
        input_features = processor(
            waveform, sampling_rate=16000, return_tensors="pt"
        ).input_features

        predicted_ids = model.generate(input_features)
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

        return {"transcription": transcription[0]}
    except Exception as e:
        # NOTE(review): errors are reported in-band with status 200 to
        # preserve the original contract; a future change could raise
        # HTTPException(status_code=400) instead.
        return {"error": str(e)}
139
 
140
+ # if __name__ == "__main__":
+ #     uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
 
 
 
 
 
 
142
 
requirements.txt CHANGED
@@ -4,4 +4,5 @@ datasets
4
  fastapi
5
  uvicorn
6
  transformers
7
- Torch
 
 
4
  fastapi
5
  uvicorn
6
  transformers
7
+ Torch
8
+ librosa