Shrirang20 committed on
Commit
51e92f1
1 Parent(s): 6272d28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +440 -441
app.py CHANGED
@@ -1,442 +1,441 @@
1
-
2
-
3
- # Commented out IPython magic to ensure Python compatibility.
4
- # %%shell
5
- #
6
- # pip install -q langchain_community langchain_huggingface faiss-cpu gradio openai google-generativeai langchain-google-genai torch torchvision torchaudio youtokentome pypdf accelerate
7
- #
8
-
9
-
10
- # Commented out IPython magic to ensure Python compatibility.
11
- # %%capture
12
- # %%shell
13
- #
14
- # # Install the custom version of NeMo by AI4Bharat
15
- # wget https://indic-asr-public.objectstore.e2enetworks.net/ai4b_nemo.zip
16
- #
17
- # unzip -q /content/ai4b_nemo.zip && cd NeMo
18
- # bash reinstall.sh
19
- #
20
- # cd ..
21
- #
22
-
23
- # Commented out IPython magic to ensure Python compatibility.
24
- # %%capture
25
- # %%shell
26
- #
27
- # git clone -q https://github.com/VarunGumma/IndicTransTokenizer
28
- # cd IndicTransTokenizer
29
- # pip install -q --editable ./
30
- # cd ..
31
- #
32
-
33
-
34
- # Commented out IPython magic to ensure Python compatibility.
35
- # %%capture
36
- # %%shell
37
- #
38
- # apt-get install libsndfile1-dev ffmpeg
39
- #
40
- # git clone https://github.com/gokulkarthik/TTS
41
- # cd TTS
42
- #
43
- # pip3 install -e .[all]
44
- # pip3 install -r requirements.txt
45
- #
46
- # cd ..
47
- #
48
-
49
- """## **Restart session**
50
- """
51
-
52
- # Commented out IPython magic to ensure Python compatibility.
53
- # %%capture
54
- # !pip install gradio
55
-
56
- # Commented out IPython magic to ensure Python compatibility.
57
- # %%capture
58
- #
59
- # # INFO: If you're unable to import these libraries, just rerun this cell again.
60
- #
61
- import gradio as gr
62
- from torch import cuda, inference_mode
63
- import nemo.collections.asr as nemo_asr
64
- from IndicTransTokenizer import IndicProcessor
65
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
66
-
67
-
68
- DEVICE = "cuda" if cuda.is_available() else "cpu"
69
-
70
- print(f"Using device: {DEVICE}")
71
-
72
-
73
- import os
74
- from langchain_community.vectorstores import FAISS
75
- from langchain_huggingface import HuggingFaceEmbeddings
76
- from langchain_community.document_loaders import PyPDFLoader
77
-
78
- """### Load and convert PDF data into vectorDB"""
79
-
80
- pm_kisan_doc = "/content/PM-KISANOperationalGuidelines(English).pdf"
81
-
82
- from langchain_community.document_loaders import PyPDFLoader
83
- from langchain.text_splitter import RecursiveCharacterTextSplitter
84
-
85
- text_splitter = RecursiveCharacterTextSplitter(
86
- chunk_size=600,
87
- chunk_overlap=100
88
- )
89
-
90
- loader = PyPDFLoader(pm_kisan_doc)
91
- pages = loader.load_and_split(text_splitter=text_splitter)
92
-
93
- pages_chunks = [page.page_content for page in pages]
94
- print(f"Generated {len(pages_chunks)} chunks of {pm_kisan_doc}")
95
-
96
- pages_chunks[8]
97
-
98
- embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
99
-
100
- faiss = FAISS.from_texts(pages_chunks, embeddings)
101
-
102
- """### Querying the vectorDB"""
103
-
104
- # Test query
105
- result = faiss.similarity_search("what are the benefits of PM kisan yojna", k=3)
106
-
107
- # This returns the most relevant doc similar to the query
108
-
109
- print(result[0].page_content)
110
-
111
- Result_with_score = faiss.similarity_search_with_score("what are the benefits of PM kisan yojna", k=3)
112
- Result_with_score[0]
113
-
114
- os.environ['GEMINI_API_KEY'] = userdata.get('GEMINI_API_KEY')
115
-
116
- import google.generativeai as genai
117
-
118
- def get_gemini_output(prompt, temperature=0.6):
119
-
120
- genai.configure(api_key= os.environ['GEMINI_API_KEY'])
121
- model = genai.GenerativeModel(model_name='gemini-pro')
122
- answer = model.generate_content(prompt,
123
- generation_config=genai.types.GenerationConfig(
124
- temperature=0.6))
125
-
126
- return answer.text
127
-
128
- """## Build an end-to-end RAG powered Voice Assistant
129
- """
130
-
131
- ip = IndicProcessor(inference=True)
132
-
133
- # Commented out IPython magic to ensure Python compatibility.
134
- # # %%capture
135
-
136
- en2indic_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True)
137
- en2indic_model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-en-indic-dist-200M", trust_remote_code=True)
138
-
139
-
140
- # Commented out IPython magic to ensure Python compatibility.
141
- # # %%capture
142
-
143
- indic2en_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indictrans2-indic-en-dist-200M", trust_remote_code=True)
144
- indic2en_model = AutoModelForSeq2SeqLM.from_pretrained("ai4bharat/indictrans2-indic-en-dist-200M", trust_remote_code=True)
145
-
146
-
147
- model_tokenizer_config = {
148
- "en2indic": {
149
- "tokenizer": en2indic_tokenizer,
150
- "model": en2indic_model,
151
- },
152
- "indic2en": {
153
- "tokenizer": indic2en_tokenizer,
154
- "model": indic2en_model,
155
- }
156
- }
157
-
158
- def indic_translate(src_lang: str, tgt_lang: str, sents_to_translate: list):
159
-
160
- lang_map = {
161
- "punjabi": "pan_Guru",
162
- "bengali": "ben_Beng",
163
- "malayalam": "mal_Mlym",
164
- "marathi": "mar_Deva",
165
- "tamil": "tam_Taml",
166
- "gujarati": "guj_Gujr",
167
- "telugu": "tel_Telu",
168
- "hindi": "hin_Deva",
169
- "kannada": "kan_Knda",
170
- "odia": "ory_Orya",
171
- "english": "eng_Latn"
172
- }
173
-
174
- src_lang = lang_map[src_lang]
175
- tgt_lang = lang_map[tgt_lang]
176
-
177
- if src_lang == "eng_Latn":
178
- tokenizer = model_tokenizer_config["en2indic"]["tokenizer"]
179
- model = model_tokenizer_config["en2indic"]["model"]
180
-
181
- print(f"Using en2indic, src_lang: {src_lang}, tgt_lang: {tgt_lang}")
182
-
183
- else:
184
- tokenizer = model_tokenizer_config["indic2en"]["tokenizer"]
185
- model = model_tokenizer_config["indic2en"]["model"]
186
-
187
- print(f"Using indic2en, src_lang: {src_lang}, tgt_lang: {tgt_lang}")
188
-
189
-
190
- batch = ip.preprocess_batch(sents_to_translate, src_lang=src_lang, tgt_lang=tgt_lang, show_progress_bar=False)
191
- batch = tokenizer(batch, padding="longest", truncation=True, max_length=256, return_tensors="pt")
192
-
193
- with inference_mode():
194
- print("Generating...")
195
- outputs = model.generate(**batch, num_beams=5, num_return_sequences=1, max_length=256)
196
-
197
- with tokenizer.as_target_tokenizer():
198
- outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)
199
-
200
- if tgt_lang != "en_Latn":
201
- print(f"Postprocessing for {tgt_lang}")
202
- outputs = ip.postprocess_batch(outputs, lang=tgt_lang)
203
-
204
-
205
- return outputs
206
-
207
- def download_ai4b_tts_model(lang: str):
208
-
209
- lang_map = {
210
- "odia": "or",
211
- "hindi": "hi",
212
- "tamil": "ta",
213
- "telugu": "te",
214
- "punjabi": "pa",
215
- "kannada": "kn",
216
- "bengali": "bn",
217
- "marathi": "mr",
218
- "gujarati": "gu",
219
- "malayalam": "ml",
220
- }
221
-
222
- selected_lang = lang_map[lang]
223
-
224
- download_path = f"/{selected_lang}.zip"
225
-
226
- if os.path.exists(download_path):
227
- print(f"IndicTTS Model for {lang} already exists.")
228
-
229
- def run_tts(text, tts_lang):
230
-
231
- lang_map = {
232
- "odia": "or",
233
- "hindi": "hi",
234
- "tamil": "ta",
235
- "telugu": "te",
236
- "punjabi": "pa",
237
- "kannada": "kn",
238
- "bengali": "bn",
239
- "marathi": "mr",
240
- "gujarati": "gu",
241
- "malayalam": "ml",
242
- }
243
-
244
- download_ai4b_tts_model(lang=tts_lang)
245
-
246
- tts_lang = lang_map[tts_lang]
247
- print(f"Lang code: {tts_lang}")
248
-
249
-
250
- tts_command = f'python3 -m TTS.bin.synthesize --text "{text}" \
251
- --model_path /models/v1/{tts_lang}/fastpitch/best_model.pth \
252
- --config_path /models/v1/{tts_lang}/fastpitch/config.json \
253
- --vocoder_path /models/v1/{tts_lang}/hifigan/best_model.pth \
254
- --vocoder_config_path /models/v1/{tts_lang}/hifigan/config.json \
255
- --speakers_file_path /models/v1/{tts_lang}/fastpitch/speakers.pth \
256
- --out_path /tts_output.wav \
257
- --speaker_idx male'
258
-
259
- if DEVICE == "cuda":
260
- tts_command += " --use_cuda True"
261
- print(f"Running IndicTTS on GPU")
262
-
263
- else:
264
- print(f"Running IndicTTS on CPU")
265
-
266
- os.system(tts_command)
267
-
268
- os.makedirs('/asr_models')
269
-
270
- def download_ai4b_asr_model(lang: str):
271
-
272
- available_langs = {
273
- "odia": "or",
274
- "hindi": "hi",
275
- "tamil": "ta",
276
- "telugu": "te",
277
- "punjabi": "pa",
278
- "kannada": "kn",
279
- "bengali": "bn",
280
- "marathi": "mr",
281
- "gujarati": "gu",
282
- "malayalam": "ml",
283
- }
284
-
285
- download_path = f"/asr_models/ai4b_indicConformer_{available_langs[lang]}.nemo"
286
- print(f"Downloaded ASR model path: {download_path}")
287
-
288
- if os.path.exists(download_path):
289
- print(f"Model for {lang} already exists.")
290
-
291
- elif lang not in available_langs:
292
- raise ValueError(f"Invalid language code: {lang}")
293
-
294
- return download_path
295
-
296
- import librosa
297
-
298
- def preprocess_audio(audio_path):
299
- audio, sr = librosa.load(audio_path, sr=None, mono=True)
300
- return audio, sr
301
-
302
- def transcribe(audio: str, lang: str):
303
- audio, sr = preprocess_audio(audio)
304
-
305
- lang_map = {
306
- "odia": "or",
307
- "hindi": "hi",
308
- "tamil": "ta",
309
- "telugu": "te",
310
- "punjabi": "pa",
311
- "kannada": "kn",
312
- "bengali": "bn",
313
- "marathi": "mr",
314
- "gujarati": "gu",
315
- "malayalam": "ml",
316
- }
317
-
318
- download_path = download_ai4b_asr_model(lang=lang)
319
-
320
- asr_model = nemo_asr.models.ASRModel.restore_from(
321
- download_path, map_location=DEVICE
322
- )
323
-
324
- transcription = asr_model.transcribe(audio, batch_size=1, language_id=lang_map[lang])[0][0]
325
- print(f"Transcription: {transcription}")
326
-
327
- return transcription
328
-
329
- def query_vector_db(query):
330
- # Combine the top-3 similar documents from the vectorDB
331
- result = " ".join([result.page_content for result in faiss.similarity_search(query, k=3)])
332
-
333
- return result
334
-
335
- from langchain_core.prompts import PromptTemplate
336
-
337
- def process_user_query(user_query, retrieved_doc):
338
-
339
- prompt_template = PromptTemplate.from_template(
340
- "You are a chatbot , which provides information to user based on their queries, \
341
- the user asks: {user_query}, The information from the related query is: {retrieved_doc}. \
342
- Now give the output based on the query and relevant information that i provided, written in a structured, well-formatted and concise way. \
343
- The length of the output should be no more than 70 words, must be in 5 lines."
344
- )
345
-
346
- prompt = prompt_template.format(user_query=user_query, retrieved_doc=retrieved_doc)
347
-
348
- processed_doc = get_gemini_output(prompt)
349
- print(processed_doc)
350
-
351
- return processed_doc
352
-
353
- #Context awareness
354
- from collections import deque
355
-
356
- class ContextManger:
357
- def __init__(self,max_history=7):
358
- self.history = deque(maxlen=max_history)
359
-
360
- def add_interaction(self,query,response):
361
- self.history.append((query,response))
362
-
363
- def get_context(self):
364
- return list(self.history)
365
-
366
- context_manager = ContextManger()
367
-
368
- # context = context_manager.get_context()
369
- # contexulized_query = f"Previous context: {context} \n\nCurrent query: {indic_to_en}"
370
-
371
- import traceback
372
-
373
- def process_gradio_input(audio, user_lang):
374
- try:
375
- # Use IndicASR to transcribe the input audio
376
- print(f"Transcribing...")
377
- query_transcription = transcribe(audio, lang=user_lang)
378
-
379
- # Convert the Indic text from transcription to English, so that GPT-3.5 can process it
380
- print(f"Translating indic to en..")
381
- indic_to_en = indic_translate(src_lang=user_lang, tgt_lang="english", sents_to_translate=[query_transcription])[0]
382
-
383
- # context_manager = ContextManager()
384
-
385
- context = context_manager.get_context()
386
- contexulized_query = f"Previous context: {context} \n\nCurrent query: {indic_to_en}"
387
-
388
- # Query the Vector DB to get the relevant document from the query
389
- print(f"Querying vector db")
390
- retrieved_doc = query_vector_db(contexulized_query)
391
-
392
- # Extract relevant information from the retrieved document
393
- print(f"Processing user query")
394
- processed_doc = process_user_query(user_query=contexulized_query, retrieved_doc=retrieved_doc)
395
-
396
- context_manager.add_interaction(indic_to_en, processed_doc)
397
-
398
- # Break the document into chunks for faster batch processing
399
- print(f"Breaking document into chunks..")
400
- processed_doc_chunks = processed_doc.strip().split(". ")
401
- processed_doc_chunks = [f"{chunk}." for chunk in processed_doc_chunks if chunk != ""]
402
-
403
- # Translate the the extracted information back to Indic language
404
- print(f"Translating en to indic..")
405
- en_to_indic_chunks = indic_translate(src_lang="english", tgt_lang=user_lang, sents_to_translate=processed_doc_chunks)
406
- en_to_indic_doc = " ".join(en_to_indic_chunks)
407
- print(f"en_to_indic_doc: {en_to_indic_doc}")
408
-
409
- # Run IndicTTS to generate audio
410
- print(f"Running TTS to generate audio..")
411
- run_tts(text=en_to_indic_doc, tts_lang=user_lang)
412
- print("Finished running TTS")
413
-
414
- audio_outfile_path = "/content/tts_output.wav"
415
-
416
-
417
- return en_to_indic_doc, audio_outfile_path
418
-
419
- except Exception as e:
420
- error_message = f"An error occurred: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
421
- print(error_message)
422
- return error_message, None
423
-
424
- def launch_gradio_app(show_log=False):
425
-
426
- languages = ["hindi", "odia", "tamil", "telugu", "punjabi", "kannada", "bengali", "marathi", "gujarati", "malayalam"]
427
-
428
- iface = gr.Interface(
429
- fn=process_gradio_input,
430
- inputs=[
431
- gr.Audio(sources=['upload', 'microphone'], type="filepath", show_download_button=True), # Input audio
432
- gr.Dropdown(languages, label="Language", value="hindi"), # Language selection
433
- ],
434
- outputs=["text", "audio"],
435
- allow_flagging="never",
436
- title="Farmer's Voice Assistant 🧑‍🌾 Powered by AI4Bharat Tech",
437
- description="Know about latest farming schemes, this system is powered by tools from AI4Bharat, like IndicASR, IndicTTS and IndicTrans",
438
- )
439
-
440
- iface.launch(debug=show_log)
441
-
442
  launch_gradio_app(show_log=True)
 
1
+
2
+
3
+ # Commented out IPython magic to ensure Python compatibility.
4
+ # %%shell
5
+ # pip install -q langchain_community langchain_huggingface faiss-cpu gradio openai google-generativeai langchain-google-genai torch torchvision torchaudio youtokentome pypdf accelerate
6
+
7
+
8
+
9
+ # Commented out IPython magic to ensure Python compatibility.
10
+ # %%capture
11
+ # %%shell
12
+ #
13
+ # # Install the custom version of NeMo by AI4Bharat
14
+ # wget https://indic-asr-public.objectstore.e2enetworks.net/ai4b_nemo.zip
15
+ #
16
+ # unzip -q /content/ai4b_nemo.zip && cd NeMo
17
+ # bash reinstall.sh
18
+ #
19
+ # cd ..
20
+ #
21
+
22
+ # Commented out IPython magic to ensure Python compatibility.
23
+ # %%capture
24
+ # %%shell
25
+ #
26
+ # git clone -q https://github.com/VarunGumma/IndicTransTokenizer
27
+ # cd IndicTransTokenizer
28
+ # pip install -q --editable ./
29
+ # cd ..
30
+ #
31
+
32
+
33
+ # Commented out IPython magic to ensure Python compatibility.
34
+ # %%capture
35
+ # %%shell
36
+ #
37
+ # apt-get install libsndfile1-dev ffmpeg
38
+ #
39
+ # git clone https://github.com/gokulkarthik/TTS
40
+ # cd TTS
41
+ #
42
+ # pip3 install -e .[all]
43
+ # pip3 install -r requirements.txt
44
+ #
45
+ # cd ..
46
+ #
47
+
48
+ """## **Restart session**
49
+ """
50
+
51
+ # Commented out IPython magic to ensure Python compatibility.
52
+ # %%capture
53
+ # !pip install gradio
54
+
55
+ # Commented out IPython magic to ensure Python compatibility.
56
+ # %%capture
57
+ #
58
+ # # INFO: If you're unable to import these libraries, just rerun this cell again.
59
+ #
60
+ import gradio as gr
61
+ from torch import cuda, inference_mode
62
+ import nemo.collections.asr as nemo_asr
63
+ from IndicTransTokenizer import IndicProcessor
64
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
65
+
66
+
67
+ DEVICE = "cuda" if cuda.is_available() else "cpu"
68
+
69
+ print(f"Using device: {DEVICE}")
70
+
71
+
72
+ import os
73
+ from langchain_community.vectorstores import FAISS
74
+ from langchain_huggingface import HuggingFaceEmbeddings
75
+ from langchain_community.document_loaders import PyPDFLoader
76
+
77
+ """### Load and convert PDF data into vectorDB"""
78
+
79
# Source PDF that is chunked and indexed for retrieval.
pm_kisan_doc = "/content/PM-KISANOperationalGuidelines(English).pdf"

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=100,
)

loader = PyPDFLoader(pm_kisan_doc)
pages = loader.load_and_split(text_splitter=text_splitter)

# Keep only the raw text of every chunk; FAISS.from_texts takes plain strings.
pages_chunks = [page.page_content for page in pages]
print(f"Generated {len(pages_chunks)} chunks of {pm_kisan_doc}")

# NOTE: the bare `pages_chunks[8]` notebook expression was dropped. It shows
# nothing when run as a script and raised IndexError for fewer than 9 chunks.

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

faiss = FAISS.from_texts(pages_chunks, embeddings)

# ### Querying the vectorDB

# Test query
result = faiss.similarity_search("what are the benefits of PM kisan yojna", k=3)

# Print the first hit for the test query, if the search returned anything.
if result:
    print(result[0].page_content)

# Same test query, with the score attached to each hit. The bare
# `Result_with_score[0]` notebook expression was dropped for the same reason
# as above.
Result_with_score = faiss.similarity_search_with_score("what are the benefits of PM kisan yojna", k=3)
112
+
113
# `userdata` is never imported or defined in this script, so the former
# `os.environ['GEMINI_API_KEY'] = userdata.get('GEMINI_API_KEY')` raised
# NameError. The key is read from os.environ by get_gemini_output, so require
# it to be present and fail early with a clear message when it is not.
if not os.environ.get('GEMINI_API_KEY'):
    raise RuntimeError("GEMINI_API_KEY environment variable is not set")
114
+
115
+ import google.generativeai as genai
116
+
117
def get_gemini_output(prompt, temperature=0.6):
    """Send `prompt` to the Gemini model and return the generated text.

    Args:
        prompt: Prompt passed to `generate_content`.
        temperature: Sampling temperature placed in the generation config.
            Defaults to 0.6.

    Returns:
        The `text` attribute of the model response.

    Raises:
        KeyError: If `GEMINI_API_KEY` is missing from the environment.
    """
    genai.configure(api_key=os.environ['GEMINI_API_KEY'])
    model = genai.GenerativeModel(model_name='gemini-pro')
    answer = model.generate_content(
        prompt,
        # Forward the caller's value. It used to be hard-coded to 0.6, which
        # silently ignored the `temperature` argument.
        generation_config=genai.types.GenerationConfig(temperature=temperature),
    )

    return answer.text
126
+
127
+ """## Build an end-to-end RAG powered Voice Assistant
128
+ """
129
+
130
# Shared pre/post-processor used by indic_translate.
ip = IndicProcessor(inference=True)

# English -> Indic checkpoint (tokenizer and model come from the same repo id).
en2indic_tokenizer = AutoTokenizer.from_pretrained(
    "ai4bharat/indictrans2-en-indic-dist-200M",
    trust_remote_code=True,
)
en2indic_model = AutoModelForSeq2SeqLM.from_pretrained(
    "ai4bharat/indictrans2-en-indic-dist-200M",
    trust_remote_code=True,
)

# Indic -> English checkpoint.
indic2en_tokenizer = AutoTokenizer.from_pretrained(
    "ai4bharat/indictrans2-indic-en-dist-200M",
    trust_remote_code=True,
)
indic2en_model = AutoModelForSeq2SeqLM.from_pretrained(
    "ai4bharat/indictrans2-indic-en-dist-200M",
    trust_remote_code=True,
)

# Lookup table: translation direction -> the tokenizer/model pair to use.
model_tokenizer_config = dict(
    en2indic=dict(tokenizer=en2indic_tokenizer, model=en2indic_model),
    indic2en=dict(tokenizer=indic2en_tokenizer, model=indic2en_model),
)
156
+
157
def indic_translate(src_lang: str, tgt_lang: str, sents_to_translate: list):
    """Translate a batch of sentences with the IndicTrans2 checkpoints.

    Args:
        src_lang: Lower-case language name of the input (a key of `lang_map`).
        tgt_lang: Lower-case language name of the output (a key of `lang_map`).
        sents_to_translate: Sentences translated together as one batch.

    Returns:
        The translations after `ip.postprocess_batch`.

    Raises:
        KeyError: If either language name is not a key of `lang_map`.
    """
    lang_map = {
        "punjabi": "pan_Guru",
        "bengali": "ben_Beng",
        "malayalam": "mal_Mlym",
        "marathi": "mar_Deva",
        "tamil": "tam_Taml",
        "gujarati": "guj_Gujr",
        "telugu": "tel_Telu",
        "hindi": "hin_Deva",
        "kannada": "kan_Knda",
        "odia": "ory_Orya",
        "english": "eng_Latn",
    }

    src_lang = lang_map[src_lang]
    tgt_lang = lang_map[tgt_lang]

    # English input uses the en->indic pair; any other source uses indic->en.
    direction = "en2indic" if src_lang == "eng_Latn" else "indic2en"
    tokenizer = model_tokenizer_config[direction]["tokenizer"]
    model = model_tokenizer_config[direction]["model"]
    print(f"Using {direction}, src_lang: {src_lang}, tgt_lang: {tgt_lang}")

    batch = ip.preprocess_batch(sents_to_translate, src_lang=src_lang, tgt_lang=tgt_lang, show_progress_bar=False)
    batch = tokenizer(batch, padding="longest", truncation=True, max_length=256, return_tensors="pt")

    with inference_mode():
        print("Generating...")
        outputs = model.generate(**batch, num_beams=5, num_return_sequences=1, max_length=256)

    with tokenizer.as_target_tokenizer():
        outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True, clean_up_tokenization_spaces=True)

    # The former guard `tgt_lang != "en_Latn"` compared against a code that
    # lang_map never produces (English is "eng_Latn"), so it was always true.
    # Post-processing therefore always ran; the dead condition is removed.
    print(f"Postprocessing for {tgt_lang}")
    outputs = ip.postprocess_batch(outputs, lang=tgt_lang)

    return outputs
205
+
206
def download_ai4b_tts_model(lang: str):
    """Report whether the IndicTTS archive for `lang` is already on disk.

    The function only checks for `/<code>.zip` and prints a message when it
    exists; nothing is fetched here.

    Args:
        lang: Lower-case language name, e.g. "hindi".

    Raises:
        KeyError: If `lang` is not one of the supported language names.
    """
    codes = dict(
        odia="or",
        hindi="hi",
        tamil="ta",
        telugu="te",
        punjabi="pa",
        kannada="kn",
        bengali="bn",
        marathi="mr",
        gujarati="gu",
        malayalam="ml",
    )

    archive = "/" + codes[lang] + ".zip"

    if not os.path.exists(archive):
        return

    print(f"IndicTTS Model for {lang} already exists.")
227
+
228
def run_tts(text, tts_lang):
    """Synthesize `text` with the IndicTTS CLI and write `/tts_output.wav`.

    Args:
        text: Text to speak, passed to the CLI as a single argument.
        tts_lang: Lower-case language name, e.g. "hindi".

    Raises:
        KeyError: If `tts_lang` is not one of the supported language names.
        subprocess.CalledProcessError: If the synthesis process exits non-zero.
    """
    import subprocess

    lang_map = {
        "odia": "or",
        "hindi": "hi",
        "tamil": "ta",
        "telugu": "te",
        "punjabi": "pa",
        "kannada": "kn",
        "bengali": "bn",
        "marathi": "mr",
        "gujarati": "gu",
        "malayalam": "ml",
    }

    download_ai4b_tts_model(lang=tts_lang)

    tts_lang = lang_map[tts_lang]
    print(f"Lang code: {tts_lang}")

    model_dir = f"/models/v1/{tts_lang}"

    # Argument list instead of a shell string: `text` is model-generated and
    # may contain quotes or shell metacharacters. Those broke the old
    # `os.system` command and could be executed by the shell.
    tts_command = [
        "python3", "-m", "TTS.bin.synthesize",
        "--text", text,
        "--model_path", f"{model_dir}/fastpitch/best_model.pth",
        "--config_path", f"{model_dir}/fastpitch/config.json",
        "--vocoder_path", f"{model_dir}/hifigan/best_model.pth",
        "--vocoder_config_path", f"{model_dir}/hifigan/config.json",
        "--speakers_file_path", f"{model_dir}/fastpitch/speakers.pth",
        "--out_path", "/tts_output.wav",
        "--speaker_idx", "male",
    ]

    if DEVICE == "cuda":
        tts_command += ["--use_cuda", "True"]
        print(f"Running IndicTTS on GPU")

    else:
        print(f"Running IndicTTS on CPU")

    # check=True makes a failed synthesis raise instead of being ignored.
    subprocess.run(tts_command, check=True)
266
+
267
# exist_ok avoids FileExistsError when the directory is left over from an
# earlier run of this script.
os.makedirs('/asr_models', exist_ok=True)
268
+
269
def download_ai4b_asr_model(lang: str):
    """Return the expected local path of the IndicConformer checkpoint for `lang`.

    The function builds the path under `/asr_models` and prints whether the
    file is already present; nothing is fetched here.

    Args:
        lang: Lower-case language name, e.g. "hindi".

    Returns:
        Path of the `.nemo` checkpoint for the language.

    Raises:
        ValueError: If `lang` is not one of the supported language names.
    """
    available_langs = {
        "odia": "or",
        "hindi": "hi",
        "tamil": "ta",
        "telugu": "te",
        "punjabi": "pa",
        "kannada": "kn",
        "bengali": "bn",
        "marathi": "mr",
        "gujarati": "gu",
        "malayalam": "ml",
    }

    # Validate before indexing. The lookup below used to run first, so an
    # unknown language raised KeyError and this ValueError was unreachable.
    if lang not in available_langs:
        raise ValueError(f"Invalid language code: {lang}")

    download_path = f"/asr_models/ai4b_indicConformer_{available_langs[lang]}.nemo"
    print(f"Downloaded ASR model path: {download_path}")

    if os.path.exists(download_path):
        print(f"Model for {lang} already exists.")

    return download_path
294
+
295
+ import librosa
296
+
297
def preprocess_audio(audio_path):
    """Load `audio_path` as mono audio without resampling (`sr=None`).

    Returns:
        The `(audio, sample_rate)` pair produced by `librosa.load`.
    """
    samples, sample_rate = librosa.load(audio_path, mono=True, sr=None)
    return samples, sample_rate
300
+
301
def transcribe(audio: str, lang: str):
    """Transcribe an audio file with the IndicConformer model for `lang`.

    Args:
        audio: Path of the audio file to transcribe.
        lang: Lower-case language name, e.g. "hindi".

    Returns:
        Element `[0][0]` of the result of the model's `transcribe` call.
    """
    # The sample rate is returned by the loader but not used further on.
    audio, _sample_rate = preprocess_audio(audio)

    language_ids = dict(
        odia="or",
        hindi="hi",
        tamil="ta",
        telugu="te",
        punjabi="pa",
        kannada="kn",
        bengali="bn",
        marathi="mr",
        gujarati="gu",
        malayalam="ml",
    )

    checkpoint_path = download_ai4b_asr_model(lang=lang)

    # The checkpoint is restored on every call, onto the module-wide DEVICE.
    asr_model = nemo_asr.models.ASRModel.restore_from(checkpoint_path, map_location=DEVICE)

    hypotheses = asr_model.transcribe(audio, batch_size=1, language_id=language_ids[lang])
    transcription = hypotheses[0][0]
    print(f"Transcription: {transcription}")

    return transcription
327
+
328
def query_vector_db(query):
    """Return the text of the 3 most similar chunks, joined by single spaces.

    Args:
        query: Query string passed to the FAISS similarity search.
    """
    top_matches = faiss.similarity_search(query, k=3)
    return " ".join(match.page_content for match in top_matches)
333
+
334
+ from langchain_core.prompts import PromptTemplate
335
+
336
def process_user_query(user_query, retrieved_doc):
    """Ask Gemini to answer `user_query` using `retrieved_doc` as context.

    Args:
        user_query: The user's question, inserted into the prompt.
        retrieved_doc: Retrieved text, inserted into the prompt.

    Returns:
        The text returned by `get_gemini_output` for the filled-in prompt.
    """
    # Adjacent literals are concatenated into the single prompt template.
    template_text = (
        "You are a chatbot , which provides information to user based on their queries, "
        "the user asks: {user_query}, The information from the related query is: {retrieved_doc}. "
        "Now give the output based on the query and relevant information that i provided, written in a structured, well-formatted and concise way. "
        "The length of the output should be no more than 70 words, must be in 5 lines."
    )

    prompt = PromptTemplate.from_template(template_text).format(
        user_query=user_query,
        retrieved_doc=retrieved_doc,
    )

    processed_doc = get_gemini_output(prompt)
    print(processed_doc)

    return processed_doc
351
+
352
+ #Context awareness
353
+ from collections import deque
354
+
355
class ContextManger:
    """Rolling window of the most recent (query, response) exchanges."""

    def __init__(self, max_history=7):
        # A bounded deque drops its oldest entry once `max_history` is reached.
        self.history = deque([], maxlen=max_history)

    def add_interaction(self, query, response):
        """Record one exchange as a `(query, response)` tuple."""
        exchange = (query, response)
        self.history.append(exchange)

    def get_context(self):
        """Return the stored exchanges, oldest first, as a new list."""
        return [*self.history]
364
+
365
+ context_manager = ContextManger()
366
+
367
+ # context = context_manager.get_context()
368
+ # contexulized_query = f"Previous context: {context} \n\nCurrent query: {indic_to_en}"
369
+
370
+ import traceback
371
+
372
def process_gradio_input(audio, user_lang):
    """Run the voice pipeline: ASR, translation, retrieval, LLM answer, TTS.

    Args:
        audio: Path of the recorded or uploaded audio file.
        user_lang: Lower-case language name chosen in the UI.

    Returns:
        `(answer_text, audio_path)` on success, where `answer_text` is the
        answer translated into `user_lang` and `audio_path` is the file
        written by run_tts. On any exception, `(error_message, None)`.
    """
    try:
        # Use IndicASR to transcribe the input audio
        print(f"Transcribing...")
        query_transcription = transcribe(audio, lang=user_lang)

        # Convert the Indic transcription to English so that Gemini can process it
        print(f"Translating indic to en..")
        indic_to_en = indic_translate(src_lang=user_lang, tgt_lang="english", sents_to_translate=[query_transcription])[0]

        # Prepend the stored conversation history to the current query
        context = context_manager.get_context()
        contexulized_query = f"Previous context: {context} \n\nCurrent query: {indic_to_en}"

        # Query the Vector DB to get the relevant document from the query
        print(f"Querying vector db")
        retrieved_doc = query_vector_db(contexulized_query)

        # Extract relevant information from the retrieved document
        print(f"Processing user query")
        processed_doc = process_user_query(user_query=contexulized_query, retrieved_doc=retrieved_doc)

        context_manager.add_interaction(indic_to_en, processed_doc)

        # Break the document into chunks for faster batch processing
        print(f"Breaking document into chunks..")
        sentences = [chunk.strip() for chunk in processed_doc.strip().split(". ")]
        # Splitting on ". " removes the period from every chunk except the
        # last one, so only add a period where it is missing. Blindly
        # appending one used to turn the final sentence into "..".
        processed_doc_chunks = [
            chunk if chunk.endswith(".") else f"{chunk}."
            for chunk in sentences
            if chunk
        ]

        # Translate the extracted information back to the Indic language
        print(f"Translating en to indic..")
        en_to_indic_chunks = indic_translate(src_lang="english", tgt_lang=user_lang, sents_to_translate=processed_doc_chunks)
        en_to_indic_doc = " ".join(en_to_indic_chunks)
        print(f"en_to_indic_doc: {en_to_indic_doc}")

        # Run IndicTTS to generate audio
        print(f"Running TTS to generate audio..")
        run_tts(text=en_to_indic_doc, tts_lang=user_lang)
        print("Finished running TTS")

        # Must match the `--out_path` that run_tts passes to the TTS CLI. The
        # previous "/content/tts_output.wav" pointed at a file run_tts never
        # writes.
        audio_outfile_path = "/tts_output.wav"

        return en_to_indic_doc, audio_outfile_path

    except Exception as e:
        # Top-level UI boundary: show the failure in the text output instead
        # of crashing the Gradio handler.
        error_message = f"An error occurred: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
        print(error_message)
        return error_message, None
422
+
423
def launch_gradio_app(show_log=False):
    """Build the Gradio interface for the voice assistant and launch it.

    Args:
        show_log: Forwarded to `launch` as its `debug` argument.
    """
    languages = [
        "hindi",
        "odia",
        "tamil",
        "telugu",
        "punjabi",
        "kannada",
        "bengali",
        "marathi",
        "gujarati",
        "malayalam",
    ]

    # Input widgets, in the order process_gradio_input expects them.
    audio_input = gr.Audio(sources=['upload', 'microphone'], type="filepath", show_download_button=True)
    language_input = gr.Dropdown(languages, label="Language", value="hindi")

    iface = gr.Interface(
        fn=process_gradio_input,
        inputs=[audio_input, language_input],
        outputs=["text", "audio"],
        allow_flagging="never",
        title="Farmer's Voice Assistant 🧑‍🌾 Powered by AI4Bharat Tech",
        description="Know about latest farming schemes, this system is powered by tools from AI4Bharat, like IndicASR, IndicTTS and IndicTrans",
    )

    iface.launch(debug=show_log)
440
+
 
441
  launch_gradio_app(show_log=True)