pragneshbarik committed on
Commit
9d25320
•
1 Parent(s): a741634

refined UI

Browse files
__pycache__/chat_client.cpython-39.pyc ADDED
Binary file (1.19 kB). View file
 
__pycache__/mistral7b.cpython-39.pyc ADDED
Binary file (1.18 kB). View file
 
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- from mistral7b import chat
3
  import time
4
  import pandas as pd
5
  import pinecone
@@ -15,13 +15,20 @@ pinecone.init(
15
  environment='gcp-starter'
16
  )
17
 
18
- pinecone_index = pinecone.Index('ikigai-chat')
19
- text_vectorizer = SentenceTransformer('all-distilroberta-v1')
20
- chat_bots = {
21
  "Mixtral 8x7B v0.1" :"mistralai/Mixtral-8x7B-Instruct-v0.1",
22
  "Mistral 7B v0.1" : "mistralai/Mistral-7B-Instruct-v0.1",
23
  }
24
- prompt_injection = [
 
 
 
 
 
 
 
25
  """
26
  You are not Mistral AI, but rather a chat bot trained at Ikigai Labs. Whenever asked, you need to answer as Ikigai Labs' assistant.
27
  Ikigai helps modern analysts and operations teams automate data-intensive business, finance, analytics, and supply-chain operations.
@@ -36,8 +43,7 @@ prompt_injection = [
36
  Is there anything else you would like to know about Ikigai Labs or their products?
37
  """
38
  ]
39
-
40
- identity_change = [
41
  """
42
  You are Ikigai Chat from now on, so answer accordingly.
43
  """,
@@ -47,10 +53,9 @@ identity_change = [
47
  """
48
  ]
49
 
50
-
51
  def gen_augmented_prompt(prompt, top_k) :
52
- query_vector = text_vectorizer.encode(prompt).tolist()
53
- res = pinecone_index.query(vector=query_vector, top_k=top_k, include_metadata=True)
54
  matches = res['matches']
55
 
56
  context = ""
@@ -59,8 +64,6 @@ def gen_augmented_prompt(prompt, top_k) :
59
  context+=match["metadata"]["chunk"] + "\n\n"
60
  links.append(match["metadata"]["link"])
61
 
62
-
63
-
64
  generated_prompt = f"""
65
  FOR THIS GIVEN CONTEXT {context},
66
 
@@ -69,148 +72,174 @@ def gen_augmented_prompt(prompt, top_k) :
69
  """
70
  return generated_prompt, links
71
 
72
- data = {
73
- "Attribute": ["LLM", "Text Vectorizer", "Vector Database","CPU", "System RAM"],
74
- "Information": ["Mistral-7B-Instruct-v0.2","all-distilroberta-v1", "Hosted Pinecone" ,"2 vCPU", "16 GB"]
75
- }
76
- df = pd.DataFrame(data)
77
-
78
-
79
- st.set_page_config(
80
- page_title="Ikigai Chat",
81
- page_icon="🤖",
82
- )
83
-
84
- if "messages" not in st.session_state:
85
- st.session_state.messages = []
86
 
87
- if "tokens_used" not in st.session_state:
88
- st.session_state.tokens_used = 0
89
 
90
- if "inference_time" not in st.session_state:
91
- st.session_state.inference_time = [0.00]
92
 
93
- if "temp" not in st.session_state:
94
- st.session_state.temp = 0.8
95
 
96
- if "history" not in st.session_state:
97
- st.session_state.history = [prompt_injection]
98
 
99
- if "top_k" not in st.session_state:
100
- st.session_state.top_k = 4
101
 
102
- if "repetion_penalty" not in st.session_state :
103
- st.session_state.repetion_penalty = 1
104
 
105
- if "rag_enabled" not in st.session_state :
106
- st.session_state.rag_enabled = True
107
 
108
- if "chat_bot" not in st.session_state :
109
- st.session_state.chat_bot = "Mixtral 8x7B v0.1"
110
 
111
- with st.sidebar:
112
- st.markdown("# Retrieval Settings")
113
- st.session_state.rag_enabled = st.toggle("Activate RAG", value=True)
114
- st.session_state.top_k = st.slider(label="Documents to retrieve",
115
- min_value=1, max_value=10, value=4, disabled=not st.session_state.rag_enabled)
116
- st.markdown("---")
117
- st.markdown("# Model Analytics")
118
 
119
- st.write("Tokens used :", st.session_state['tokens_used'])
120
- st.write("Average Inference Time: ", round(sum(
121
- st.session_state["inference_time"]) / len(st.session_state["inference_time"]), 3), "Secs")
122
- st.write("Cost Incured :", round(
123
- 0.033 * st.session_state['tokens_used'] / 1000, 3), "INR")
 
 
 
 
124
 
125
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
- st.markdown("# Model Settings")
128
-
129
- st.session_state.chat_bot = st.sidebar.radio(
130
- 'Select one:', [key for key, value in chat_bots.items() ])
131
- st.session_state.temp = st.slider(
132
- label="Temperature", min_value=0.0, max_value=1.0, step=0.1, value=0.9)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- st.session_state.max_tokens = st.slider(
135
- label="New tokens to generate", min_value = 64, max_value=2048, step= 32, value=512
136
- )
137
-
138
- st.session_state.repetion_penalty = st.slider(
139
- label="Repetion Penalty", min_value=0., max_value=1., step=0.1, value=1.
140
- )
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- st.markdown("""
143
- > **2023 ©️ Pragnesh Barik**
144
- """)
145
 
146
- st.image("ikigai.svg")
147
- st.title("Ikigai Chat")
148
- # st.caption("Maintained and developed by Pragnesh Barik.")
149
-
150
- with st.expander("What is Ikigai Chat ?"):
151
- st.info("""Ikigai Chat is a vector database powered chat agent, it works on the principle of
152
- of Retrieval Augmented Generation (RAG), Its primary function revolves around maintaining an extensive repository of Ikigai Docs and providing users with answers that align with their queries.
153
- This approach ensures a more refined and tailored response to user inquiries.""")
154
 
155
- st.table(df)
 
 
 
 
 
 
156
 
157
- for message in st.session_state.messages:
158
- with st.chat_message(message["role"]):
159
- st.markdown(message["content"])
 
160
 
 
 
 
 
161
 
162
  if prompt := st.chat_input("Chat with Ikigai Docs..."):
163
  st.chat_message("user").markdown(prompt)
164
  st.session_state.messages.append({"role": "user", "content": prompt})
165
 
 
166
 
167
- tick = time.time()
168
 
169
- links = []
170
- if st.session_state.rag_enabled :
171
- with st.spinner("Fetching relevent documents from Ikigai Docs...."):
172
- prompt, links = gen_augmented_prompt(prompt=prompt, top_k=st.session_state.top_k)
173
-
174
- with st.spinner("Generating response...") :
175
- chat_stream = chat(prompt, st.session_state.history,chat_client=chat_bots[st.session_state.chat_bot] ,
176
- temperature=st.session_state.temp, max_new_tokens=st.session_state.max_tokens)
177
- tock = time.time()
178
-
179
- st.session_state.inference_time.append(tock - tick)
180
-
181
-
182
-
183
- formatted_links = ", ".join(links)
184
  with st.chat_message("assistant"):
185
- full_response = ""
186
  placeholder = st.empty()
 
187
  if st.session_state.rag_enabled :
188
- for chunk in chat_stream :
189
- if chunk.token.text!='</s>' :
190
- full_response += chunk.token.text
191
-
192
- placeholder.markdown(full_response + "▌")
193
-
194
- placeholder.markdown(full_response)
195
- st.info( f"""\n\nFetched from :\n {formatted_links}""")
196
- else :
197
- for chunk in chat_stream :
198
- if chunk.token.text!='</s>' :
199
- full_response += chunk.token.text
200
- placeholder.markdown(full_response + "▌")
201
- placeholder.markdown(full_response)
202
-
203
- len_response = (len(prompt.split()) + len(full_response.split())) * 1.25
204
- st.session_state["tokens_used"] = len_response + st.session_state["tokens_used"]
205
-
206
-
207
 
208
  st.session_state.history.append([prompt, full_response])
209
- st.session_state.history.append(identity_change)
210
-
211
-
212
- if st.session_state.rag_enabled :
213
- st.session_state.messages.append(
214
- {"role": "assistant", "content": full_response + f"""\n\nFetched from :\n {formatted_links}"""})
215
- else :
216
- st.session_state.messages.append({"role": "assistant", "content": full_response})
 
1
  import streamlit as st
2
+ from chat_client import chat
3
  import time
4
  import pandas as pd
5
  import pinecone
 
15
  environment='gcp-starter'
16
  )
17
 
18
+ PINECONE_INDEX = pinecone.Index('ikigai-chat')
19
+ TEXT_VECTORIZER = SentenceTransformer('all-distilroberta-v1')
20
+ CHAT_BOTS = {
21
  "Mixtral 8x7B v0.1" :"mistralai/Mixtral-8x7B-Instruct-v0.1",
22
  "Mistral 7B v0.1" : "mistralai/Mistral-7B-Instruct-v0.1",
23
  }
24
+ COST_PER_1000_TOKENS_INR = 0.139
25
+
26
+ st.set_page_config(
27
+ page_title="Ikigai Chat",
28
+ page_icon="🤖",
29
+ )
30
+
31
+ SYSTEM_PROMPT = [
32
  """
33
  You are not Mistral AI, but rather a chat bot trained at Ikigai Labs. Whenever asked, you need to answer as Ikigai Labs' assistant.
34
  Ikigai helps modern analysts and operations teams automate data-intensive business, finance, analytics, and supply-chain operations.
 
43
  Is there anything else you would like to know about Ikigai Labs or their products?
44
  """
45
  ]
46
+ IDENTITY_CHANGE = [
 
47
  """
48
  You are Ikigai Chat from now on, so answer accordingly.
49
  """,
 
53
  """
54
  ]
55
 
 
56
  def gen_augmented_prompt(prompt, top_k) :
57
+ query_vector = TEXT_VECTORIZER.encode(prompt).tolist()
58
+ res = PINECONE_INDEX.query(vector=query_vector, top_k=top_k, include_metadata=True)
59
  matches = res['matches']
60
 
61
  context = ""
 
64
  context+=match["metadata"]["chunk"] + "\n\n"
65
  links.append(match["metadata"]["link"])
66
 
 
 
67
  generated_prompt = f"""
68
  FOR THIS GIVEN CONTEXT {context},
69
 
 
72
  """
73
  return generated_prompt, links
74
 
75
+ def init_state() :
76
+ if "messages" not in st.session_state:
77
+ st.session_state.messages = []
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ if "tokens_used" not in st.session_state:
80
+ st.session_state.tokens_used = 0
81
 
82
+ if "tps" not in st.session_state:
83
+ st.session_state.tps = 0
84
 
85
+ if "temp" not in st.session_state:
86
+ st.session_state.temp = 0.8
87
 
88
+ if "history" not in st.session_state:
89
+ st.session_state.history = [SYSTEM_PROMPT]
90
 
91
+ if "top_k" not in st.session_state:
92
+ st.session_state.top_k = 5
93
 
94
+ if "repetion_penalty" not in st.session_state :
95
+ st.session_state.repetion_penalty = 1
96
 
97
+ if "rag_enabled" not in st.session_state :
98
+ st.session_state.rag_enabled = True
99
 
100
+ if "chat_bot" not in st.session_state :
101
+ st.session_state.chat_bot = "Mixtral 8x7B v0.1"
102
 
103
+ def sidebar() :
104
+ def retrieval_settings() :
105
+ st.markdown("# Retrieval Settings")
106
+ st.session_state.rag_enabled = st.toggle("Activate RAG", value=True)
107
+ st.session_state.top_k = st.slider(label="Documents to retrieve",
108
+ min_value=1, max_value=20, value=10, disabled=not st.session_state.rag_enabled)
109
+ st.markdown("---")
110
 
111
+ def model_analytics() :
112
+ st.markdown("# Model Analytics")
113
+
114
+ st.write("Total tokens used :", st.session_state['tokens_used'])
115
+ st.write("Speed :", st.session_state['tps'], " tokens/sec")
116
+ st.write("Total cost incurred :", round(
117
+ COST_PER_1000_TOKENS_INR * st.session_state['tokens_used'] / 1000, 3), "INR")
118
+
119
+ st.markdown("---")
120
 
121
+ def model_settings() :
122
+ st.markdown("# Model Settings")
123
+
124
+ st.session_state.chat_bot = st.sidebar.radio(
125
+ 'Select one:', [key for key, value in CHAT_BOTS.items() ])
126
+ st.session_state.temp = st.slider(
127
+ label="Temperature", min_value=0.0, max_value=1.0, step=0.1, value=0.9)
128
+
129
+ st.session_state.max_tokens = st.slider(
130
+ label="New tokens to generate", min_value = 64, max_value=2048, step= 32, value=512
131
+ )
132
+
133
+ st.session_state.repetion_penalty = st.slider(
134
+ label="Repetion Penalty", min_value=0., max_value=1., step=0.1, value=1.
135
+ )
136
+
137
+ with st.sidebar:
138
+ retrieval_settings()
139
+ model_analytics()
140
+ model_settings()
141
+
142
+ st.markdown("""
143
+ > **2023 ©️ [Pragnesh Barik](https://barik.super.site) 🔗**
144
+ """)
145
 
146
+ def header() :
147
+ data = {
148
+ "Attribute": ["LLM", "Text Vectorizer", "Vector Database","CPU", "System RAM"],
149
+ "Information": ["Mixtral-8x7B-Instruct-v0.1","all-distilroberta-v1", "Hosted Pinecone" ,"2 vCPU", "16 GB"]
150
+ }
151
+ df = pd.DataFrame(data)
152
+ st.image("ikigai.svg")
153
+ st.title("Ikigai Chat")
154
+ with st.expander("What is Ikigai Chat ?"):
155
+ st.info("""Ikigai Chat is a vector database powered chat agent, it works on the principle of
156
+ of Retrieval Augmented Generation (RAG), Its primary function revolves around maintaining an extensive repository of Ikigai Docs and providing users with answers that align with their queries.
157
+ This approach ensures a more refined and tailored response to user inquiries.""")
158
+
159
+ st.table(df)
160
+
161
+ def chat_box() :
162
+ for message in st.session_state.messages:
163
+ with st.chat_message(message["role"]):
164
+ st.markdown(message["content"])
165
+
166
+ def feedback_buttons() :
167
+ is_visible = True
168
+ def click_handler() :
169
+ is_visible = False
170
+ if is_visible :
171
+ col1, col2 = st.columns(2)
172
+ with col1 :
173
+ st.button("👍 Satisfied", on_click = click_handler,type="primary")
174
+
175
+ with col2 :
176
+ st.button("👎 Disatisfied", on_click=click_handler, type="secondary")
177
+
178
+ def generate_chat_stream(prompt) :
179
+ links = []
180
+ if st.session_state.rag_enabled :
181
+ with st.spinner("Fetching relevent documents from Ikigai Docs...."):
182
+ prompt, links = gen_augmented_prompt(prompt=prompt, top_k=st.session_state.top_k)
183
+
184
+ with st.spinner("Generating response...") :
185
+ chat_stream = chat(prompt, st.session_state.history,chat_client=CHAT_BOTS[st.session_state.chat_bot] ,
186
+ temperature=st.session_state.temp, max_new_tokens=st.session_state.max_tokens)
187
 
188
+ return chat_stream, links
189
+
190
+ def stream_handler(chat_stream, placeholder) :
191
+ start_time = time.time()
192
+ full_response = ''
193
+
194
+ for chunk in chat_stream :
195
+ if chunk.token.text!='</s>' :
196
+ full_response += chunk.token.text
197
+ placeholder.markdown(full_response + "▌")
198
+ placeholder.markdown(full_response)
199
+
200
+ end_time = time.time()
201
+ elapsed_time = end_time - start_time
202
+ total_tokens_processed = len(full_response.split())
203
+ tokens_per_second = total_tokens_processed // elapsed_time
204
+ len_response = (len(prompt.split()) + len(full_response.split())) * 1.25
205
+ col1, col2, col3 = st.columns(3)
206
 
207
+ with col1 :
208
+ st.write(f"**{tokens_per_second} tokens/second**")
 
209
 
210
+ with col2 :
211
+ st.write(f"**{int(len_response)} tokens generated**")
 
 
 
 
 
 
212
 
213
+ with col3 :
214
+ st.write(f"**₹ {round(len_response * COST_PER_1000_TOKENS_INR / 1000, 5)} cost incurred**" )
215
+
216
+ st.session_state['tps'] = tokens_per_second
217
+ st.session_state["tokens_used"] = len_response + st.session_state["tokens_used"]
218
+
219
+ return full_response
220
 
221
+ def show_source(links) :
222
+ with st.expander("Show source") :
223
+ for i, link in enumerate(links) :
224
+ st.info(f"{link}")
225
 
226
+ init_state()
227
+ sidebar()
228
+ header()
229
+ chat_box()
230
 
231
  if prompt := st.chat_input("Chat with Ikigai Docs..."):
232
  st.chat_message("user").markdown(prompt)
233
  st.session_state.messages.append({"role": "user", "content": prompt})
234
 
235
+ chat_stream, links = generate_chat_stream(prompt)
236
 
 
237
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  with st.chat_message("assistant"):
 
239
  placeholder = st.empty()
240
+ full_response = stream_handler(chat_stream, placeholder)
241
  if st.session_state.rag_enabled :
242
+ show_source(links)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  st.session_state.history.append([prompt, full_response])
245
+ st.session_state.messages.append({"role": "assistant", "content": full_response})
 
 
 
 
 
 
 
mistral7b.py → chat_client.py RENAMED
File without changes