arad1367 committed on
Commit
c305876
•
1 Parent(s): 24faa28

Upload 2 files

Files changed (2)
  1. app.py +277 -0
  2. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,277 @@
import os
import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig, AutoProcessor
import gradio as gr
from threading import Thread
from PIL import Image
import subprocess
import spaces

# Install flash-attn at startup; merge in os.environ so pip keeps PATH etc.,
# and skip the CUDA build to use the prebuilt wheel.
subprocess.run('pip install flash-attn --no-build-isolation',
               env={**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
               shell=True)

# Model and tokenizer for the chatbot
MODEL_ID1 = "microsoft/Phi-3.5-mini-instruct"
MODEL_LIST1 = ["microsoft/Phi-3.5-mini-instruct"]
HF_TOKEN = os.environ.get("HF_TOKEN", None)

device = "cuda"  # a GPU is required for this Space; "cpu" would be far too slow

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4")
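
# With NF4 4-bit weights and double quantization, the mini model loads in
# roughly a quarter of its bf16 memory footprint, while matmuls still run in
# bfloat16 via bnb_4bit_compute_dtype.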

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID1)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID1,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quantization_config)
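
# device_map="auto" lets Accelerate decide where the quantized weights live,
# so no explicit model.to(device) call is needed.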

# Chatbot tab function
@spaces.GPU()
def stream_chat(
    message: str,
    history: list,
    system_prompt: str,
    temperature: float = 0.8,
    max_new_tokens: int = 1024,
    top_p: float = 1.0,
    top_k: int = 20,
    penalty: float = 1.2,
):
    print(f'message: {message}')
    print(f'history: {history}')

    # Rebuild the full conversation from the system prompt and the chat history
    conversation = [
        {"role": "system", "content": system_prompt}
    ]
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])

    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=temperature > 0,  # greedy decoding when temperature is 0
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        repetition_penalty=penalty,  # apply the UI's repetition-penalty slider
        # eos defaults to the Phi-3.5 stop tokens from the model's generation config
        streamer=streamer,
    )
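
    # model.generate() blocks until it finishes, so it runs in a background
    # thread while this generator yields the partial text accumulated by the
    # streamer. generate() already disables gradient tracking internally, so
    # no torch.no_grad() wrapper is needed around the thread.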
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer

# Vision model setup (kept as dicts so more checkpoints can be added later)
models = {
    "microsoft/Phi-3.5-vision-instruct": AutoModelForCausalLM.from_pretrained(
        "microsoft/Phi-3.5-vision-instruct",
        trust_remote_code=True,
        torch_dtype="auto",
        _attn_implementation="flash_attention_2"
    ).cuda().eval()
}

processors = {
    "microsoft/Phi-3.5-vision-instruct": AutoProcessor.from_pretrained("microsoft/Phi-3.5-vision-instruct", trust_remote_code=True)
}

# Prompt scaffolding (currently unused in this app)
user_prompt = '\n'
assistant_prompt = '\n'
prompt_suffix = "\n"
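
# Note: the vision checkpoint above is loaded with
# _attn_implementation="flash_attention_2", which is why flash-attn is
# installed at startup.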

# Vision model tab function
@spaces.GPU()
def stream_vision(image, text_input=None, model_id="microsoft/Phi-3.5-vision-instruct"):
    model = models[model_id]
    processor = processors[model_id]

    # Guard against a submit with no image uploaded
    if image is None:
        return "Please upload an image first."

    # Prepare the image list and the corresponding image tag
    images = [Image.fromarray(image).convert("RGB")]
    placeholder = "<|image_1|>\n"  # image tag expected by Phi-3.5-vision

    # Construct the prompt with the image tag and the user's text input
    if text_input:
        prompt_content = placeholder + text_input
    else:
        prompt_content = placeholder

    messages = [
        {"role": "user", "content": prompt_content},
    ]

    # Apply the chat template to the messages
    prompt = processor.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # Process the text and image together and move everything to the GPU
    inputs = processor(prompt, images, return_tensors="pt").to("cuda:0")
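    # inputs now holds input_ids, attention_mask, and the preprocessed pixel
    # values for the single image, all on cuda:0.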

    # Generation parameters (greedy decoding)
    generation_args = {
        "max_new_tokens": 1000,
        "temperature": 0.0,
        "do_sample": False,
    }

    # Generate the response
    generate_ids = model.generate(
        **inputs,
        eos_token_id=processor.tokenizer.eos_token_id,
        **generation_args
    )

    # Strip the prompt tokens from the generated sequence
    generate_ids = generate_ids[:, inputs['input_ids'].shape[1]:]

    # Decode the generated output
    response = processor.batch_decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    return response
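
# Per the model card, multiple images would use numbered tags <|image_1|>,
# <|image_2|>, ... in the same order as the images list passed to the processor.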

# CSS for the interface
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""

PLACEHOLDER = """
<center>
<p>Hi! I'm your assistant. Feel free to ask your questions.</p>
</center>
"""

TITLE = "<h1><center>Phi-3.5 Chatbot & Phi-3.5 Vision</center></h1>"

EXPLANATION = """
<div style="text-align: center; margin-top: 20px;">
<p>This app combines the microsoft/Phi-3.5-mini-instruct model for chat with the microsoft/Phi-3.5-vision-instruct model for multimodal (image + text) tasks.</p>
<p>Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets that include synthetic data and filtered publicly available websites, with a focus on very high-quality, reasoning-dense data in both text and vision. The model belongs to the Phi-3 family, and the multimodal version supports a 128K token context length. It underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization, to ensure precise instruction adherence and robust safety measures.</p>
<p>Phi-3.5-mini is a lightweight, state-of-the-art open model built upon the datasets used for Phi-3 (synthetic data and filtered publicly available websites) with a focus on very high-quality, reasoning-dense data. The model belongs to the Phi-3 family and supports a 128K token context length. It underwent a rigorous enhancement process, incorporating supervised fine-tuning, proximal policy optimization, and direct preference optimization, to ensure precise instruction adherence and robust safety measures.</p>
</div>
"""

footer = """
<div style="text-align: center; margin-top: 20px;">
    <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
    <a href="https://github.com/arad1367" target="_blank">GitHub</a> |
    <a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a> |
    <a href="https://huggingface.co/microsoft/Phi-3.5-mini-instruct" target="_blank">microsoft/Phi-3.5-mini-instruct</a> |
    <a href="https://huggingface.co/microsoft/Phi-3.5-vision-instruct" target="_blank">microsoft/Phi-3.5-vision-instruct</a>
    <br>
    Made with 💖 by Pejman Ebrahimi
</div>
"""

# Gradio app with two tabs
with gr.Blocks(css=CSS, theme="small_and_pretty") as demo:
    gr.HTML(TITLE)
    gr.HTML(EXPLANATION)
    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
        gr.ChatInterface(
            fn=stream_chat,
            chatbot=chatbot,
            fill_height=True,
            additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
            additional_inputs=[
                gr.Textbox(
                    value="You are a helpful assistant",
                    label="System Prompt",
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.8,
                    label="Temperature",
                    render=False,
                ),
                gr.Slider(
                    minimum=128,
                    maximum=8192,
                    step=1,
                    value=1024,
                    label="Max new tokens",
                    render=False,
                ),
                gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=1.0,
                    label="top_p",
                    render=False,
                ),
                gr.Slider(
                    minimum=1,
                    maximum=20,
                    step=1,
                    value=20,
                    label="top_k",
                    render=False,
                ),
                gr.Slider(
                    minimum=1.0,  # repetition_penalty must be positive; 1.0 means no penalty
                    maximum=2.0,
                    step=0.1,
                    value=1.2,
                    label="Repetition penalty",
                    render=False,
                ),
            ],
            examples=[
                ["How do I make a self-driving car?"],
                ["Give me a creative idea for establishing a startup"],
                ["How can I improve my programming skills?"],
                ["Show me a code snippet for a website's sticky header in CSS and JavaScript."],
            ],
            cache_examples=False,
        )
    with gr.Tab("Vision"):
        with gr.Row():
            input_img = gr.Image(label="Input Picture")
        with gr.Row():
            model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="microsoft/Phi-3.5-vision-instruct")
        with gr.Row():
            text_input = gr.Textbox(label="Question")
        with gr.Row():
            submit_btn = gr.Button(value="Submit")
        with gr.Row():
            output_text = gr.Textbox(label="Output Text")

    submit_btn.click(stream_vision, [input_img, text_input, model_selector], [output_text])

    gr.HTML(footer)

# Launch the combined app
demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,11 @@
accelerate==0.30.0
bitsandbytes
torch
torchvision
transformers==4.43.0
einops
sentencepiece
numpy==1.24.4
Pillow==10.3.0
Requests==2.31.0
gradio