freQuensy23 committed on
Commit
79b2407
1 Parent(s): f0c7657

Add mixtral

Files changed (2)
  1. app.py +6 -6
  2. generators.py +17 -26
app.py CHANGED
@@ -9,20 +9,20 @@ load_dotenv()
 
 async def handle(system_input: str, user_input: str):
     print(system_input, user_input)
-    buffers = ["", "", "", "", ""]
+    buffers = ["", "", "", "", "", ""]
     async for outputs in async_zip_stream(
             generate_gpt2(system_input, user_input),
             generate_mistral_7bvo1(system_input, user_input),
             generate_llama2(system_input, user_input),
             generate_llama3(system_input, user_input),
             generate_t5(system_input, user_input),
+            generate_mixtral(system_input, user_input),
     ):
         # gpt_output, mistral_output, llama_output, llama2_output, llama3_output, llama4_output = outputs
         for i, b in enumerate(buffers):
             buffers[i] += str(outputs[i])
 
-        yield list(buffers) + ["", ""]
-    yield list(buffers) + [generate_bloom(system_input, user_input)]
+        yield list(buffers)
 
 
 with gr.Blocks() as demo:
@@ -30,10 +30,10 @@ with gr.Blocks() as demo:
     with gr.Row():
         gpt = gr.Textbox(label='gpt-2', lines=4, interactive=False, info='OpenAI\n14 February 2019')
         t5 = gr.Textbox(label='t5', lines=4, interactive=False, info='Google\n12 Dec 2019')
-        bloom = gr.Textbox(label='bloom [GPU]', lines=4, interactive=False, info='Big Science\n11 Jul 2022')
-    with gr.Row():
         llama2 = gr.Textbox(label='llama-2', lines=4, interactive=False, info='MetaAI\n18 Jul 2023')
+    with gr.Row():
         mistral = gr.Textbox(label='mistral-v01', lines=4, interactive=False, info='MistralAI\n20 Sep 2023')
+        mixtral = gr.Textbox(label='mixtral', lines=4, interactive=False, info='Mistral AI\n11 Dec 2023')
         llama3 = gr.Textbox(label='llama-3.1', lines=4, interactive=False, info='MetaAI\n18 Jul 2024')
 
     user_input = gr.Textbox(label='User Input', lines=2, value='Calculate expression: 7-3=')
@@ -42,7 +42,7 @@ with gr.Blocks() as demo:
     gen_button.click(
         fn=handle,
         inputs=[system_input, user_input],
-        outputs=[gpt, mistral, llama2, llama3, t5, bloom],
+        outputs=[gpt, mistral, llama2, llama3, t5, mixtral],
     )
 
 demo.launch()
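Note on app.py: `handle` relies on `async_zip_stream` (imported elsewhere in app.py) to advance all model generators in lockstep, so every textbox streams at the same time. With Mixtral added, `buffers` grows to six slots and each yield pushes the six partial outputs to the six textboxes listed in `outputs=` in the same order as the generators. A minimal sketch of what such a helper could look like, assuming a pure-asyncio implementation (hypothetical, not necessarily the Space's actual utility):

```python
import asyncio


async def async_zip_stream(*generators):
    # Hypothetical sketch: drive several async generators concurrently and
    # yield a tuple with one chunk per generator on every round. Generators
    # that have finished keep contributing empty strings until all are done
    # (the final round may therefore be all-empty, which is harmless for
    # callers that simply concatenate chunks).
    iterators = [gen.__aiter__() for gen in generators]
    finished = [False] * len(iterators)

    async def next_chunk(i):
        if finished[i]:
            return ""
        try:
            return await iterators[i].__anext__()
        except StopAsyncIteration:
            finished[i] = True
            return ""

    while not all(finished):
        chunks = await asyncio.gather(*(next_chunk(i) for i in range(len(iterators))))
        yield tuple(chunks)
```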
generators.py CHANGED
@@ -67,36 +67,27 @@ async def generate_llama2(system_input, user_input):
             yield message.choices[0].delta.content
 
 
-@spaces.GPU(duration=120)
-def generate_openllama(system_input, user_input):
-    model_path = 'openlm-research/open_llama_3b_v2'
-    tokenizer = LlamaTokenizer.from_pretrained(model_path)
-    model = LlamaForCausalLM.from_pretrained(
-        model_path, torch_dtype=torch.float16, device_map='cuda',
-    )
-    print('model openllama loaded')
-    input_text = f"{system_input}\n{user_input}"
-    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
-    output = model.generate(input_ids, max_length=128)
-    return tokenizer.decode(output[0], skip_special_tokens=True)
-
-
-@spaces.GPU(duration=120)
-def generate_bloom(system_input, user_input):
-    model_path = 'bigscience/bloom-7b1'
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_path, torch_dtype=torch.float16, device_map='cuda',
+async def generate_llama3(system_input, user_input):
+    client = AsyncInferenceClient(
+        "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        token=os.getenv('HF_TOKEN')
     )
-    input_text = f"{system_input}\n{user_input}"
-    input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
-    output = model.generate(input_ids, max_length=128)
-    return tokenizer.decode(output[0], skip_special_tokens=True)
+    try:
+        async for message in await client.chat_completion(
+            messages=[
+                {"role": "system", "content": system_input},
+                {"role": "user", "content": user_input}, ],
+            max_tokens=256,
+            stream=True,
+        ):
+            yield message.choices[0].delta.content
+    except json.JSONDecodeError:
+        pass
 
 
-async def generate_llama3(system_input, user_input):
+async def generate_mixtral(system_input, user_input):
     client = AsyncInferenceClient(
-        "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        token=os.getenv('HF_TOKEN')
     )
     try:
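Note on generators.py: besides adding Mixtral, this diff drops the GPU-bound `generate_openllama` and `generate_bloom` helpers, leaving only async generators that stream chat-completion deltas from the Hugging Face Inference API and swallow the occasional `json.JSONDecodeError` raised when a stream ends. `generate_mixtral` mirrors `generate_llama3` with a different model id. A quick way to smoke-test the new generator outside Gradio (assuming `HF_TOKEN` is set in the environment; the prompt below is just an example) might be:

```python
import asyncio

from generators import generate_mixtral


async def main():
    # Stream Mixtral-8x7B-Instruct output chunk by chunk and print it as it arrives.
    async for chunk in generate_mixtral("You are a calculator.",
                                        "Calculate expression: 7-3="):
        print(chunk or "", end="", flush=True)  # delta.content can be None on some chunks
    print()


if __name__ == "__main__":
    asyncio.run(main())
```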