streaming text generation in working shape
app.py CHANGED
@@ -1,9 +1,10 @@
 import gradio as gr
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import datasets
 import asyncio
 import numpy as np
 import torch
+from threading import Thread
 
 def make_script(shader_code):
     # code copied and fixed (escaping single quotes to double quotes!) from https://webglfundamentals.org/webgl/webgl-shadertoy.html
@@ -274,6 +275,7 @@ outro_text ="""
 - [~] include some context for prompt (title, comments before a function) - now works with the first comment inside a function body (has to be first)
 - [ ] gradio examples
 - [ ] use GPU if available, respect memory restrictions.
+- [~] stream model generation (maybe in a new window?) - WIP for body gen right now -> janky solution works.
 
 ### Notes:
 - this is meant as a resource to show code generation for a "creative" task.
@@ -342,6 +344,34 @@ def _make_pipeline(model_cp = "Vipitis/santacoder-finetuned-Shadertoys-fine"): #
     print(f"loaded model {model_cp} as a pipeline")
     return pipe
 
+def _run_generation(model_ctx:str, pipe, gen_kwargs:dict):
+    """
+    Text generation function.
+    Args:
+        model_ctx (str): The context to start generation from.
+        pipe (Pipeline): The pipeline to use for generation.
+        gen_kwargs (dict): The generation kwargs.
+    Returns:
+        str: The generated text. (it iterates over time)
+    """
+    # Tokenize the model_context
+    model_inputs = pipe.tokenizer(model_ctx, return_tensors="pt")
+
+    # Start generation on a separate thread, so that we don't block the UI. The text is pulled from the streamer
+    # in the main thread. Adds a timeout to the streamer to handle exceptions in the generation thread.
+    streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15.0)
+    generate_kwargs = dict(model_inputs, streamer=streamer, **gen_kwargs)
+    t = Thread(target=pipe.model.generate, kwargs=generate_kwargs)
+    t.start()
+
+    # Pull the generated text from the streamer, and update the model output.
+    model_output = ""
+    for new_text in streamer:
+        # print("step", end="")
+        model_output += new_text
+        yield model_output
+    streamer.on_finalized_text("stream reached the end.")
+    return model_output  # is this ever reached?
 
 def process_retn(retn):
     return retn.split(";")[0].strip()
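The added `_run_generation` is the stock `TextIteratorStreamer` recipe from transformers: `generate()` runs on a worker thread while the caller drains the streamer on the main thread, yielding a progressively longer string. A minimal self-contained sketch of the same pattern (the gpt2 checkpoint and the prompt are stand-ins here, not what this Space uses):

    from threading import Thread
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    inputs = tokenizer("void mainImage(", return_tensors="pt")
    # skip_prompt=True yields only newly generated tokens; the timeout stops the
    # consumer loop from hanging forever if the generation thread dies.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=15.0)

    thread = Thread(target=model.generate, kwargs=dict(inputs, streamer=streamer, max_new_tokens=32))
    thread.start()

    text = ""
    for chunk in streamer:  # blocks until the next decoded chunk arrives
        text += chunk
        print(text)
    thread.join()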
@@ -458,7 +488,12 @@ def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens,
     # print(second_child.text.decode())
     model_context += " { \n " + second_child.text.decode()
     print(f"{model_context=}")
-    generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
+    # generation = pipeline(model_context, return_full_text=False, **generation_kwargs)[0]["generated_text"]
+    generation = _run_generation(model_context, pipeline, generation_kwargs)
+    for i in generation:
+        print(f"{i=}")
+        yield model_context + i, pipeline  # fix in between, do all the stuff in the end?
+        generation = i[:]  # seems to work
     print(f"{generation=}")
     ctx_with_generation = model_context + generation
     print(f"{ctx_with_generation=}")
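Turning the body-generation path into a generator is what makes the streaming visible in the UI: Gradio re-renders the outputs on every `yield` from an event handler, which is why each partial generation is yielded together with the `pipeline` state. A toy sketch of the mechanism, independent of this Space (component and function names are illustrative only):

    import time
    import gradio as gr

    def count_up(n):
        total = ""
        for i in range(int(n)):
            total += f"{i} "
            time.sleep(0.2)
            yield total  # each yield overwrites the textbox contents

    with gr.Blocks() as demo:
        steps = gr.Number(value=10, label="steps")
        out = gr.Textbox(label="stream")
        gr.Button("run").click(fn=count_up, inputs=[steps], outputs=[out])

    demo.queue()  # generator handlers only stream when the queue is enabled
    demo.launch()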
@@ -474,7 +509,9 @@ def alter_body(old_code, func_id, funcs_list: list, temperature, max_new_tokens,
     generated_body = first_gened_func.child_by_field_name("body").text.decode()
     print(f"{generated_body=}")
     altered_code = old_code[:func_start_idx] + identifier_str + generated_body + old_code[body_end_idx:]
-
+    print(f"{altered_code=}")  # we get here successfully
+    yield altered_code, pipeline  # yield once so it updates? -> works... gg
+    return altered_code, pipeline  # never gets used by the code block? maybe I need to yield it first? but works in the ov_notebook
 
 def add_history(func_id, orig_rtn, gened_rtn, history):
     # is this a list? or a JSON dict?
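About the `return` that "never gets used": in Python, a `return` inside a generator merely ends the iteration; the value rides along on `StopIteration` and is dropped by a plain `for` loop, so only yielded values ever reach Gradio. The final `yield altered_code, pipeline` is therefore what actually updates the UI:

    def gen():
        yield "update 1"
        return "never shown"  # becomes StopIteration.value; a for loop ignores it

    print(list(gen()))  # prints ['update 1']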
@@ -524,7 +561,7 @@ with gr.Blocks() as site:
     with column_2:
         top_p = gr.Slider(
             label="Top-p (nucleus sampling)",
-            value=0.
+            value=0.85,
             minimum=0.0,
             maximum=1,
             step=0.05,
@@ -563,4 +600,4 @@ with gr.Blocks() as site:
     gen_func_button.click(fn=alter_body, inputs=[sample_code, func_dropdown, funcs, temperature, max_new_tokens, top_p, repetition_penalty, pipe], outputs=[sample_code, pipe])
     sample_code.change(fn=list_dropdown, inputs=[sample_code], outputs=[funcs, func_dropdown])  # to update this after generation, so spans aren't messed up
     sample_code.change(fn=make_iframe, inputs=[sample_code], outputs=[our_embed])  # twice could cause issues, find better ways.
-site.launch()
+site.launch(enable_queue=True)
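The switch to `site.launch(enable_queue=True)` is what lets the yielded updates reach the browser: gradio 3.x-era builds refuse to run generator handlers unless the queue is enabled. Newer gradio releases drop the `enable_queue` launch flag in favor of calling `site.queue()` before `site.launch()`.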
|