alamin655 committed on
Commit e638d96
1 Parent(s): ac3f7e7

Update app.py

Files changed (1)
app.py +16 -77
app.py CHANGED
@@ -1,84 +1,23 @@
-import os
-from dataclasses import dataclass, asdict
-from ctransformers import AutoModelForCausalLM, AutoConfig
-import gradio as gr
-
-
-@dataclass
-class GenerationConfig:
-    temperature: float
-    top_k: int
-    top_p: float
-    repetition_penalty: float
-    max_new_tokens: int
-    seed: int
-    reset: bool
-    stream: bool
-    threads: int
-    stop: list[str]
-
-
-def format_prompt(user_prompt: str):
-    return f"""### Instruction:
-{user_prompt}
-
-### Response:"""
-
-
-def generate(
-    llm: AutoModelForCausalLM,
-    generation_config: GenerationConfig,
-    user_prompt: str,
-):
-    """run model inference, will return a Generator if streaming is true"""
-
-    return llm(
-        format_prompt(
-            user_prompt,
-        ),
-        **asdict(generation_config),
-    )
-
-
-def generate_response(user_input):
-    generator = generate(llm, generation_config, user_input.strip())
-    response = ""
-    for word in generator:
-        response += word
-    return response
-
-
-if __name__ == "__main__":
-    config = AutoConfig.from_pretrained(
-        "teknium/Replit-v2-CodeInstruct-3B", context_length=2048
-    )
-    llm = AutoModelForCausalLM.from_pretrained(
-        os.path.abspath("models/replit-v2-codeinstruct-3b.q4_1.bin"),
-        model_type="replit",
-        config=config,
-    )
-
-    generation_config = GenerationConfig(
-        temperature=0.2,
-        top_k=50,
-        top_p=0.9,
-        repetition_penalty=1.0,
-        max_new_tokens=512,  # adjust as needed
-        seed=42,
-        reset=True,  # reset history (cache)
-        stream=True,  # streaming per word/token
-        threads=int(os.cpu_count() / 6),  # adjust for your CPU
-        stop=[""],
-    )
-
-    user_prefix = "[user]: "
-    assistant_prefix = f"[assistant]: "
-
-    iface = gr.Interface(
-        fn=generate_response,
-        inputs=gr.inputs.Textbox(label=user_prefix),
-        outputs=gr.outputs.Textbox(label=assistant_prefix),
-        title="Chat with Assistant",
-        description="Ask any question and get a response from the Assistant!",
-    )
-    iface.launch()
+import subprocess
+
+# Clone the repository
+subprocess.run(['git', 'clone', 'https://github.com/abacaj/replit-3B-inference.git'])
+
+# Change directory
+subprocess.run(['cd', 'replit-3B-inference'])
+
+# Create and activate virtual environment
+subprocess.run(['python', '-m', 'venv', 'env'])
+subprocess.run(['source', 'env/bin/activate'])
+
+# Update submodules
+subprocess.run(['git', 'submodule', 'update', '--init', '--recursive'])
+
+# Install requirements
+subprocess.run(['pip', 'install', '-r', 'requirements.txt'])
+
+# Download the model
+subprocess.run(['python', 'download_model.py'])
+
+# Run inference
+subprocess.run(['python', 'inference.py'])
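Reviewer note on the new file: `cd` and `source` are shell builtins, not executables, so two of the calls above do not do what their comments say. `subprocess.run(['cd', ...])` changes the directory only inside a child process that exits immediately, and `subprocess.run(['source', ...])` raises FileNotFoundError; as a result the later pip/python calls also run against the system interpreter rather than the venv. Below is a minimal sketch of the same steps using subprocess alone, passing cwd= to each call and invoking the venv's interpreter by path instead of activating it. It assumes a POSIX env/bin layout (on Windows the interpreter lives under env\Scripts); the repository URL and script names follow the commit above.

import subprocess
from pathlib import Path

repo = Path("replit-3B-inference")

# Clone the repository (skip if a previous run already created it)
if not repo.exists():
    subprocess.run(
        ["git", "clone", "https://github.com/abacaj/replit-3B-inference.git"],
        check=True,
    )

# Create the virtual environment inside the repo;
# cwd= replaces the throwaway `cd` process
subprocess.run(["python", "-m", "venv", "env"], cwd=repo, check=True)

# Calling the venv's interpreter by absolute path replaces `source env/bin/activate`
venv_python = str((repo / "env" / "bin" / "python").resolve())

# Update submodules
subprocess.run(["git", "submodule", "update", "--init", "--recursive"], cwd=repo, check=True)

# Install requirements into the venv, not the system Python
subprocess.run([venv_python, "-m", "pip", "install", "-r", "requirements.txt"], cwd=repo, check=True)

# Download the model, then run inference, under the venv interpreter
subprocess.run([venv_python, "download_model.py"], cwd=repo, check=True)
subprocess.run([venv_python, "inference.py"], cwd=repo, check=True)

check=True also makes each step fail loudly instead of silently continuing after a failed clone or install, which the original sequence would do.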