alamin655 committed on
Commit
16365ee
1 Parent(s): 9a2a4ca

Upload 2 files

Files changed (2)
  1. download_model.py +21 -0
  2. inference.py +74 -0
download_model.py ADDED
import os
from huggingface_hub import hf_hub_download


def download_replit_quant(destination_folder: str, repo_id: str, model_filename: str):
    local_path = os.path.abspath(destination_folder)
    return hf_hub_download(
        repo_id=repo_id,
        filename=model_filename,
        local_dir=local_path,
        local_dir_use_symlinks=True,
    )


if __name__ == "__main__":
    """full url: https://huggingface.co/abacaj/Replit-v2-CodeInstruct-3B-ggml/blob/main/replit-v2-codeinstruct-3b.q4_1.bin"""

    repo_id = "abacaj/Replit-v2-CodeInstruct-3B-ggml"
    model_filename = "replit-v2-codeinstruct-3b.q4_1.bin"
    destination_folder = "models"
    download_replit_quant(destination_folder, repo_id, model_filename)
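
As a quick usage sketch (not part of this commit), the helper can also be imported and pointed at a different destination folder; this assumes huggingface_hub is installed and reuses the repo id and filename hard-coded in the __main__ block, while the "weights" folder name is only an illustrative choice:

    # Hypothetical usage sketch, not part of the uploaded files:
    # reuse download_replit_quant() to fetch the quantized weights elsewhere.
    from download_model import download_replit_quant

    model_path = download_replit_quant(
        destination_folder="weights",  # assumed alternative folder
        repo_id="abacaj/Replit-v2-CodeInstruct-3B-ggml",
        model_filename="replit-v2-codeinstruct-3b.q4_1.bin",
    )
    print(model_path)  # path to the downloaded .bin file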
inference.py ADDED
import os
from dataclasses import dataclass, asdict
from ctransformers import AutoModelForCausalLM, AutoConfig


@dataclass
class GenerationConfig:
    temperature: float
    top_k: int
    top_p: float
    repetition_penalty: float
    max_new_tokens: int
    seed: int
    reset: bool
    stream: bool
    threads: int
    stop: list[str]


def format_prompt(user_prompt: str):
    return f"""### Instruction:
{user_prompt}

### Response:"""


def generate(
    llm: AutoModelForCausalLM,
    generation_config: GenerationConfig,
    user_prompt: str,
):
    """Run model inference; returns a generator when streaming is enabled."""

    return llm(
        format_prompt(
            user_prompt,
        ),
        **asdict(generation_config),
    )


if __name__ == "__main__":
    config = AutoConfig.from_pretrained(
        "teknium/Replit-v2-CodeInstruct-3B", context_length=2048
    )
    llm = AutoModelForCausalLM.from_pretrained(
        os.path.abspath("models/replit-v2-codeinstruct-3b.q4_1.bin"),
        model_type="replit",
        config=config,
    )

    generation_config = GenerationConfig(
        temperature=0.2,
        top_k=50,
        top_p=0.9,
        repetition_penalty=1.0,
        max_new_tokens=512,  # adjust as needed
        seed=42,
        reset=True,  # reset history (cache)
        stream=True,  # streaming per word/token
        threads=int(os.cpu_count() / 6),  # adjust for your CPU
        stop=["<|endoftext|>"],
    )

    user_prefix = "[user]: "
    assistant_prefix = "[assistant]:"

    while True:
        user_prompt = input(user_prefix)
        generator = generate(llm, generation_config, user_prompt.strip())
        print(assistant_prefix, end=" ", flush=True)
        for word in generator:
            print(word, end="", flush=True)
        print("")
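
A minimal sketch (again, not part of the commit) of consuming the streamed output programmatically instead of printing it token by token; it assumes llm and generation_config are built exactly as in the __main__ block above, with stream=True so generate() yields tokens one at a time:

    # Hypothetical helper, not part of the uploaded files: collect the
    # streamed tokens from generate() into a single response string.
    def run_once(llm, generation_config, prompt: str) -> str:
        pieces = []
        for token in generate(llm, generation_config, prompt.strip()):
            pieces.append(token)
        return "".join(pieces)

    # e.g. answer = run_once(llm, generation_config, "Write a function that reverses a string.")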