alamin655 committed on
Commit e638d96
1 Parent(s): ac3f7e7

Update app.py

Files changed (1)
app.py +16 -77
app.py CHANGED
@@ -1,84 +1,23 @@
-import os
-from dataclasses import dataclass, asdict
-from ctransformers import AutoModelForCausalLM, AutoConfig
-import gradio as gr
-
-
-@dataclass
-class GenerationConfig:
-    temperature: float
-    top_k: int
-    top_p: float
-    repetition_penalty: float
-    max_new_tokens: int
-    seed: int
-    reset: bool
-    stream: bool
-    threads: int
-    stop: list[str]
-
-
-def format_prompt(user_prompt: str):
-    return f"""### Instruction:
-{user_prompt}
-
-### Response:"""
-
-
-def generate(
-    llm: AutoModelForCausalLM,
-    generation_config: GenerationConfig,
-    user_prompt: str,
-):
-    """run model inference, will return a Generator if streaming is true"""
-
-    return llm(
-        format_prompt(
-            user_prompt,
-        ),
-        **asdict(generation_config),
-    )
-
-
-def generate_response(user_input):
-    generator = generate(llm, generation_config, user_input.strip())
-    response = ""
-    for word in generator:
-        response += word
-    return response
-
-
-if __name__ == "__main__":
-    config = AutoConfig.from_pretrained(
-        "teknium/Replit-v2-CodeInstruct-3B", context_length=2048
-    )
-    llm = AutoModelForCausalLM.from_pretrained(
-        os.path.abspath("models/replit-v2-codeinstruct-3b.q4_1.bin"),
-        model_type="replit",
-        config=config,
-    )
-
-    generation_config = GenerationConfig(
-        temperature=0.2,
-        top_k=50,
-        top_p=0.9,
-        repetition_penalty=1.0,
-        max_new_tokens=512,  # adjust as needed
-        seed=42,
-        reset=True,  # reset history (cache)
-        stream=True,  # streaming per word/token
-        threads=int(os.cpu_count() / 6),  # adjust for your CPU
-        stop=[""],
-    )
-
-    user_prefix = "[user]: "
-    assistant_prefix = f"[assistant]: "
-
-    iface = gr.Interface(
-        fn=generate_response,
-        inputs=gr.inputs.Textbox(label=user_prefix),
-        outputs=gr.outputs.Textbox(label=assistant_prefix),
-        title="Chat with Assistant",
-        description="Ask any question and get a response from the Assistant!",
-    )
-    iface.launch()
+import subprocess
+
+# Clone the repository
+subprocess.run(['git', 'clone', 'https://github.com/abacaj/replit-3B-inference.git'])
+
+# Change directory
+subprocess.run(['cd', 'replit-3B-inference'])
+
+# Create and activate virtual environment
+subprocess.run(['python', '-m', 'venv', 'env'])
+subprocess.run(['source', 'env/bin/activate'])
+
+# Update submodules
+subprocess.run(['git', 'submodule', 'update', '--init', '--recursive'])
+
+# Install requirements
+subprocess.run(['pip', 'install', '-r', 'requirements.txt'])
+
+# Download the model
+subprocess.run(['python', 'download_model.py'])
+
+# Run inference
+subprocess.run(['python', 'inference.py'])
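Reviewer note on the new file: `cd` and `source` are shell builtins, not executables, so two of the calls above do not do what their comments say. `subprocess.run(['cd', ...])` changes the directory only inside a child process that exits immediately, and `subprocess.run(['source', ...])` raises FileNotFoundError; as a result the later pip/python calls also run against the system interpreter rather than the venv. Below is a minimal sketch of the same steps using subprocess alone, passing cwd= to each call and invoking the venv's interpreter by path instead of activating it. It assumes a POSIX env/bin layout (on Windows the interpreter lives under env\Scripts); the repository URL and script names follow the commit above.

import subprocess
from pathlib import Path

repo = Path("replit-3B-inference")

# Clone the repository (skip if a previous run already created it)
if not repo.exists():
    subprocess.run(
        ["git", "clone", "https://github.com/abacaj/replit-3B-inference.git"],
        check=True,
    )

# Create the virtual environment inside the repo;
# cwd= replaces the throwaway `cd` process
subprocess.run(["python", "-m", "venv", "env"], cwd=repo, check=True)

# Calling the venv's interpreter by absolute path replaces `source env/bin/activate`
venv_python = str((repo / "env" / "bin" / "python").resolve())

# Update submodules
subprocess.run(["git", "submodule", "update", "--init", "--recursive"], cwd=repo, check=True)

# Install requirements into the venv, not the system Python
subprocess.run([venv_python, "-m", "pip", "install", "-r", "requirements.txt"], cwd=repo, check=True)

# Download the model, then run inference, under the venv interpreter
subprocess.run([venv_python, "download_model.py"], cwd=repo, check=True)
subprocess.run([venv_python, "inference.py"], cwd=repo, check=True)

check=True also makes each step fail loudly instead of silently continuing after a failed clone or install, which the original sequence would do.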