srijaydeshpande committed
Commit 1318362
1 Parent(s): 7bad814

Update app.py

Files changed (1)
  1. app.py +47 -36
app.py CHANGED
@@ -13,16 +13,17 @@ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
+from vllm import LLM, SamplingParams

 # subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124', shell=True)
 # subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)


-hf_hub_download(
-    repo_id="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF",
-    filename="Meta-Llama-3-8B-Instruct.Q8_0.gguf",
-    local_dir = "./models"
-)
+# hf_hub_download(
+#     repo_id="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF",
+#     filename="Meta-Llama-3-8B-Instruct.Q8_0.gguf",
+#     local_dir = "./models"
+# )

 # hf_hub_download(
 #     repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
@@ -106,14 +107,20 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
     # )

     # chat_template = MessagesFormatterType.LLAMA_3
+
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

-    llm = Llama(
-        model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
-        flash_attn=True,
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=8192,
-    )
+    # llm = Llama(
+    #     model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
+    #     flash_attn=True,
+    #     n_gpu_layers=81,
+    #     n_batch=1024,
+    #     n_ctx=8192,
+    # )
+
+    llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
+    outputs = llm.generate([prompt], sampling_params)
+    output = outputs[0].outputs[0].text

     # provider = LlamaCppPythonProvider(llm)

@@ -146,32 +153,36 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
     # for op in stream:
     #     output += op

-    output = llm.create_chat_completion(
-        messages=[
-            {"role": "assistant", "content": prompt},
-            {
-                "role": "user",
-                "content": pdftext
-            }
-        ],
-        max_tokens=maxtokens,
-        temperature=temperature
-    )
-    output = output['choices'][0]['message']['content']
+    # output = llm.create_chat_completion(
+    #     messages=[
+    #         {"role": "assistant", "content": prompt},
+    #         {
+    #             "role": "user",
+    #             "content": pdftext
+    #         }
+    #     ],
+    #     max_tokens=maxtokens,
+    #     temperature=temperature
+    # )
+    # output = output['choices'][0]['message']['content']

     prompt = "Perform the following actions on given text: 1. Replace any person age with term [redacted] 2. DO NOT REPLACE ANY MEDICAL MEASUREMENTS 3. Replace only the CALENDAR DATES of format 'day/month/year' with term [redacted]. Output the modified text."
-    output = llm.create_chat_completion(
-        messages=[
-            {"role": "assistant", "content": prompt},
-            {
-                "role": "user",
-                "content": output
-            }
-        ],
-        max_tokens=maxtokens,
-        temperature=temperature
-    )
-    output = output['choices'][0]['message']['content']
+
+    outputs = llm.generate([output], sampling_params)
+    output = outputs[0].outputs[0].text
+
+    # output = llm.create_chat_completion(
+    #     messages=[
+    #         {"role": "assistant", "content": prompt},
+    #         {
+    #             "role": "user",
+    #             "content": output
+    #         }
+    #     ],
+    #     max_tokens=maxtokens,
+    #     temperature=temperature
+    # )
+    # output = output['choices'][0]['message']['content']

     # print(prompt)
     # print(output)
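
For context on the new path: this commit swaps the locally downloaded GGUF file plus llama-cpp-python for vLLM's offline inference objects (LLM, SamplingParams, generate). Below is a minimal, self-contained sketch of how those calls fit together, using only the model name and sampling values visible in the diff; the example prompt is invented for illustration, and the gated meta-llama/Meta-Llama-3-8B-Instruct checkpoint still requires Hugging Face access credentials.

from vllm import LLM, SamplingParams

# Sampling settings matching the values added in this commit.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# vLLM loads the Hugging Face checkpoint directly; no local GGUF download step is needed.
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")

# generate() accepts a list of prompt strings and returns one RequestOutput per prompt;
# each RequestOutput exposes its completions under .outputs, each with a .text field.
prompts = ["Replace calendar dates of format 'day/month/year' in this note with [redacted]: Seen on 12/03/2021."]
outputs = llm.generate(prompts, sampling_params)
print(outputs[0].outputs[0].text)

Note that the replaced create_chat_completion call formatted role-based messages for the instruct model, while llm.generate sends the raw strings as-is, so any chat-template formatting now has to be folded into the prompt text itself.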