srijaydeshpande committed
Commit 1318362
1 Parent(s): 7bad814

Update app.py

Files changed (1)
  1. app.py +47 -36
app.py CHANGED
@@ -13,16 +13,17 @@ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
 from llama_cpp_agent.providers import LlamaCppPythonProvider
 from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
+from vllm import LLM, SamplingParams

 # subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124', shell=True)
 # subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)


-hf_hub_download(
-    repo_id="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF",
-    filename="Meta-Llama-3-8B-Instruct.Q8_0.gguf",
-    local_dir = "./models"
-)
+# hf_hub_download(
+#     repo_id="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF",
+#     filename="Meta-Llama-3-8B-Instruct.Q8_0.gguf",
+#     local_dir = "./models"
+# )

 # hf_hub_download(
 #     repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
@@ -106,14 +107,20 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
     # )

     # chat_template = MessagesFormatterType.LLAMA_3
+
+    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

-    llm = Llama(
-        model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
-        flash_attn=True,
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=8192,
-    )
+    # llm = Llama(
+    #     model_path="models/Meta-Llama-3-8B-Instruct.Q8_0.gguf",
+    #     flash_attn=True,
+    #     n_gpu_layers=81,
+    #     n_batch=1024,
+    #     n_ctx=8192,
+    # )
+
+    llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
+    outputs = llm.generate([prompt], sampling_params)
+    output = outputs[0].outputs[0].text

     # provider = LlamaCppPythonProvider(llm)

@@ -146,32 +153,36 @@ def deidentify_doc(pdftext, maxtokens, temperature, top_probability):
     # for op in stream:
     #     output += op

-    output = llm.create_chat_completion(
-        messages=[
-            {"role": "assistant", "content": prompt},
-            {
-                "role": "user",
-                "content": pdftext
-            }
-        ],
-        max_tokens=maxtokens,
-        temperature=temperature
-    )
-    output = output['choices'][0]['message']['content']
+    # output = llm.create_chat_completion(
+    #     messages=[
+    #         {"role": "assistant", "content": prompt},
+    #         {
+    #             "role": "user",
+    #             "content": pdftext
+    #         }
+    #     ],
+    #     max_tokens=maxtokens,
+    #     temperature=temperature
+    # )
+    # output = output['choices'][0]['message']['content']

     prompt = "Perform the following actions on given text: 1. Replace any person age with term [redacted] 2. DO NOT REPLACE ANY MEDICAL MEASUREMENTS 3. Replace only the CALENDAR DATES of format 'day/month/year' with term [redacted]. Output the modified text."
-    output = llm.create_chat_completion(
-        messages=[
-            {"role": "assistant", "content": prompt},
-            {
-                "role": "user",
-                "content": output
-            }
-        ],
-        max_tokens=maxtokens,
-        temperature=temperature
-    )
-    output = output['choices'][0]['message']['content']
+
+    outputs = llm.generate([output], sampling_params)
+    output = outputs[0].outputs[0].text
+
+    # output = llm.create_chat_completion(
+    #     messages=[
+    #         {"role": "assistant", "content": prompt},
+    #         {
+    #             "role": "user",
+    #             "content": output
+    #         }
+    #     ],
+    #     max_tokens=maxtokens,
+    #     temperature=temperature
+    # )
+    # output = output['choices'][0]['message']['content']

     # print(prompt)
     # print(output)
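
For context on the new path: this commit swaps the locally downloaded GGUF file plus llama-cpp-python for vLLM's offline inference objects (LLM, SamplingParams, generate). Below is a minimal, self-contained sketch of how those calls fit together, using only the model name and sampling values visible in the diff; the example prompt is invented for illustration, and the gated meta-llama/Meta-Llama-3-8B-Instruct checkpoint still requires Hugging Face access credentials.

from vllm import LLM, SamplingParams

# Sampling settings matching the values added in this commit.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# vLLM loads the Hugging Face checkpoint directly; no local GGUF download step is needed.
llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")

# generate() accepts a list of prompt strings and returns one RequestOutput per prompt;
# each RequestOutput exposes its completions under .outputs, each with a .text field.
prompts = ["Replace calendar dates of format 'day/month/year' in this note with [redacted]: Seen on 12/03/2021."]
outputs = llm.generate(prompts, sampling_params)
print(outputs[0].outputs[0].text)

Note that the replaced create_chat_completion call formatted role-based messages for the instruct model, while llm.generate sends the raw strings as-is, so any chat-template formatting now has to be folded into the prompt text itself.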