# Smart-inbox / llama2_response_mail_generator.py
# Hugging Face file-viewer header (not part of the program):
#   imenayadi's picture | update llama model | 0cb6039 | raw | history blame | 2.31 kB
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Hugging Face repository id and the file inside it that holds the weights.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # weights ship as a .bin file

# Fetch the weights file from the Hub; the returned value is passed to
# Llama below as the model path.
model_path = hf_hub_download(
    filename=model_basename,
    repo_id=model_name_or_path,
)

# Build the shared Llama instance used by the rest of this module.
lcpp_llm = Llama(
    n_gpu_layers=32,  # layers to run on the GPU; depends on available VRAM
    n_batch=512,  # processing batch size; tune to VRAM capacity
    n_threads=2,  # CPU cores to use
    model_path=model_path,
)
def generate_email_response(email_prompt):
    """Draft a reply to an email with the module-level ``lcpp_llm`` model.

    The email text is embedded in a fixed instruction prompt, the prompt is
    sent to ``lcpp_llm``, and the completion text is returned with
    surrounding whitespace removed.

    Args:
        email_prompt: Text of the received email, or a short instruction
            such as "email to ...". Must be a string.

    Returns:
        The generated reply, or a fixed failure message when the input is
        not a string or when generation raises.
    """
    failure_message = "Failed to generate response, please check the console for errors."

    # Check input received by the function
    print("Received prompt:", email_prompt)

    # A non-string (e.g. None) cannot be turned into a meaningful prompt;
    # report it the same way as any other generation failure instead of
    # letting an AttributeError escape to the caller.
    if not isinstance(email_prompt, str):
        print(
            "Error in response generation:",
            f"expected a string prompt, got {type(email_prompt).__name__}",
        )
        return failure_message

    # NOTE(review): the previous code branched on 'email to' to tell
    # shorthand commands apart from real emails, but both branches built
    # this exact prompt, so a single template is used. Add a dedicated
    # template here if shorthand commands should be handled differently.
    formatted_prompt = (
        f'\nEmail received: "{email_prompt}"\n'
        "Respond to this email, ensuring a professional tone, providing a "
        "concise update, and addressing any potential concerns the sender "
        "might have.\n"
        "Response:\n"
    )

    # Generate response using Llama-2 model
    try:
        response = lcpp_llm(
            prompt=formatted_prompt,
            max_tokens=256,
            temperature=0.5,
            top_p=0.95,
            repeat_penalty=1.2,
            top_k=150,
            # Ask for the completion only, so the prompt never has to be
            # removed from the output afterwards.
            echo=False,
        )
        generated_response = response["choices"][0]["text"].strip()
        print("Generated response:", generated_response)
        return generated_response
    except Exception as e:
        # Best-effort boundary: callers always get a string back.
        print("Error in response generation:", str(e))
        return failure_message