Spaces:

imenayadi
/

Smart-inbox

Running

App Files Files Community

Smart-inbox / llama2_response_mail_generator.py

imenayadi

update llama model

0cb6039 5 months ago

raw

history blame

2.31 kB

	from huggingface_hub import hf_hub_download

	from llama_cpp import Llama

	# Hugging Face Hub coordinates of the quantised Llama-2 chat weights.
	# NOTE(review): this is a GGML v3 ".bin" file; newer llama-cpp-python
	# releases reportedly expect GGUF instead - confirm against the pinned version.
	model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
	model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"

	# Download the model file; the returned value is handed to Llama below.
	model_path = hf_hub_download(
	    filename=model_basename,
	    repo_id=model_name_or_path,
	)

	# Module-level model instance that generate_email_response calls.
	lcpp_llm = Llama(
	    n_gpu_layers=32,  # number of layers to run on the GPU (VRAM dependent)
	    n_batch=512,  # batch size for processing (VRAM dependent)
	    n_threads=2,  # CPU cores to use
	    model_path=model_path,
	)

	def generate_email_response(email_prompt):
	    """Generate a reply to an email using the module-level Llama-2 model.

	    Args:
	        email_prompt: Text of the received email, or a shorthand instruction
	            such as "email to ...". Both forms are sent to the model with
	            the same prompt wording (the original code built identical
	            prompts in both branches, so the distinction had no effect).

	    Returns:
	        The generated reply with surrounding whitespace removed. If the
	        input is blank or not a string, or if generation raises, the fixed
	        failure message is returned instead and the cause is printed.
	    """
	    failure_message = "Failed to generate response, please check the console for errors."

	    # Check input received by the function
	    print("Received prompt:", email_prompt)

	    # Reject blank or non-text input before invoking the model: there is no
	    # email to answer, and None used to crash on .lower() outside the try.
	    if not isinstance(email_prompt, str) or not email_prompt.strip():
	        print("Error in response generation:", "no email content was provided")
	        return failure_message

	    # Prompt text is kept byte-for-byte identical to the original template,
	    # including its leading newline and per-line tab.
	    formatted_prompt = (
	        "\n"
	        f'\tEmail received: "{email_prompt}"\n'
	        "\tRespond to this email, ensuring a professional tone, providing a concise update, and addressing any potential concerns the sender might have.\n"
	        "\tResponse:\n"
	        "\t"
	    )

	    # Generate response using Llama-2 model
	    try:
	        response = lcpp_llm(
	            prompt=formatted_prompt,
	            max_tokens=256,
	            temperature=0.5,
	            top_p=0.95,
	            repeat_penalty=1.2,
	            top_k=150,
	            # Ask for the completion only. The previous echo=True plus
	            # exact-substring removal leaked the whole prompt into the reply
	            # whenever the echoed text differed from the prompt at all.
	            echo=False,
	        )
	        generated_response = response["choices"][0]["text"].strip()
	    except Exception as e:
	        # Deliberate best-effort boundary: report on the console and hand
	        # the caller a displayable message instead of raising.
	        print("Error in response generation:", str(e))
	        return failure_message

	    print("Generated response:", generated_response)
	    return generated_response