Spaces:

maykcaldas
/

MAPI_LLM

Build error

App Files Files Community

MAPI_LLM / mapi_tools.py

maykcaldas

Update mapi_tools.py

611d61b over 1 year ago

raw

history blame contribute delete

9.2 kB

	from mp_api.client import MPRester
	from emmet.core.summary import HasProps
	import openai
	import langchain
	from langchain import OpenAI
	from langchain import agents
	from langchain.agents import initialize_agent
	from langchain.agents import Tool, tool
	from langchain import LLMMathChain, SerpAPIWrapper
	from gpt_index import GPTListIndex, GPTIndexMemory
	from langchain import SerpAPIWrapper
	from langchain.prompts.few_shot import FewShotPromptTemplate
	from langchain.prompts.prompt import PromptTemplate
	from langchain.vectorstores import FAISS, Chroma
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.prompts.example_selector import (MaxMarginalRelevanceExampleSelector,
	SemanticSimilarityExampleSelector)
	import requests
	from rdkit import Chem
	import pandas as pd
	import os

	class MAPITools:
	    """Shared base for the Materials Project (MAPI) agent tool sets.

	    Provides formula parsing, an element-based Materials Project search, an
	    OpenAI completion call, and the list of LangChain ``Tool`` objects that
	    expose these methods to an agent.

	    The base class does not set ``self.prop`` or ``self.prop_name`` even
	    though ``search_similars_by_atom`` and ``get_tools`` read them; subclasses
	    must define both and override ``check_prop_by_formula`` and
	    ``create_context_prompt``.
	    """

	    def __init__(self):
	        # Model name handed to OpenAI(model_name=...) in LLM_predict.
	        self.model = 'text-ada-001' #maybe change to gpt-4 when ready
	        # Passed as `k` to the example selector built by the subclasses.
	        self.k=10

	    def get_material_atoms(self, formula):
	        """Return the element symbols found in ``formula``, comma separated.

	        A symbol is one uppercase letter followed by any number of lowercase
	        letters; digits and any other characters (e.g. parentheses) are
	        skipped. Each symbol appears once, in order of first occurrence.

	        Args:
	            formula: Chemical formula such as ``"Fe2O3"`` or ``"NaCl"``.

	        Returns:
	            A string like ``"Fe,O"``; the empty string when no symbol is found.
	        """
	        import re
	        # The previous pattern demanded exactly one lowercase letter AND one
	        # digit, so single-letter elements and elements without an explicit
	        # count were silently dropped ("H2O" -> "", "Fe2O3" -> "Fe").
	        symbols = re.findall(r"[A-Z][a-z]*", formula)
	        # dict.fromkeys removes repeats while keeping first-seen order.
	        return ",".join(dict.fromkeys(symbols))

	    def check_prop_by_formula(self, formula):
	        """Look up the property for ``formula``; implemented by subclasses.

	        Raises:
	            NotImplementedError: Always, in this base class.
	        """
	        raise NotImplementedError('Should be implemented in children classes')

	    def search_similars_by_atom(self, atoms):
	        """Search the Materials Project for materials containing the given elements.

	        Args:
	            atoms: Element symbols separated by commas; spaces are ignored.

	        Returns:
	            The documents returned by ``mpr.summary.search``, requested with
	            the ``formula_pretty`` and ``self.prop`` fields.
	        """
	        atoms = atoms.replace(" ", "")
	        # The API key is read from the MAPI_API_KEY environment variable.
	        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
	            docs = mpr.summary.search(elements=atoms.split(','), fields=["formula_pretty", self.prop])
	        return docs

	    def create_context_prompt(self, formula):
	        """Build the few-shot prompt for ``formula``; implemented by subclasses.

	        Raises:
	            NotImplementedError: Always, in this base class.
	        """
	        raise NotImplementedError('Should be implemented in children classes')

	    def LLM_predict(self, prompt):
	        """Send ``prompt`` to the OpenAI completion model and return its text.

	        Args:
	            prompt: Prompt text, normally produced by ``create_context_prompt``.

	        Returns:
	            The text of the first generation for the prompt.
	        """
	        llm = OpenAI(
	            model_name=self.model,
	            temperature=0.7,
	            n=1,
	            best_of=5,
	            top_p=1.0,
	            # "###" is the end-of-answer marker used in the few-shot examples.
	            stop=["\n\n", "###", "#", "##"],
	            # model_kwargs=kwargs,
	        )
	        return llm.generate([prompt]).generations[0][0].text

	    def get_tools(self):
	        """Return the LangChain ``Tool`` objects wrapping this object's methods.

	        Tool names and descriptions embed ``self.prop_name``, so this must be
	        called on an instance that defines it.
	        """
	        return [
	            Tool(
	                name = "Get atoms in material",
	                func = self.get_material_atoms,
	                description = (
	                    "Receives a material formula and returns the atoms symbols present in it separated by comma."
	                )
	            ),
	            Tool(
	                name = f"Checks if material is {self.prop_name} by formula",
	                func = self.check_prop_by_formula,
	                description = (
	                    f"This functions searches in the material project's API for the formula and returns if it is {self.prop_name} or not."
	                )
	            ),
	            # Tool(
	            #     name = "Search similar materials by atom",
	            #     func = self.search_similars_by_atom,
	            #     description = (
	            #         "This function receives a string with the atoms separated by comma as input and returns a list of similar materials."
	            #     )
	            # ),
	            Tool(
	                name = f"Create {self.prop_name} context to LLM search",
	                func = self.create_context_prompt,
	                # The wording differs for the classification and regression tool sets.
	                description = (
	                    f"This function received a material formula as input and create a prompt to be inputed in the LLM_predict tool to predict if the material is {self.prop_name}."
	                    if isinstance(self, MAPI_class_tools) else
	                    f"This function received a material formula as input and create a prompt to be inputed in the LLM_predict tool to predict the {self.prop_name} of a material."
	                )
	            ),
	            # NOTE(review): "predictiom" looks like a typo for "prediction"; left
	            # unchanged because the name is a runtime string shown to the agent.
	            Tool(name = "LLM predictiom",
	                func = self.LLM_predict,
	                description = (
	                    "This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion"
	                )
	            )
	        ]

	class MAPI_class_tools(MAPITools):
	    """Tool set for a two-label (yes/no) Materials Project property.

	    Args:
	        prop: Name of the summary field to request and read from each document.
	        prop_name: Human-readable property name used in tool names and prompts.
	        p_label: Label returned when the field value is truthy.
	        n_label: Label returned when the field value is falsy.
	    """

	    def __init__(self, prop, prop_name, p_label, n_label):
	        super().__init__()
	        self.prop = prop
	        self.prop_name = prop_name
	        self.p_label = p_label
	        self.n_label = n_label

	    def check_prop_by_formula(self, formula):
	        """Look up ``formula`` in the Materials Project and return its label.

	        Only the first returned document is inspected, and only when its
	        ``formula_pretty`` equals ``formula`` exactly.

	        Args:
	            formula: Formula to search for.

	        Returns:
	            ``self.p_label`` or ``self.n_label`` according to the truthiness of
	            the property, or a "Could not find ..." message otherwise.
	        """
	        # A plain docstring is used above: an f-string in that position is an
	        # ordinary expression, so it never becomes the method's __doc__.
	        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
	            docs = mpr.summary.search(formula=formula, fields=["formula_pretty", self.prop])
	        if docs and docs[0].formula_pretty == formula:
	            return self.p_label if docs[0].dict()[self.prop] else self.n_label
	        return f"Could not find any material while searching {formula}"

	    def create_context_prompt(self, formula):
	        """Build a few-shot classification prompt for ``formula``.

	        Materials sharing elements with ``formula`` are fetched, labelled with
	        ``p_label``/``n_label``, de-duplicated, and handed to a
	        max-marginal-relevance example selector (``k=self.k``) that picks the
	        examples placed before the final question.

	        Args:
	            formula: Formula whose label the language model should predict.

	        Returns:
	            The formatted prompt string.
	        """
	        elements = self.get_material_atoms(formula)
	        labelled = [
	            {'formula': ex.formula_pretty,
	             'prop': self.p_label if ex.dict()[self.prop] else self.n_label
	            } for ex in self.search_similars_by_atom(elements)
	        ]
	        # Round-trip through pandas only to drop duplicated (formula, label) rows.
	        examples = pd.DataFrame(labelled).drop_duplicates().to_dict(orient="records")
	        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
	            examples,
	            OpenAIEmbeddings(),
	            FAISS,
	            k=self.k,
	        )

	        prefix=(
	            f'You are a bot who can predict if a material is {self.prop_name}.\n'
	            f'Given this list of known materials and the information if they are {self.p_label} or {self.n_label}, \n'
	            f'you need to answer the question if the last material is {self.prop_name}:'
	        )
	        # Each example ends with "###", which LLM_predict lists as a stop sequence.
	        prompt_template=PromptTemplate(
	            input_variables=["formula", "prop"],
	            template=f"Is {{formula}} a {self.prop_name} material?@@@\n{{prop}}###",
	        )
	        # The suffix repeats the question for the target formula, answer left open.
	        suffix = f"Is {{formula}} a {self.prop_name} material?@@@\n"
	        prompt = FewShotPromptTemplate(
	            # examples=examples,
	            example_prompt=prompt_template,
	            example_selector=example_selector,
	            prefix=prefix,
	            suffix=suffix,
	            input_variables=["formula"])

	        return prompt.format(formula=formula)

	class MAPI_reg_tools(MAPITools):
	    """Tool set for a numeric Materials Project property.

	    Args:
	        prop: Name of the summary field to request and read from each document.
	        prop_name: Human-readable property name used in tool names and prompts.
	    """
	    # TODO: deal with units

	    def __init__(self, prop, prop_name):
	        super().__init__()
	        self.prop = prop
	        self.prop_name = prop_name

	    def check_prop_by_formula(self, formula):
	        """Look up ``formula`` in the Materials Project and return the property value.

	        Only the first returned document is inspected, and only when its
	        ``formula_pretty`` equals ``formula`` exactly.

	        Args:
	            formula: Formula to search for.

	        Returns:
	            The stored value of ``self.prop``; a "There is no record ..."
	            message when the material is found but the value is ``None``; a
	            "Could not find ..." message when no exact match is found.
	        """
	        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
	            docs = mpr.summary.search(formula=formula, fields=["formula_pretty", self.prop])
	        if docs and docs[0].formula_pretty == formula:
	            value = docs[0].dict()[self.prop]
	            # The missing-value check belongs to the matching material.
	            # Previously it sat on the non-matching branch, so a match with no
	            # value returned a bare None and a mismatch could report "no
	            # record" for the requested formula.
	            if value is None:
	                return f"There is no record of {self.prop_name} for {formula}"
	            return value
	        return f"Could not find any material while searching {formula}"

	    def create_context_prompt(self, formula):
	        """Build a few-shot regression prompt for ``formula``.

	        Materials sharing elements with ``formula`` are fetched, their property
	        values formatted as text, rows with no value or duplicated rows are
	        dropped, and a max-marginal-relevance example selector (``k=self.k``)
	        picks the examples placed before the final question.

	        Args:
	            formula: Formula whose property value the language model should predict.

	        Returns:
	            The formatted prompt string.
	        """
	        elements = self.get_material_atoms(formula)
	        measured = []
	        for ex in self.search_similars_by_atom(elements):
	            value = ex.dict()[self.prop]
	            measured.append(
	                {'formula': ex.formula_pretty,
	                 # NOTE(review): ":2f" is minimum width 2 with the default six
	                 # decimals; ".2f" may have been intended. Confirm before changing.
	                 'prop': f"{value:2f}" if value is not None else None
	                }
	            )
	        # dropna() removes the materials whose value was None above.
	        examples = pd.DataFrame(measured).drop_duplicates().dropna().to_dict(orient="records")

	        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
	            examples,
	            OpenAIEmbeddings(),
	            FAISS,
	            k=self.k,
	        )

	        prefix=(
	            f'You are a bot who can predict the {self.prop_name} of a material .\n'
	            f'Given this list of known materials and the measurement of their {self.prop_name}, \n'
	            f'you need to answer the what is the {self.prop_name} of the material:'
	            'The answer should be numeric and finish with ###'
	        )
	        # Each example ends with "###", which LLM_predict lists as a stop sequence.
	        prompt_template=PromptTemplate(
	            input_variables=["formula", "prop"],
	            template=f"What is the {self.prop_name} for {{formula}}?@@@\n{{prop}}###",
	        )
	        # The suffix repeats the question for the target formula, answer left open.
	        suffix = f"What is the {self.prop_name} for {{formula}}?@@@\n"
	        prompt = FewShotPromptTemplate(
	            # examples=examples,
	            example_prompt=prompt_template,
	            example_selector=example_selector,
	            prefix=prefix,
	            suffix=suffix,
	            input_variables=["formula"])

	        return prompt.format(formula=formula)