File size: 2,990 Bytes
33fe60d
d30c111
7f409ac
bf88efa
33fe60d
 
 
7e976dc
33fe60d
 
 
4583ba5
33fe60d
 
e769925
 
341abc7
7f409ac
33fe60d
 
6acc7d5
 
 
 
 
26cc997
33fe60d
6acc7d5
33fe60d
26cc997
e769925
d3deef7
7e976dc
7f409ac
 
33fe60d
 
396decf
33fe60d
 
 
 
 
 
 
ebf0716
33fe60d
6acc7d5
33fe60d
 
 
 
 
 
 
 
7f409ac
33fe60d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from typing import Optional

from langchain.document_loaders import ConfluenceLoader
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter,RecursiveCharacterTextSplitter,SentenceTransformersTokenTextSplitter
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,pipeline,T5Tokenizer,T5ForConditionalGeneration
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

class ConfluenceQA:
    """Answer questions about a Confluence space with retrieval plus an LLM.

    Each step reads state produced by an earlier one, so the usual call
    sequence is ``init_embeddings`` -> ``define_model`` ->
    ``store_in_vector_db`` -> ``retrieve_qa_chain`` -> ``qa_bot``.
    """

    def __init__(self, config: Optional[dict] = None) -> None:
        """Create an instance with no model, index or chain built yet.

        Args:
            config: Settings read by ``store_in_vector_db``. Recognised keys
                are ``confluence_url``, ``username``, ``api_key``,
                ``space_key``, ``include_attachment`` and
                ``persist_directory``. The dict is stored by reference, not
                copied. When omitted, a fresh empty dict is used.
        """
        # A literal ``{}`` default would be a single dict shared by every
        # instance created without arguments; build a new one per instance.
        self.config = {} if config is None else config
        self.db = None
        self.embeddings = None
        self.llm = None
        self.qa = None

    def init_embeddings(self) -> None:
        """Create the sentence-embedding model and store it on ``self.embeddings``."""
        self.embeddings = HuggingFaceEmbeddings(model_name="multi-qa-MiniLM-L6-cos-v1")

    def define_model(self) -> None:
        """Load FLAN-T5 XXL, wrap it in a pipeline and store it on ``self.llm``."""
        tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xxl")
        model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xxl")
        pipe = pipeline(
            "text2text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=1024,
        )
        # NOTE(review): confirm that ``model_kwargs`` actually reaches
        # generation when a ready-made pipeline is supplied; the pipeline
        # above is built without any sampling arguments.
        self.llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={"temperature": 0.5})

    def store_in_vector_db(self) -> None:
        """Load a Confluence space, split it into chunks and index it in Chroma.

        Connection details come from ``self.config``; missing keys are passed
        to the loader as ``None``, except ``include_attachment`` which
        defaults to ``True``. The resulting store is kept on ``self.db``.
        """
        # Build the embedding model on demand so the index is never created
        # without one just because ``init_embeddings`` was not called first.
        if self.embeddings is None:
            self.init_embeddings()

        loader = ConfluenceLoader(
            url=self.config.get("confluence_url"),
            username=self.config.get("username"),
            api_key=self.config.get("api_key"),
        )
        documents = loader.load(
            space_key=self.config.get("space_key"),
            include_attachments=self.config.get("include_attachment", True),
            limit=100,
        )
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        chunks = text_splitter.split_documents(documents)
        # ``persist_directory`` was previously read from the config and then
        # ignored; forward it so a configured location is honoured. ``None``
        # is the parameter's default, so unconfigured runs behave as before.
        self.db = Chroma.from_documents(
            chunks,
            self.embeddings,
            persist_directory=self.config.get("persist_directory"),
        )

    def retrieve_qa_chain(self) -> None:
        """Build the retrieval QA chain and store it on ``self.qa``.

        Raises:
            RuntimeError: If ``define_model`` or ``store_in_vector_db`` has
                not populated ``self.llm`` / ``self.db`` yet.
        """
        if self.llm is None:
            raise RuntimeError("LLM is not initialised; call define_model() first")
        if self.db is None:
            raise RuntimeError(
                "Vector store is not initialised; call store_in_vector_db() first"
            )

        template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
        {context}
        Question: {question}
        Helpful Answer:"""
        QA_CHAIN_PROMPT = PromptTemplate(
            template=template, input_variables=["context", "question"]
        )
        self.qa = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            # Retrieve the four nearest chunks for each question.
            retriever=self.db.as_retriever(search_kwargs={"k": 4}),
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
        )

    def qa_bot(self, query: str):
        """Run ``query`` through the QA chain, print the answer and return it.

        Args:
            query: The question to ask.

        Returns:
            Whatever ``self.qa.run(query)`` returns.

        Raises:
            RuntimeError: If ``retrieve_qa_chain`` has not been called yet.
        """
        if self.qa is None:
            raise RuntimeError(
                "QA chain is not initialised; call retrieve_qa_chain() first"
            )
        result = self.qa.run(query)
        print(result)
        return result