Harpreet Sahota committed
Commit b86d555
0 Parent(s):

Duplicate from harpreetsahota/RAQA-Application-Chainlit-Demo

Files changed (9)
  1. .env.example +1 -0
  2. .gitattributes +35 -0
  3. .gitignore +4 -0
  4. Dockerfile +11 -0
  5. README.md +12 -0
  6. app.py +121 -0
  7. chainlit.md +11 -0
  8. data/spiderverse.csv +0 -0
  9. requirements.txt +5 -0
.env.example ADDED
@@ -0,0 +1 @@
+ OPENAI_API_KEY=sk-...
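On a Hugging Face Space the key is supplied as a Space secret; locally it can be exported in the shell or placed in a `.env` file copied from `.env.example`. A minimal, purely illustrative startup guard (not part of this commit) could fail fast when the key is missing:

```python
import os

# Hypothetical guard, not in the committed app.py: abort early if the
# OpenAI key was not provided via .env (local) or a Space secret (deployed).
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY is not set; see .env.example.")
```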
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
+ .env
+ __pycache__
+ cache
+ .chainlit
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+ WORKDIR $HOME/app
+ COPY --chown=user . $HOME/app
+ COPY ./requirements.txt $HOME/app/requirements.txt
+ RUN pip install -r requirements.txt
+ COPY . .
+ CMD ["chainlit", "run", "app.py", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Spidey-verse RAQA Application Chainlit Demo
+ emoji: 🔥
+ colorFrom: red
+ colorTo: red
+ sdk: docker
+ pinned: false
+ license: apache-2.0
+ duplicated_from: harpreetsahota/RAQA-Application-Chainlit-Demo
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,121 @@
+ import chainlit as cl
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.document_loaders.csv_loader import CSVLoader
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.chains import RetrievalQA
+ from langchain.chat_models import ChatOpenAI
+ from langchain.storage import LocalFileStore
+ from langchain.prompts.chat import (
+     ChatPromptTemplate,
+     SystemMessagePromptTemplate,
+     HumanMessagePromptTemplate,
+ )
+ import chainlit as cl
+
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+
+ system_template = """
+ Use the following pieces of context to answer the user's question.
+
+ Please respond as if you were Miles Morales from the Spider-Man comics and movies. General speech patterns: Uses contractions often, like "I'm," "can't," and "don't."
+ Might sprinkle in some Spanish, given his Puerto Rican heritage. References to modern pop culture, music, or tech. Miles is a brave young hero, grappling with his dual
+ heritage and urban life. He has a passion for music, especially hip-hop, and is also into art, being a graffiti artist himself. He speaks with an urban and youthful tone,
+ reflecting the voice of modern NYC youth. He might occasionally reference modern pop culture, his friends, or his school life.
+ If you don't know the answer, just say you're unsure. Don't try to make up an answer.
+
+ You can make inferences based on the context as long as it aligns with Miles' personality and experiences.
+
+ Example of your interaction:
+
+ User: "What did you think of the latest Spider-Man movie?"
+ MilesBot: "Haha, watching Spider-Man on screen is always surreal for me. But it's cool to see different takes on the web-slinger's story. Always reminds me of the Spider-Verse!"
+
+ Example of your response:
+
+
+ ```
+ The answer is foo
+ ```
+
+ Begin!
+ ----------------
+ {context}"""
+
+ messages = [
+     SystemMessagePromptTemplate.from_template(system_template),
+     HumanMessagePromptTemplate.from_template("{question}"),
+ ]
+ prompt = ChatPromptTemplate(messages=messages)
+ chain_type_kwargs = {"prompt": prompt}
+
+ @cl.author_rename
+ def rename(orig_author: str):
+     rename_dict = {"RetrievalQA": "Crawling the Spiderverse"}
+     return rename_dict.get(orig_author, orig_author)
+
+ @cl.on_chat_start
+ async def init():
+     msg = cl.Message(content="Building Index...")
+     await msg.send()
+
+     # Build the FAISS index from the review CSV
+     loader = CSVLoader(file_path="./data/spiderverse.csv", source_column="Review_Url")
+     data = loader.load()
+     documents = text_splitter.transform_documents(data)
+     store = LocalFileStore("./cache/")
+     core_embeddings_model = OpenAIEmbeddings()
+     embedder = CacheBackedEmbeddings.from_bytes_store(
+         core_embeddings_model, store, namespace=core_embeddings_model.model
+     )
+     # Build the vector store without blocking the event loop
+     docsearch = await cl.make_async(FAISS.from_documents)(documents, embedder)
+
+     chain = RetrievalQA.from_chain_type(
+         ChatOpenAI(model="gpt-4", temperature=0, streaming=True),
+         chain_type="stuff",
+         return_source_documents=True,
+         retriever=docsearch.as_retriever(),
+         chain_type_kwargs=chain_type_kwargs,
+     )
+
+     msg.content = "Index built!"
+     await msg.send()
+
+     cl.user_session.set("chain", chain)
+
+
+ @cl.on_message
+ async def main(message):
+     chain = cl.user_session.get("chain")
+     cb = cl.AsyncLangchainCallbackHandler(
+         stream_final_answer=False, answer_prefix_tokens=["FINAL", "ANSWER"]
+     )
+     cb.answer_reached = True
+     res = await chain.acall(message, callbacks=[cb])
+
+     answer = res["result"]
+     source_elements = []
+     visited_sources = set()
+
+     # Collect the source documents returned by the chain
+     docs = res["source_documents"]
+     metadatas = [doc.metadata for doc in docs]
+     all_sources = [m["source"] for m in metadatas]
+
+     for source in all_sources:
+         if source in visited_sources:
+             continue
+         visited_sources.add(source)
+         # Create the text element referenced in the message
+         source_elements.append(
+             cl.Text(content="https://www.imdb.com" + source, name="Review URL")
+         )
+
+     if source_elements:
+         answer += f"\nSources: {', '.join([e.content for e in source_elements])}"
+     else:
+         answer += "\nNo sources found"
+
+     await cl.Message(content=answer, elements=source_elements).send()
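As a quick aside, the retrieval pipeline above can be sanity-checked outside Chainlit from a plain Python shell. The sketch below is illustrative only and not part of this commit: it assumes `./data/spiderverse.csv` is present and `OPENAI_API_KEY` is set, and it skips the embedding cache, the Miles Morales prompt, and streaming for brevity.

```python
# Illustrative sanity check -- not part of the committed app.py
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Same loader and splitter settings as app.py
docs = CSVLoader(file_path="./data/spiderverse.csv", source_column="Review_Url").load()
chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).transform_documents(docs)

# Build the FAISS index directly from OpenAI embeddings (no cache layer here)
index = FAISS.from_documents(chunks, OpenAIEmbeddings())

qa = RetrievalQA.from_chain_type(
    ChatOpenAI(model="gpt-4", temperature=0),
    chain_type="stuff",
    retriever=index.as_retriever(),
    return_source_documents=True,
)

result = qa({"query": "What do reviewers think of the animation style?"})
print(result["result"])
print([doc.metadata["source"] for doc in result["source_documents"]])
```

The query string is only a placeholder; any question against the review data works the same way.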
chainlit.md ADDED
@@ -0,0 +1,11 @@
+ # Assignment Part 2: Deploying Your Model to a Hugging Face Space
+
+ Now that you've done the hard work of setting up the RetrievalQA chain and sourcing your documents, let's tie it together in a Chainlit application.
+
+ ### Duplicating the Space
+
+ Since this is our first assignment, all you'll need to do is duplicate this Space and add your own `OPENAI_API_KEY` as a secret in the Space.
+
+ ### Conclusion
+
+ Now that you've shipped an LLM-powered application, it's time to share! 🚀
data/spiderverse.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ chainlit==0.6.2
+ langchain==0.0.265
+ tiktoken==0.4.0
+ openai==0.27.8
+ faiss-cpu==1.7.4