Spaces:
Running
Running
ClearLove443
commited on
Commit
•
bf12aca
1
Parent(s):
e37420b
add application file
Browse files- .gitignore +161 -0
- .streamlit/config.toml +7 -0
- .vscode/launch.json +16 -0
- LICENSE +21 -0
- requirements.txt +0 -0
- setup.sh +21 -0
- src/Home.py +63 -0
- src/modules/chatbot.py +72 -0
- src/modules/embedder.py +87 -0
- src/modules/history.py +58 -0
- src/modules/layout.py +44 -0
- src/modules/llm.py +28 -0
- src/modules/robby_sheet/table_tool.py +73 -0
- src/modules/sidebar.py +52 -0
- src/modules/utils.py +105 -0
- src/pages/1_📄Robby-Chat.py +100 -0
- src/pages/2_📊 Robby-Sheet (beta).py +77 -0
- src/pages/3_🎬 Robby-Youtube.py +71 -0
- tuto_chatbot_csv.py +73 -0
.gitignore
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
105 |
+
__pypackages__/
|
106 |
+
|
107 |
+
# Celery stuff
|
108 |
+
celerybeat-schedule
|
109 |
+
celerybeat.pid
|
110 |
+
|
111 |
+
# SageMath parsed files
|
112 |
+
*.sage.py
|
113 |
+
|
114 |
+
# Environments
|
115 |
+
|
116 |
+
.venv
|
117 |
+
env/
|
118 |
+
venv/
|
119 |
+
ENV/
|
120 |
+
env.bak/
|
121 |
+
venv.bak/
|
122 |
+
|
123 |
+
# Spyder project settings
|
124 |
+
.spyderproject
|
125 |
+
.spyproject
|
126 |
+
|
127 |
+
# Rope project settings
|
128 |
+
.ropeproject
|
129 |
+
|
130 |
+
# mkdocs documentation
|
131 |
+
/site
|
132 |
+
|
133 |
+
# mypy
|
134 |
+
.mypy_cache/
|
135 |
+
.dmypy.json
|
136 |
+
dmypy.json
|
137 |
+
|
138 |
+
# Pyre type checker
|
139 |
+
.pyre/
|
140 |
+
|
141 |
+
# pytype static type analyzer
|
142 |
+
.pytype/
|
143 |
+
|
144 |
+
# Cython debug symbols
|
145 |
+
cython_debug/
|
146 |
+
|
147 |
+
# PyCharm
|
148 |
+
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
|
149 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
150 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
151 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
152 |
+
#.idea/
|
153 |
+
|
154 |
+
#venv
|
155 |
+
|
156 |
+
*.pkl
|
157 |
+
*.csv
|
158 |
+
|
159 |
+
.env
|
160 |
+
embeddings/
|
161 |
+
*bk
|
.streamlit/config.toml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
base = "light"
|
3 |
+
primaryColor = "#89CFF0"
|
4 |
+
backgroundColor = "#E0F7FE"
|
5 |
+
secondaryBackgroundColor = "#FFFCE4"
|
6 |
+
textColor = "#000000"
|
7 |
+
font = "sans serif"
|
.vscode/launch.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
// Use IntelliSense to learn about possible attributes.
|
3 |
+
// Hover to view descriptions of existing attributes.
|
4 |
+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
5 |
+
"version": "0.2.0",
|
6 |
+
"configurations": [
|
7 |
+
{
|
8 |
+
"name": "Python: Current File",
|
9 |
+
"type": "python",
|
10 |
+
"request": "launch",
|
11 |
+
"program": "${file}",
|
12 |
+
"console": "integratedTerminal",
|
13 |
+
"justMyCode": false
|
14 |
+
}
|
15 |
+
]
|
16 |
+
}
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 yvann-hub
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
requirements.txt
ADDED
Binary file (434 Bytes). View file
|
|
setup.sh
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
mkdir -p ~/.streamlit/
|
2 |
+
|
3 |
+
echo "\
|
4 |
+
[general]\n\
|
5 |
+
email = \"[email protected]\"\n\
|
6 |
+
" > ~/.streamlit/credentials.toml
|
7 |
+
|
8 |
+
echo "\
|
9 |
+
[server]\n\
|
10 |
+
headless = true\n\
|
11 |
+
enableCORS=false\n\
|
12 |
+
port = $PORT\n\
|
13 |
+
\n\
|
14 |
+
[theme]\n\
|
15 |
+
base = \"light\"\n\
|
16 |
+
primaryColor = \"#89CFF0\"\n\
|
17 |
+
backgroundColor = \"#E0F7FE\"\n\
|
18 |
+
secondaryBackgroundColor = \"#FFFCE4\"\n\
|
19 |
+
textColor = \"#000000\"\n\
|
20 |
+
font = \"sans serif\"\n\
|
21 |
+
" > ~/.streamlit/config.toml
|
src/Home.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
|
4 |
+
#Config
|
5 |
+
st.set_page_config(layout="wide", page_icon="💬", page_title="Robby | Chat-Bot 🤖")
|
6 |
+
|
7 |
+
|
8 |
+
#Contact
|
9 |
+
with st.sidebar.expander("📬 Contact"):
|
10 |
+
|
11 |
+
st.write("**GitHub:**",
|
12 |
+
"[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot)")
|
13 |
+
|
14 |
+
st.write("**Medium:** "
|
15 |
+
"[@yvann-hub](https://medium.com/@yvann-hub)")
|
16 |
+
|
17 |
+
st.write("**Twitter:** [@yvann_hub](https://twitter.com/yvann_hub)")
|
18 |
+
st.write("**Mail** : [email protected]")
|
19 |
+
st.write("**Created by Yvann**")
|
20 |
+
|
21 |
+
|
22 |
+
#Title
|
23 |
+
st.markdown(
|
24 |
+
"""
|
25 |
+
<h2 style='text-align: center;'>Robby, your data-aware assistant 🤖</h1>
|
26 |
+
""",
|
27 |
+
unsafe_allow_html=True,)
|
28 |
+
|
29 |
+
st.markdown("---")
|
30 |
+
|
31 |
+
|
32 |
+
#Description
|
33 |
+
st.markdown(
|
34 |
+
"""
|
35 |
+
<h5 style='text-align:center;'>I'm Robby, an intelligent chatbot created by combining
|
36 |
+
the strengths of Langchain and Streamlit. I use large language models to provide
|
37 |
+
context-sensitive interactions. My goal is to help you better understand your data.
|
38 |
+
I support PDF, TXT, CSV, Youtube transcript 🧠</h5>
|
39 |
+
""",
|
40 |
+
unsafe_allow_html=True)
|
41 |
+
st.markdown("---")
|
42 |
+
|
43 |
+
|
44 |
+
#Robby's Pages
|
45 |
+
st.subheader("🚀 Robby's Pages")
|
46 |
+
st.write("""
|
47 |
+
- **Robby-Chat**: General Chat on data (PDF, TXT,CSV) with a [vectorstore](https://github.com/facebookresearch/faiss) (index useful parts(max 4) for respond to the user) | works with [ConversationalRetrievalChain](https://python.langchain.com/en/latest/modules/chains/index_examples/chat_vector_db.html)
|
48 |
+
- **Robby-Sheet** (beta): Chat on tabular data (CSV) | for precise information | process the whole file | works with [CSV_Agent](https://python.langchain.com/en/latest/modules/agents/toolkits/examples/csv.html) + [PandasAI](https://github.com/gventuri/pandas-ai) for data manipulation and graph creation
|
49 |
+
- **Robby-Youtube**: Summarize YouTube videos with [summarize-chain](https://python.langchain.com/en/latest/modules/chains/index_examples/summarize.html)
|
50 |
+
""")
|
51 |
+
st.markdown("---")
|
52 |
+
|
53 |
+
|
54 |
+
#Contributing
|
55 |
+
st.markdown("### 🎯 Contributing")
|
56 |
+
st.markdown("""
|
57 |
+
**Robby is under regular development. Feel free to contribute and help me make it even more data-aware!**
|
58 |
+
""", unsafe_allow_html=True)
|
59 |
+
|
60 |
+
|
61 |
+
|
62 |
+
|
63 |
+
|
src/modules/chatbot.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# fix Error: module 'langchain' has no attribute 'verbose'
|
2 |
+
import langchain
|
3 |
+
import streamlit as st
|
4 |
+
from langchain.callbacks import get_openai_callback
|
5 |
+
from langchain.chains import ConversationalRetrievalChain
|
6 |
+
from langchain.chat_models import ChatOpenAI
|
7 |
+
from langchain.prompts.prompt import PromptTemplate
|
8 |
+
|
9 |
+
langchain.verbose = False
|
10 |
+
|
11 |
+
|
12 |
+
class Chatbot:
|
13 |
+
def __init__(self, model_name, temperature, vectors):
|
14 |
+
self.model_name = model_name
|
15 |
+
self.temperature = temperature
|
16 |
+
self.vectors = vectors
|
17 |
+
|
18 |
+
qa_template = """
|
19 |
+
You are a helpful AI assistant named Robby. The user gives you a file its content is represented by the following pieces of context, use them to answer the question at the end.
|
20 |
+
If you don't know the answer, just say you don't know. Do NOT try to make up an answer.
|
21 |
+
If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
|
22 |
+
Use as much detail as possible when responding.
|
23 |
+
|
24 |
+
context: {context}
|
25 |
+
=========
|
26 |
+
question: {question}
|
27 |
+
======
|
28 |
+
"""
|
29 |
+
|
30 |
+
QA_PROMPT = PromptTemplate(
|
31 |
+
template=qa_template, input_variables=["context", "question"]
|
32 |
+
)
|
33 |
+
|
34 |
+
def conversational_chat(self, query):
|
35 |
+
"""
|
36 |
+
Start a conversational chat with a model via Langchain
|
37 |
+
"""
|
38 |
+
# llm = ChatOpenAI(model_name=self.model_name, temperature=self.temperature)
|
39 |
+
|
40 |
+
from modules.llm import ChatGLM
|
41 |
+
|
42 |
+
llm = ChatGLM()
|
43 |
+
|
44 |
+
retriever = self.vectors.as_retriever()
|
45 |
+
|
46 |
+
chain = ConversationalRetrievalChain.from_llm(
|
47 |
+
llm=llm,
|
48 |
+
retriever=retriever,
|
49 |
+
verbose=True,
|
50 |
+
return_source_documents=True,
|
51 |
+
max_tokens_limit=4097,
|
52 |
+
combine_docs_chain_kwargs={"prompt": self.QA_PROMPT},
|
53 |
+
)
|
54 |
+
|
55 |
+
chain_input = {"question": query, "chat_history": st.session_state["history"]}
|
56 |
+
with get_openai_callback() as cb:
|
57 |
+
result = chain(chain_input)
|
58 |
+
|
59 |
+
st.session_state["history"].append((query, result["answer"]))
|
60 |
+
# count_tokens_chain(chain, chain_input)
|
61 |
+
st.write(
|
62 |
+
f"###### Tokens used in this conversation : {cb.total_tokens} tokens"
|
63 |
+
)
|
64 |
+
|
65 |
+
return result["answer"]
|
66 |
+
|
67 |
+
|
68 |
+
def count_tokens_chain(chain, query):
|
69 |
+
with get_openai_callback() as cb:
|
70 |
+
result = chain(query)
|
71 |
+
st.write(f"###### Tokens used in this conversation : {cb.total_tokens} tokens")
|
72 |
+
return result
|
src/modules/embedder.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
import tempfile
|
4 |
+
|
5 |
+
from langchain.document_loaders import PyPDFLoader, TextLoader
|
6 |
+
from langchain.document_loaders.csv_loader import CSVLoader
|
7 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
8 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
9 |
+
from langchain.vectorstores import FAISS
|
10 |
+
|
11 |
+
|
12 |
+
class Embedder:
|
13 |
+
def __init__(self):
|
14 |
+
self.PATH = "embeddings"
|
15 |
+
self.createEmbeddingsDir()
|
16 |
+
|
17 |
+
def createEmbeddingsDir(self):
|
18 |
+
"""
|
19 |
+
Creates a directory to store the embeddings vectors
|
20 |
+
"""
|
21 |
+
if not os.path.exists(self.PATH):
|
22 |
+
os.mkdir(self.PATH)
|
23 |
+
|
24 |
+
def storeDocEmbeds(self, file, original_filename):
|
25 |
+
"""
|
26 |
+
Stores document embeddings using Langchain and FAISS
|
27 |
+
"""
|
28 |
+
with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp_file:
|
29 |
+
tmp_file.write(file)
|
30 |
+
tmp_file_path = tmp_file.name
|
31 |
+
|
32 |
+
def get_file_extension(uploaded_file):
|
33 |
+
file_extension = os.path.splitext(uploaded_file)[1].lower()
|
34 |
+
|
35 |
+
return file_extension
|
36 |
+
|
37 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
38 |
+
chunk_size=2000,
|
39 |
+
chunk_overlap=100,
|
40 |
+
length_function=len,
|
41 |
+
)
|
42 |
+
|
43 |
+
file_extension = get_file_extension(original_filename)
|
44 |
+
|
45 |
+
if file_extension == ".csv":
|
46 |
+
loader = CSVLoader(
|
47 |
+
file_path=tmp_file_path,
|
48 |
+
encoding="utf-8",
|
49 |
+
csv_args={
|
50 |
+
"delimiter": ",",
|
51 |
+
},
|
52 |
+
)
|
53 |
+
data = loader.load()
|
54 |
+
|
55 |
+
elif file_extension == ".pdf":
|
56 |
+
loader = PyPDFLoader(file_path=tmp_file_path)
|
57 |
+
data = loader.load_and_split(text_splitter)
|
58 |
+
|
59 |
+
elif file_extension == ".txt":
|
60 |
+
loader = TextLoader(file_path=tmp_file_path, encoding="utf-8")
|
61 |
+
data = loader.load_and_split(text_splitter)
|
62 |
+
|
63 |
+
# embeddings = OpenAIEmbeddings()
|
64 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
65 |
+
|
66 |
+
modelpath = "intfloat/e5-large-v2"
|
67 |
+
embeddings = HuggingFaceEmbeddings(model_name=modelpath)
|
68 |
+
|
69 |
+
vectors = FAISS.from_documents(data, embeddings)
|
70 |
+
os.remove(tmp_file_path)
|
71 |
+
|
72 |
+
# Save the vectors to a pickle file
|
73 |
+
with open(f"{self.PATH}/{original_filename}.pkl", "wb") as f:
|
74 |
+
pickle.dump(vectors, f)
|
75 |
+
|
76 |
+
def getDocEmbeds(self, file, original_filename):
|
77 |
+
"""
|
78 |
+
Retrieves document embeddings
|
79 |
+
"""
|
80 |
+
if not os.path.isfile(f"{self.PATH}/{original_filename}.pkl"):
|
81 |
+
self.storeDocEmbeds(file, original_filename)
|
82 |
+
|
83 |
+
# Load the vectors from the pickle file
|
84 |
+
with open(f"{self.PATH}/{original_filename}.pkl", "rb") as f:
|
85 |
+
vectors = pickle.load(f)
|
86 |
+
|
87 |
+
return vectors
|
src/modules/history.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from streamlit_chat import message
|
4 |
+
|
5 |
+
class ChatHistory:
|
6 |
+
|
7 |
+
def __init__(self):
|
8 |
+
self.history = st.session_state.get("history", [])
|
9 |
+
st.session_state["history"] = self.history
|
10 |
+
|
11 |
+
def default_greeting(self):
|
12 |
+
return "Hey Robby ! 👋"
|
13 |
+
|
14 |
+
def default_prompt(self, topic):
|
15 |
+
return f"Hello ! Ask me anything about {topic} 🤗"
|
16 |
+
|
17 |
+
def initialize_user_history(self):
|
18 |
+
st.session_state["user"] = [self.default_greeting()]
|
19 |
+
|
20 |
+
def initialize_assistant_history(self, uploaded_file):
|
21 |
+
st.session_state["assistant"] = [self.default_prompt(uploaded_file.name)]
|
22 |
+
|
23 |
+
def initialize(self, uploaded_file):
|
24 |
+
if "assistant" not in st.session_state:
|
25 |
+
self.initialize_assistant_history(uploaded_file)
|
26 |
+
if "user" not in st.session_state:
|
27 |
+
self.initialize_user_history()
|
28 |
+
|
29 |
+
def reset(self, uploaded_file):
|
30 |
+
st.session_state["history"] = []
|
31 |
+
|
32 |
+
self.initialize_user_history()
|
33 |
+
self.initialize_assistant_history(uploaded_file)
|
34 |
+
st.session_state["reset_chat"] = False
|
35 |
+
|
36 |
+
def append(self, mode, message):
|
37 |
+
st.session_state[mode].append(message)
|
38 |
+
|
39 |
+
def generate_messages(self, container):
|
40 |
+
if st.session_state["assistant"]:
|
41 |
+
with container:
|
42 |
+
for i in range(len(st.session_state["assistant"])):
|
43 |
+
message(
|
44 |
+
st.session_state["user"][i],
|
45 |
+
is_user=True,
|
46 |
+
key=f"history_{i}_user",
|
47 |
+
avatar_style="big-smile",
|
48 |
+
)
|
49 |
+
message(st.session_state["assistant"][i], key=str(i), avatar_style="thumbs")
|
50 |
+
|
51 |
+
def load(self):
|
52 |
+
if os.path.exists(self.history_file):
|
53 |
+
with open(self.history_file, "r") as f:
|
54 |
+
self.history = f.read().splitlines()
|
55 |
+
|
56 |
+
def save(self):
|
57 |
+
with open(self.history_file, "w") as f:
|
58 |
+
f.write("\n".join(self.history))
|
src/modules/layout.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
class Layout:
|
4 |
+
|
5 |
+
def show_header(self, types_files):
|
6 |
+
"""
|
7 |
+
Displays the header of the app
|
8 |
+
"""
|
9 |
+
st.markdown(
|
10 |
+
f"""
|
11 |
+
<h1 style='text-align: center;'> Ask Robby about your {types_files} files ! 😁</h1>
|
12 |
+
""",
|
13 |
+
unsafe_allow_html=True,
|
14 |
+
)
|
15 |
+
|
16 |
+
def show_api_key_missing(self):
|
17 |
+
"""
|
18 |
+
Displays a message if the user has not entered an API key
|
19 |
+
"""
|
20 |
+
st.markdown(
|
21 |
+
"""
|
22 |
+
<div style='text-align: center;'>
|
23 |
+
<h4>Enter your <a href="https://platform.openai.com/account/api-keys" target="_blank">OpenAI API key</a> to start chatting</h4>
|
24 |
+
</div>
|
25 |
+
""",
|
26 |
+
unsafe_allow_html=True,
|
27 |
+
)
|
28 |
+
|
29 |
+
def prompt_form(self):
|
30 |
+
"""
|
31 |
+
Displays the prompt form
|
32 |
+
"""
|
33 |
+
with st.form(key="my_form", clear_on_submit=True):
|
34 |
+
user_input = st.text_area(
|
35 |
+
"Query:",
|
36 |
+
placeholder="Ask me anything about the document...",
|
37 |
+
key="input",
|
38 |
+
label_visibility="collapsed",
|
39 |
+
)
|
40 |
+
submit_button = st.form_submit_button(label="Send")
|
41 |
+
|
42 |
+
is_ready = submit_button and user_input
|
43 |
+
return is_ready, user_input
|
44 |
+
|
src/modules/llm.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
from typing import Any, List, Mapping, Optional
|
3 |
+
|
4 |
+
import requests
|
5 |
+
from langchain.callbacks.manager import CallbackManagerForLLMRun
|
6 |
+
from langchain.llms.base import LLM
|
7 |
+
|
8 |
+
url = "https://openai.proxy.onlyyounotothers.top/chat"
|
9 |
+
headers = {"Content-Type": "application/json"}
|
10 |
+
|
11 |
+
|
12 |
+
class ChatGLM(LLM):
|
13 |
+
@property
|
14 |
+
def _llm_type(self) -> str:
|
15 |
+
return "custom"
|
16 |
+
|
17 |
+
type = "custom"
|
18 |
+
|
19 |
+
# 重写基类方法,根据用户输入的prompt来响应用户,返回字符串
|
20 |
+
def _call(
|
21 |
+
self,
|
22 |
+
prompt: str,
|
23 |
+
stop: Optional[List[str]] = None,
|
24 |
+
run_manager: Optional[CallbackManagerForLLMRun] = None,
|
25 |
+
) -> str:
|
26 |
+
payload = json.dumps({"q": prompt})
|
27 |
+
response = requests.request("POST", url, headers=headers, data=payload)
|
28 |
+
return response.text
|
src/modules/robby_sheet/table_tool.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import sys
|
3 |
+
from io import BytesIO, StringIO
|
4 |
+
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import streamlit as st
|
7 |
+
from langchain.callbacks import get_openai_callback
|
8 |
+
from pandasai import PandasAI
|
9 |
+
from pandasai.llm.openai import OpenAI
|
10 |
+
from streamlit_chat import message
|
11 |
+
|
12 |
+
|
13 |
+
class PandasAgent:
|
14 |
+
@staticmethod
|
15 |
+
def count_tokens_agent(agent, query):
|
16 |
+
"""
|
17 |
+
Count the tokens used by the CSV Agent
|
18 |
+
"""
|
19 |
+
with get_openai_callback() as cb:
|
20 |
+
result = agent(query)
|
21 |
+
st.write(f"Spent a total of {cb.total_tokens} tokens")
|
22 |
+
|
23 |
+
return result
|
24 |
+
|
25 |
+
def __init__(self):
|
26 |
+
pass
|
27 |
+
|
28 |
+
def get_agent_response(self, uploaded_file_content, query):
|
29 |
+
llm = OpenAI()
|
30 |
+
|
31 |
+
# from modules.llm import ChatGLM
|
32 |
+
|
33 |
+
# llm = ChatGLM()
|
34 |
+
pandas_ai = PandasAI(llm, verbose=True)
|
35 |
+
old_stdout = sys.stdout
|
36 |
+
sys.stdout = captured_output = StringIO()
|
37 |
+
|
38 |
+
response = pandas_ai.run(data_frame=uploaded_file_content, prompt=query)
|
39 |
+
fig = plt.gcf()
|
40 |
+
if fig.get_axes():
|
41 |
+
# Adjust the figure size
|
42 |
+
fig.set_size_inches(12, 6)
|
43 |
+
|
44 |
+
# Adjust the layout tightness
|
45 |
+
plt.tight_layout()
|
46 |
+
buf = BytesIO()
|
47 |
+
fig.savefig(buf, format="png")
|
48 |
+
buf.seek(0)
|
49 |
+
st.image(buf, caption="Generated Plot")
|
50 |
+
|
51 |
+
sys.stdout = old_stdout
|
52 |
+
return response, captured_output
|
53 |
+
|
54 |
+
def process_agent_thoughts(self, captured_output):
|
55 |
+
thoughts = captured_output.getvalue()
|
56 |
+
cleaned_thoughts = re.sub(r"\x1b\[[0-9;]*[a-zA-Z]", "", thoughts)
|
57 |
+
cleaned_thoughts = re.sub(r"\[1m>", "", cleaned_thoughts)
|
58 |
+
return cleaned_thoughts
|
59 |
+
|
60 |
+
def display_agent_thoughts(self, cleaned_thoughts):
|
61 |
+
with st.expander("Display the agent's thoughts"):
|
62 |
+
st.write(cleaned_thoughts)
|
63 |
+
|
64 |
+
def update_chat_history(self, query, result):
|
65 |
+
st.session_state.chat_history.append(("user", query))
|
66 |
+
st.session_state.chat_history.append(("agent", result))
|
67 |
+
|
68 |
+
def display_chat_history(self):
|
69 |
+
for i, (sender, message_text) in enumerate(st.session_state.chat_history):
|
70 |
+
if sender == "user":
|
71 |
+
message(message_text, is_user=True, key=f"{i}_user")
|
72 |
+
else:
|
73 |
+
message(message_text, key=f"{i}")
|
src/modules/sidebar.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
class Sidebar:
|
4 |
+
|
5 |
+
MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4"]
|
6 |
+
TEMPERATURE_MIN_VALUE = 0.0
|
7 |
+
TEMPERATURE_MAX_VALUE = 1.0
|
8 |
+
TEMPERATURE_DEFAULT_VALUE = 0.0
|
9 |
+
TEMPERATURE_STEP = 0.01
|
10 |
+
|
11 |
+
@staticmethod
|
12 |
+
def about():
|
13 |
+
about = st.sidebar.expander("🧠 About Robby ")
|
14 |
+
sections = [
|
15 |
+
"#### Robby is an AI chatbot with a conversational memory, designed to allow users to discuss their data in a more intuitive way. 📄",
|
16 |
+
"#### It uses large language models to provide users with natural language interactions about user data content. 🌐",
|
17 |
+
"#### Powered by [Langchain](https://github.com/hwchase17/langchain), [OpenAI](https://platform.openai.com/docs/models/gpt-3-5) and [Streamlit](https://github.com/streamlit/streamlit) ⚡",
|
18 |
+
"#### Source code: [yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot)",
|
19 |
+
]
|
20 |
+
for section in sections:
|
21 |
+
about.write(section)
|
22 |
+
|
23 |
+
@staticmethod
|
24 |
+
def reset_chat_button():
|
25 |
+
if st.button("Reset chat"):
|
26 |
+
st.session_state["reset_chat"] = True
|
27 |
+
st.session_state.setdefault("reset_chat", False)
|
28 |
+
|
29 |
+
def model_selector(self):
|
30 |
+
model = st.selectbox(label="Model", options=self.MODEL_OPTIONS)
|
31 |
+
st.session_state["model"] = model
|
32 |
+
|
33 |
+
def temperature_slider(self):
|
34 |
+
temperature = st.slider(
|
35 |
+
label="Temperature",
|
36 |
+
min_value=self.TEMPERATURE_MIN_VALUE,
|
37 |
+
max_value=self.TEMPERATURE_MAX_VALUE,
|
38 |
+
value=self.TEMPERATURE_DEFAULT_VALUE,
|
39 |
+
step=self.TEMPERATURE_STEP,
|
40 |
+
)
|
41 |
+
st.session_state["temperature"] = temperature
|
42 |
+
|
43 |
+
def show_options(self):
|
44 |
+
with st.sidebar.expander("🛠️ Robby's Tools", expanded=False):
|
45 |
+
|
46 |
+
self.reset_chat_button()
|
47 |
+
self.model_selector()
|
48 |
+
self.temperature_slider()
|
49 |
+
st.session_state.setdefault("model", self.MODEL_OPTIONS[0])
|
50 |
+
st.session_state.setdefault("temperature", self.TEMPERATURE_DEFAULT_VALUE)
|
51 |
+
|
52 |
+
|
src/modules/utils.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import streamlit as st
|
4 |
+
import pdfplumber
|
5 |
+
|
6 |
+
from modules.chatbot import Chatbot
|
7 |
+
from modules.embedder import Embedder
|
8 |
+
|
9 |
+
class Utilities:
|
10 |
+
|
11 |
+
@staticmethod
|
12 |
+
def load_api_key():
|
13 |
+
"""
|
14 |
+
Loads the OpenAI API key from the .env file or
|
15 |
+
from the user's input and returns it
|
16 |
+
"""
|
17 |
+
if not hasattr(st.session_state, "api_key"):
|
18 |
+
st.session_state.api_key = None
|
19 |
+
#you can define your API key in .env directly
|
20 |
+
if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
|
21 |
+
user_api_key = os.environ["OPENAI_API_KEY"]
|
22 |
+
st.sidebar.success("API key loaded from .env", icon="🚀")
|
23 |
+
else:
|
24 |
+
if st.session_state.api_key is not None:
|
25 |
+
user_api_key = st.session_state.api_key
|
26 |
+
st.sidebar.success("API key loaded from previous input", icon="🚀")
|
27 |
+
else:
|
28 |
+
user_api_key = st.sidebar.text_input(
|
29 |
+
label="#### Your OpenAI API key 👇", placeholder="sk-...", type="password"
|
30 |
+
)
|
31 |
+
if user_api_key:
|
32 |
+
st.session_state.api_key = user_api_key
|
33 |
+
|
34 |
+
return user_api_key
|
35 |
+
|
36 |
+
|
37 |
+
@staticmethod
|
38 |
+
def handle_upload(file_types):
|
39 |
+
"""
|
40 |
+
Handles and display uploaded_file
|
41 |
+
:param file_types: List of accepted file types, e.g., ["csv", "pdf", "txt"]
|
42 |
+
"""
|
43 |
+
uploaded_file = st.sidebar.file_uploader("upload", type=file_types, label_visibility="collapsed")
|
44 |
+
if uploaded_file is not None:
|
45 |
+
|
46 |
+
def show_csv_file(uploaded_file):
|
47 |
+
file_container = st.expander("Your CSV file :")
|
48 |
+
uploaded_file.seek(0)
|
49 |
+
shows = pd.read_csv(uploaded_file)
|
50 |
+
file_container.write(shows)
|
51 |
+
|
52 |
+
def show_pdf_file(uploaded_file):
|
53 |
+
file_container = st.expander("Your PDF file :")
|
54 |
+
with pdfplumber.open(uploaded_file) as pdf:
|
55 |
+
pdf_text = ""
|
56 |
+
for page in pdf.pages:
|
57 |
+
pdf_text += page.extract_text() + "\n\n"
|
58 |
+
file_container.write(pdf_text)
|
59 |
+
|
60 |
+
def show_txt_file(uploaded_file):
|
61 |
+
file_container = st.expander("Your TXT file:")
|
62 |
+
uploaded_file.seek(0)
|
63 |
+
content = uploaded_file.read().decode("utf-8")
|
64 |
+
file_container.write(content)
|
65 |
+
|
66 |
+
def get_file_extension(uploaded_file):
|
67 |
+
return os.path.splitext(uploaded_file)[1].lower()
|
68 |
+
|
69 |
+
file_extension = get_file_extension(uploaded_file.name)
|
70 |
+
|
71 |
+
# Show the contents of the file based on its extension
|
72 |
+
#if file_extension == ".csv" :
|
73 |
+
# show_csv_file(uploaded_file)
|
74 |
+
if file_extension== ".pdf" :
|
75 |
+
show_pdf_file(uploaded_file)
|
76 |
+
elif file_extension== ".txt" :
|
77 |
+
show_txt_file(uploaded_file)
|
78 |
+
|
79 |
+
else:
|
80 |
+
st.session_state["reset_chat"] = True
|
81 |
+
|
82 |
+
#print(uploaded_file)
|
83 |
+
return uploaded_file
|
84 |
+
|
85 |
+
@staticmethod
|
86 |
+
def setup_chatbot(uploaded_file, model, temperature):
|
87 |
+
"""
|
88 |
+
Sets up the chatbot with the uploaded file, model, and temperature
|
89 |
+
"""
|
90 |
+
embeds = Embedder()
|
91 |
+
|
92 |
+
with st.spinner("Processing..."):
|
93 |
+
uploaded_file.seek(0)
|
94 |
+
file = uploaded_file.read()
|
95 |
+
# Get the document embeddings for the uploaded file
|
96 |
+
vectors = embeds.getDocEmbeds(file, uploaded_file.name)
|
97 |
+
|
98 |
+
# Create a Chatbot instance with the specified model and temperature
|
99 |
+
chatbot = Chatbot(model, temperature,vectors)
|
100 |
+
st.session_state["ready"] = True
|
101 |
+
|
102 |
+
return chatbot
|
103 |
+
|
104 |
+
|
105 |
+
|
src/pages/1_📄Robby-Chat.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
from io import StringIO
|
4 |
+
import re
|
5 |
+
import sys
|
6 |
+
from modules.history import ChatHistory
|
7 |
+
from modules.layout import Layout
|
8 |
+
from modules.utils import Utilities
|
9 |
+
from modules.sidebar import Sidebar
|
10 |
+
|
11 |
+
#To be able to update the changes made to modules in localhost (press r)
|
12 |
+
def reload_module(module_name):
    """Reload *module_name* so local edits are picked up on a rerun (press r).

    :param module_name: dotted module path, e.g. "modules.history"
    :return: the (re)loaded module object
    """
    import importlib
    import sys
    if module_name in sys.modules:
        importlib.reload(sys.modules[module_name])
        return sys.modules[module_name]
    # Fix: the original indexed sys.modules unconditionally, which raises
    # KeyError for a module that was never imported. Import it instead.
    return importlib.import_module(module_name)
|
19 |
+
# Re-import the project modules so edits are picked up on a Streamlit rerun.
history_module = reload_module('modules.history')
layout_module = reload_module('modules.layout')
utils_module = reload_module('modules.utils')
sidebar_module = reload_module('modules.sidebar')

ChatHistory = history_module.ChatHistory
Layout = layout_module.Layout
Utilities = utils_module.Utilities
Sidebar = sidebar_module.Sidebar

st.set_page_config(layout="wide", page_icon="💬", page_title="Robby | Chat-Bot 🤖")

# Instantiate the main components
layout, sidebar, utils = Layout(), Sidebar(), Utilities()

layout.show_header("PDF, TXT, CSV")

user_api_key = utils.load_api_key()

if not user_api_key:
    layout.show_api_key_missing()
else:
    os.environ["OPENAI_API_KEY"] = user_api_key

    uploaded_file = utils.handle_upload(["pdf", "txt", "csv"])

    if uploaded_file:

        # Configure the sidebar
        sidebar.show_options()
        sidebar.about()

        # Initialize chat history
        history = ChatHistory()
        try:
            chatbot = utils.setup_chatbot(
                uploaded_file, st.session_state["model"], st.session_state["temperature"]
            )
            st.session_state["chatbot"] = chatbot

            if st.session_state["ready"]:
                # Create containers for chat responses and user prompts
                response_container, prompt_container = st.container(), st.container()

                with prompt_container:
                    # Display the prompt form
                    is_ready, user_input = layout.prompt_form()

                    # Initialize the chat history
                    history.initialize(uploaded_file)

                    # Reset the chat history if button clicked
                    if st.session_state["reset_chat"]:
                        history.reset(uploaded_file)

                    if is_ready:
                        # Update the chat history and display the chat messages
                        history.append("user", user_input)

                        # Capture stdout while the chain runs so the agent's
                        # printed intermediate "thoughts" can be shown later.
                        old_stdout = sys.stdout
                        sys.stdout = captured_output = StringIO()

                        output = st.session_state["chatbot"].conversational_chat(user_input)

                        sys.stdout = old_stdout

                        history.append("assistant", output)

                        # Clean up the agent's thoughts to remove unwanted
                        # characters (ANSI escape sequences from the chain's
                        # colored console output).
                        thoughts = captured_output.getvalue()
                        cleaned_thoughts = re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', '', thoughts)
                        cleaned_thoughts = re.sub(r'\[1m>', '', cleaned_thoughts)

                        # Display the agent's thoughts
                        with st.expander("Display the agent's thoughts"):
                            st.write(cleaned_thoughts)

                # NOTE(review): assumed to sit at the with-block level so the
                # history renders on every run — confirm against upstream.
                history.generate_messages(response_container)
        except Exception as e:
            st.error(f"Error: {str(e)}")
src/pages/2_📊 Robby-Sheet (beta).py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import importlib
|
3 |
+
import sys
|
4 |
+
import pandas as pd
|
5 |
+
import streamlit as st
|
6 |
+
from io import BytesIO
|
7 |
+
from modules.robby_sheet.table_tool import PandasAgent
|
8 |
+
from modules.layout import Layout
|
9 |
+
from modules.utils import Utilities
|
10 |
+
from modules.sidebar import Sidebar
|
11 |
+
|
12 |
+
def reload_module(module_name):
    """Reload *module_name* so changes made locally are
    picked up on a Streamlit rerun (press r).

    :param module_name: dotted module path, e.g. "modules.layout"
    :return: the (re)loaded module object
    """
    if module_name in sys.modules:
        importlib.reload(sys.modules[module_name])
        return sys.modules[module_name]
    # Fix: avoid a KeyError when the module was never imported —
    # fall back to importing it fresh.
    return importlib.import_module(module_name)
|
20 |
+
# Re-import the project modules so edits are picked up on a Streamlit rerun.
table_tool_module = reload_module('modules.robby_sheet.table_tool')
layout_module = reload_module('modules.layout')
utils_module = reload_module('modules.utils')
sidebar_module = reload_module('modules.sidebar')


st.set_page_config(layout="wide", page_icon="💬", page_title="Robby | Chat-Bot 🤖")

layout, sidebar, utils = Layout(), Sidebar(), Utilities()

layout.show_header("CSV, Excel")

user_api_key = utils.load_api_key()

if not user_api_key:
    layout.show_api_key_missing()

else:
    # Fix: the original assigned os.environ["OPENAI_API_KEY"] BEFORE checking
    # user_api_key; os.environ rejects None with a TypeError. Export the key
    # only once we know it is set.
    os.environ["OPENAI_API_KEY"] = user_api_key

    st.session_state.setdefault("reset_chat", False)

    uploaded_file = utils.handle_upload(["csv", "xlsx"])

    if uploaded_file:
        sidebar.about()

        uploaded_file_content = BytesIO(uploaded_file.getvalue())
        # Excel uploads arrive under one of two MIME types (.xlsx / .xls);
        # anything else is treated as CSV.
        if uploaded_file.type in (
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.ms-excel",
        ):
            df = pd.read_excel(uploaded_file_content)
        else:
            df = pd.read_csv(uploaded_file_content)

        st.session_state.df = df

        if "chat_history" not in st.session_state:
            st.session_state["chat_history"] = []
        csv_agent = PandasAgent()

        with st.form(key="query"):

            query = st.text_input("Ask [PandasAI](https://github.com/gventuri/pandas-ai) (look the pandas-AI read-me for how use it)", value="", type="default",
                placeholder="e-g : How many rows ? "
            )
            submitted_query = st.form_submit_button("Submit")
            reset_chat_button = st.form_submit_button("Reset Chat")
            if reset_chat_button:
                st.session_state["chat_history"] = []
        if submitted_query:
            # Run the PandasAI agent, show its intermediate reasoning, and
            # append the turn to the persisted chat history.
            result, captured_output = csv_agent.get_agent_response(df, query)
            cleaned_thoughts = csv_agent.process_agent_thoughts(captured_output)
            csv_agent.display_agent_thoughts(cleaned_thoughts)
            csv_agent.update_chat_history(query, result)
            csv_agent.display_chat_history()
        if st.session_state.df is not None:
            st.subheader("Current dataframe:")
            st.write(st.session_state.df)
src/pages/3_🎬 Robby-Youtube.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import streamlit as st
|
3 |
+
import re
|
4 |
+
from modules.layout import Layout
|
5 |
+
from modules.utils import Utilities
|
6 |
+
from modules.sidebar import Sidebar
|
7 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
8 |
+
from langchain.chains.summarize import load_summarize_chain
|
9 |
+
from langchain.chains import AnalyzeDocumentChain
|
10 |
+
from youtube_transcript_api import YouTubeTranscriptApi
|
11 |
+
from langchain.llms import OpenAI
|
12 |
+
import os
|
13 |
+
from langchain.text_splitter import CharacterTextSplitter
|
14 |
+
|
15 |
+
st.set_page_config(layout="wide", page_icon="💬", page_title="Robby | Chat-Bot 🤖")

# Instantiate the main components
layout, sidebar, utils = Layout(), Sidebar(), Utilities()

# Page title rendered as raw HTML for centering.
st.markdown(
    f"""
    <h1 style='text-align: center;'> Ask Robby to summarize youtube video ! 😁</h1>
    """,
    unsafe_allow_html=True,
)

user_api_key = utils.load_api_key()

sidebar.about()

if not user_api_key:
    layout.show_api_key_missing()

else:
    # Make the key visible to the langchain OpenAI client via the environment.
    os.environ["OPENAI_API_KEY"] = user_api_key

    # Collected transcript documents (currently unused downstream).
    script_docs = []
def get_youtube_id(url):
    """Extract the video id from a YouTube URL.

    Supports both long-form links (...watch?v=<id>) and short-form
    links (youtu.be/<id>).

    :param url: the YouTube URL entered by the user
    :return: the video id string, or None when no id can be found
    """
    match = re.search(r"(?<=v=)[^&#]+", url)
    if match:
        return match.group()
    # Fix: escape the dot — the original pattern "youtu.be/" treated "." as
    # a wildcard and matched strings like "youtuXbe/<junk>".
    match = re.search(r"(?<=youtu\.be/)[^&#]+", url)
    if match:
        return match.group()
    return None
# Prompt for a video URL, fetch its transcript, and summarize it.
video_url = st.text_input(placeholder="Enter Youtube Video URL", label_visibility="hidden", label =" ")
if video_url :
    video_id = get_youtube_id(video_url)

    # Fix: get_youtube_id returns None (not "") when no id is found, so the
    # original `video_id != ""` test passed None straight into the transcript
    # API. Guard on truthiness instead.
    if video_id:
        # Fetch the transcript in the first available of these languages.
        t = YouTubeTranscriptApi.get_transcript(video_id, languages=('en','fr','es', 'zh-cn', 'hi', 'ar', 'bn', 'ru', 'pt', 'sw' ))
        finalString = ""
        for item in t:
            text = item['text']
            finalString += text + " "

        # Split the transcript so each chunk fits the model's context window.
        text_splitter = CharacterTextSplitter()
        chunks = text_splitter.split_text(finalString)

        summary_chain = load_summarize_chain(OpenAI(temperature=0),
                                             chain_type="map_reduce", verbose=True)

        summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain)

        answer = summarize_document_chain.run(chunks)

        st.subheader(answer)
tuto_chatbot_csv.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#pip install streamlit langchain openai faiss-cpu tiktoken

import streamlit as st
from streamlit_chat import message
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import FAISS
import tempfile


# Ask for the user's OpenAI key in the sidebar (never hard-coded).
user_api_key = st.sidebar.text_input(
    label="#### Your OpenAI API key 👇",
    placeholder="Paste your openAI API key, sk-",
    type="password")

uploaded_file = st.sidebar.file_uploader("upload", type="csv")

if uploaded_file :
    # CSVLoader needs a real filesystem path, so persist the upload
    # to a temporary file first.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_file_path = tmp_file.name

    loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
    data = loader.load()

    # Embed every CSV row and index the vectors for similarity search.
    embeddings = OpenAIEmbeddings()
    vectors = FAISS.from_documents(data, embeddings)

    chain = ConversationalRetrievalChain.from_llm(llm = ChatOpenAI(temperature=0.0,model_name='gpt-3.5-turbo', openai_api_key=user_api_key),
                                                                      retriever=vectors.as_retriever())

    def conversational_chat(query):
        # Run the retrieval chain with the accumulated history and record
        # the new (question, answer) turn in the session state.
        result = chain({"question": query, "chat_history": st.session_state['history']})
        st.session_state['history'].append((query, result["answer"]))

        return result["answer"]

    # Seed the session state on first run: empty history plus one greeting
    # message on each side of the conversation.
    if 'history' not in st.session_state:
        st.session_state['history'] = []

    if 'generated' not in st.session_state:
        st.session_state['generated'] = ["Hello ! Ask me anything about " + uploaded_file.name + " 🤗"]

    if 'past' not in st.session_state:
        st.session_state['past'] = ["Hey ! 👋"]

    #container for the chat history
    response_container = st.container()
    #container for the user's text input
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):

            user_input = st.text_input("Query:", placeholder="Talk about your csv data here (:", key='input')
            submit_button = st.form_submit_button(label='Send')

        if submit_button and user_input:
            output = conversational_chat(user_input)

            st.session_state['past'].append(user_input)
            st.session_state['generated'].append(output)

    # Render the full conversation: user turns and assistant replies in order.
    if st.session_state['generated']:
        with response_container:
            for i in range(len(st.session_state['generated'])):
                message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile")
                message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs")

#streamlit run tuto_chatbot_csv.py