ClearLove443 committed on
Commit
bf12aca
1 Parent(s): e37420b

add application file

Browse files
.gitignore ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105
+ __pypackages__/
106
+
107
+ # Celery stuff
108
+ celerybeat-schedule
109
+ celerybeat.pid
110
+
111
+ # SageMath parsed files
112
+ *.sage.py
113
+
114
+ # Environments
115
+
116
+ .venv
117
+ env/
118
+ venv/
119
+ ENV/
120
+ env.bak/
121
+ venv.bak/
122
+
123
+ # Spyder project settings
124
+ .spyderproject
125
+ .spyproject
126
+
127
+ # Rope project settings
128
+ .ropeproject
129
+
130
+ # mkdocs documentation
131
+ /site
132
+
133
+ # mypy
134
+ .mypy_cache/
135
+ .dmypy.json
136
+ dmypy.json
137
+
138
+ # Pyre type checker
139
+ .pyre/
140
+
141
+ # pytype static type analyzer
142
+ .pytype/
143
+
144
+ # Cython debug symbols
145
+ cython_debug/
146
+
147
+ # PyCharm
148
+ # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
149
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
151
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
152
+ #.idea/
153
+
154
+ #venv
155
+
156
+ *.pkl
157
+ *.csv
158
+
159
+ .env
160
+ embeddings/
161
+ *bk
.streamlit/config.toml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ base = "light"
3
+ primaryColor = "#89CFF0"
4
+ backgroundColor = "#E0F7FE"
5
+ secondaryBackgroundColor = "#FFFCE4"
6
+ textColor = "#000000"
7
+ font = "sans serif"
.vscode/launch.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+ {
8
+ "name": "Python: Current File",
9
+ "type": "python",
10
+ "request": "launch",
11
+ "program": "${file}",
12
+ "console": "integratedTerminal",
13
+ "justMyCode": false
14
+ }
15
+ ]
16
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 yvann-hub
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
requirements.txt ADDED
Binary file (434 Bytes). View file
 
setup.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Bootstrap Streamlit config for deployment (e.g. Heroku: $PORT is injected).
mkdir -p ~/.streamlit/

# BUG FIX: the previous version used `echo "\ ... \n\ ..."`. Plain `echo`
# only expands \n under shells whose echo interprets escapes (e.g. dash);
# under bash it writes a literal "\n", producing invalid TOML. Heredocs are
# portable and need no escape handling.
cat > ~/.streamlit/credentials.toml <<EOF
[general]
email = "[email protected]"
EOF

cat > ~/.streamlit/config.toml <<EOF
[server]
headless = true
enableCORS=false
port = $PORT

[theme]
base = "light"
primaryColor = "#89CFF0"
backgroundColor = "#E0F7FE"
secondaryBackgroundColor = "#FFFCE4"
textColor = "#000000"
font = "sans serif"
EOF
src/Home.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Landing page for the Robby chatbot Streamlit app."""

import streamlit as st


# Page config
st.set_page_config(layout="wide", page_icon="💬", page_title="Robby | Chat-Bot 🤖")


# Contact links in the sidebar
with st.sidebar.expander("📬 Contact"):

    st.write("**GitHub:**",
    "[yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot)")

    st.write("**Medium:** "
    "[@yvann-hub](https://medium.com/@yvann-hub)")

    st.write("**Twitter:** [@yvann_hub](https://twitter.com/yvann_hub)")
    st.write("**Mail** : [email protected]")
    st.write("**Created by Yvann**")


# Title
# BUG FIX: the heading previously opened with <h2> but closed with </h1>.
st.markdown(
    """
    <h2 style='text-align: center;'>Robby, your data-aware assistant 🤖</h2>
    """,
    unsafe_allow_html=True,)

st.markdown("---")


# Description
st.markdown(
    """ 
    <h5 style='text-align:center;'>I'm Robby, an intelligent chatbot created by combining
    the strengths of Langchain and Streamlit. I use large language models to provide
    context-sensitive interactions. My goal is to help you better understand your data.
    I support PDF, TXT, CSV, Youtube transcript 🧠</h5>
    """,
    unsafe_allow_html=True)
st.markdown("---")


# Robby's Pages
st.subheader("🚀 Robby's Pages")
st.write("""
- **Robby-Chat**: General Chat on data (PDF, TXT,CSV) with a [vectorstore](https://github.com/facebookresearch/faiss) (index useful parts(max 4) for respond to the user) | works with [ConversationalRetrievalChain](https://python.langchain.com/en/latest/modules/chains/index_examples/chat_vector_db.html)
- **Robby-Sheet** (beta): Chat on tabular data (CSV) | for precise information | process the whole file | works with [CSV_Agent](https://python.langchain.com/en/latest/modules/agents/toolkits/examples/csv.html) + [PandasAI](https://github.com/gventuri/pandas-ai) for data manipulation and graph creation
- **Robby-Youtube**: Summarize YouTube videos with [summarize-chain](https://python.langchain.com/en/latest/modules/chains/index_examples/summarize.html)
""")
st.markdown("---")


# Contributing
st.markdown("### 🎯 Contributing")
st.markdown("""
**Robby is under regular development. Feel free to contribute and help me make it even more data-aware!**
""", unsafe_allow_html=True)
src/modules/chatbot.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# fix Error: module 'langchain' has no attribute 'verbose'
import langchain
import streamlit as st
from langchain.callbacks import get_openai_callback
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts.prompt import PromptTemplate

# Define the attribute explicitly (see comment above): some langchain builds
# expect `langchain.verbose` to exist before chains are constructed.
langchain.verbose = False


class Chatbot:
    """Conversational QA over a prebuilt vectorstore.

    Retrieves context from `vectors` and asks an LLM to answer, persisting the
    dialogue in st.session_state["history"].
    """

    def __init__(self, model_name, temperature, vectors):
        # model_name/temperature are retained for the (currently commented-out)
        # ChatOpenAI path below; `vectors` is a FAISS store built by Embedder.
        self.model_name = model_name
        self.temperature = temperature
        self.vectors = vectors

    # Prompt pinning the assistant persona; {context} is filled with retrieved
    # chunks and {question} with the user's query.
    qa_template = """
        You are a helpful AI assistant named Robby. The user gives you a file its content is represented by the following pieces of context, use them to answer the question at the end.
        If you don't know the answer, just say you don't know. Do NOT try to make up an answer.
        If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.
        Use as much detail as possible when responding.

        context: {context}
        =========
        question: {question}
        ======
        """

    QA_PROMPT = PromptTemplate(
        template=qa_template, input_variables=["context", "question"]
    )

    def conversational_chat(self, query):
        """
        Start a conversational chat with a model via Langchain
        """
        # llm = ChatOpenAI(model_name=self.model_name, temperature=self.temperature)

        # The OpenAI chat model is swapped for the ChatGLM HTTP-proxy wrapper;
        # imported here rather than at module level — presumably to defer the
        # dependency until first use (TODO confirm).
        from modules.llm import ChatGLM

        llm = ChatGLM()

        retriever = self.vectors.as_retriever()

        # NOTE(review): max_tokens_limit is hard-coded for a 4k-context model;
        # confirm it matches the backing model's context window.
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
            verbose=True,
            return_source_documents=True,
            max_tokens_limit=4097,
            combine_docs_chain_kwargs={"prompt": self.QA_PROMPT},
        )

        chain_input = {"question": query, "chat_history": st.session_state["history"]}
        with get_openai_callback() as cb:
            result = chain(chain_input)

        # Record the turn so follow-up questions get the full chat history.
        st.session_state["history"].append((query, result["answer"]))
        # count_tokens_chain(chain, chain_input)
        st.write(
            f"###### Tokens used in this conversation : {cb.total_tokens} tokens"
        )

        return result["answer"]
def count_tokens_chain(chain, query):
    """Run *chain* on *query*, reporting OpenAI token usage in the UI."""
    with get_openai_callback() as usage:
        output = chain(query)
    st.write(f"###### Tokens used in this conversation : {usage.total_tokens} tokens")
    return output
src/modules/embedder.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ import tempfile
4
+
5
+ from langchain.document_loaders import PyPDFLoader, TextLoader
6
+ from langchain.document_loaders.csv_loader import CSVLoader
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain.vectorstores import FAISS
10
+
11
+
class Embedder:
    """Builds FAISS embeddings for uploaded files and caches them on disk."""

    def __init__(self):
        # Directory where pickled vectorstores are cached, keyed by filename.
        self.PATH = "embeddings"
        self.createEmbeddingsDir()

    def createEmbeddingsDir(self):
        """
        Creates a directory to store the embeddings vectors
        """
        # exist_ok avoids the check-then-create race of the previous
        # `if not os.path.exists(...): os.mkdir(...)` pattern.
        os.makedirs(self.PATH, exist_ok=True)

    def storeDocEmbeds(self, file, original_filename):
        """
        Stores document embeddings using Langchain and FAISS

        :param file: raw bytes of the uploaded file
        :param original_filename: picks the loader (by extension) and is the
            cache key on disk
        :raises ValueError: if the extension is not .csv/.pdf/.txt
        """
        file_extension = os.path.splitext(original_filename)[1].lower()
        # BUG FIX: previously an unknown extension fell through with `data`
        # undefined and crashed later with a NameError; fail fast instead.
        if file_extension not in (".csv", ".pdf", ".txt"):
            raise ValueError(f"Unsupported file extension: {file_extension}")

        # Persist the bytes to a temp file because the loaders want a path.
        with tempfile.NamedTemporaryFile(mode="wb", delete=False) as tmp_file:
            tmp_file.write(file)
            tmp_file_path = tmp_file.name

        try:
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=2000,
                chunk_overlap=100,
                length_function=len,
            )

            if file_extension == ".csv":
                loader = CSVLoader(
                    file_path=tmp_file_path,
                    encoding="utf-8",
                    csv_args={
                        "delimiter": ",",
                    },
                )
                data = loader.load()

            elif file_extension == ".pdf":
                loader = PyPDFLoader(file_path=tmp_file_path)
                data = loader.load_and_split(text_splitter)

            else:  # ".txt"
                loader = TextLoader(file_path=tmp_file_path, encoding="utf-8")
                data = loader.load_and_split(text_splitter)

            # embeddings = OpenAIEmbeddings()
            # OpenAI embeddings replaced by a local HuggingFace model.
            from langchain.embeddings import HuggingFaceEmbeddings

            modelpath = "intfloat/e5-large-v2"
            embeddings = HuggingFaceEmbeddings(model_name=modelpath)

            vectors = FAISS.from_documents(data, embeddings)
        finally:
            # BUG FIX: delete the temp file even if a loader/embedding fails.
            os.remove(tmp_file_path)

        # Save the vectors to a pickle file
        with open(f"{self.PATH}/{original_filename}.pkl", "wb") as f:
            pickle.dump(vectors, f)

    def getDocEmbeds(self, file, original_filename):
        """
        Retrieves document embeddings, building and caching them on first use.
        """
        if not os.path.isfile(f"{self.PATH}/{original_filename}.pkl"):
            self.storeDocEmbeds(file, original_filename)

        # NOTE(review): pickle.load is only safe here because the cache files
        # are written locally by this app, never accepted from users.
        with open(f"{self.PATH}/{original_filename}.pkl", "rb") as f:
            vectors = pickle.load(f)

        return vectors
src/modules/history.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from streamlit_chat import message
4
+
class ChatHistory:
    """Session-scoped chat transcript helpers backed by st.session_state."""

    def __init__(self):
        # Reuse the transcript across Streamlit reruns; first run starts empty.
        self.history = st.session_state.get("history", [])
        st.session_state["history"] = self.history

    def default_greeting(self):
        # Canned opening message shown as the user's first turn.
        return "Hey Robby ! 👋"

    def default_prompt(self, topic):
        # Canned assistant opener, personalised with the uploaded file's name.
        return f"Hello ! Ask me anything about {topic} 🤗"

    def initialize_user_history(self):
        st.session_state["user"] = [self.default_greeting()]

    def initialize_assistant_history(self, uploaded_file):
        st.session_state["assistant"] = [self.default_prompt(uploaded_file.name)]

    def initialize(self, uploaded_file):
        # Seed both message lists only once per session.
        if "assistant" not in st.session_state:
            self.initialize_assistant_history(uploaded_file)
        if "user" not in st.session_state:
            self.initialize_user_history()

    def reset(self, uploaded_file):
        # Drop the transcript and restore the canned opening exchange.
        st.session_state["history"] = []

        self.initialize_user_history()
        self.initialize_assistant_history(uploaded_file)
        st.session_state["reset_chat"] = False

    def append(self, mode, message):
        # `mode` is "user" or "assistant" — the session_state list to extend.
        st.session_state[mode].append(message)

    def generate_messages(self, container):
        # Render the transcript; indexes the user list by the assistant list's
        # length, so it assumes the two stay in lockstep — TODO confirm.
        if st.session_state["assistant"]:
            with container:
                for i in range(len(st.session_state["assistant"])):
                    message(
                        st.session_state["user"][i],
                        is_user=True,
                        key=f"history_{i}_user",
                        avatar_style="big-smile",
                    )
                    message(st.session_state["assistant"][i], key=str(i), avatar_style="thumbs")

    def load(self):
        # NOTE(review): self.history_file is never assigned anywhere in this
        # class, so calling load()/save() raises AttributeError — looks like
        # dead code from a file-backed design; confirm before relying on it.
        if os.path.exists(self.history_file):
            with open(self.history_file, "r") as f:
                self.history = f.read().splitlines()

    def save(self):
        # NOTE(review): see load() — self.history_file is undefined here too.
        with open(self.history_file, "w") as f:
            f.write("\n".join(self.history))
src/modules/layout.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
class Layout:
    """Static UI pieces shared by the app's pages."""

    def show_header(self, types_files):
        """
        Displays the header of the app

        :param types_files: human-readable list of supported file types,
            interpolated into the page title (e.g. "PDF, TXT, CSV")
        """
        st.markdown(
            f"""
            <h1 style='text-align: center;'> Ask Robby about your {types_files} files ! 😁</h1>
            """,
            unsafe_allow_html=True,
        )

    def show_api_key_missing(self):
        """
        Displays a message if the user has not entered an API key
        """
        st.markdown(
            """
            <div style='text-align: center;'>
                <h4>Enter your <a href="https://platform.openai.com/account/api-keys" target="_blank">OpenAI API key</a> to start chatting</h4>
            </div>
            """,
            unsafe_allow_html=True,
        )

    def prompt_form(self):
        """
        Displays the prompt form

        :returns: (is_ready, user_input) — is_ready is truthy only when the
            form was submitted with non-empty text
        """
        with st.form(key="my_form", clear_on_submit=True):
            user_input = st.text_area(
                "Query:",
                placeholder="Ask me anything about the document...",
                key="input",
                label_visibility="collapsed",
            )
            submit_button = st.form_submit_button(label="Send")

            is_ready = submit_button and user_input
        return is_ready, user_input
src/modules/llm.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Any, List, Mapping, Optional
3
+
4
+ import requests
5
+ from langchain.callbacks.manager import CallbackManagerForLLMRun
6
+ from langchain.llms.base import LLM
7
+
# Endpoint of the OpenAI-compatible chat proxy this wrapper talks to.
url = "https://openai.proxy.onlyyounotothers.top/chat"
headers = {"Content-Type": "application/json"}


class ChatGLM(LLM):
    """Minimal custom Langchain LLM that relays prompts to an HTTP chat proxy."""

    @property
    def _llm_type(self) -> str:
        return "custom"

    type = "custom"

    # Override the base-class hook: send the user's prompt to the proxy and
    # return the raw response body as the completion text.
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> str:
        # NOTE(review): `stop` and `run_manager` are accepted for interface
        # compatibility but are ignored by this backend.
        payload = json.dumps({"q": prompt})
        # BUG FIX: added a timeout — without one, a hung proxy blocks the
        # Streamlit session forever.
        response = requests.request(
            "POST", url, headers=headers, data=payload, timeout=120
        )
        return response.text
src/modules/robby_sheet/table_tool.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import sys
3
+ from io import BytesIO, StringIO
4
+
5
+ import matplotlib.pyplot as plt
6
+ import streamlit as st
7
+ from langchain.callbacks import get_openai_callback
8
+ from pandasai import PandasAI
9
+ from pandasai.llm.openai import OpenAI
10
+ from streamlit_chat import message
11
+
12
+
class PandasAgent:
    """Answers natural-language questions about a dataframe via PandasAI and
    surfaces the agent's intermediate reasoning in Streamlit."""

    @staticmethod
    def count_tokens_agent(agent, query):
        """
        Count the tokens used by the CSV Agent
        """
        with get_openai_callback() as cb:
            result = agent(query)
            st.write(f"Spent a total of {cb.total_tokens} tokens")

        return result

    def __init__(self):
        pass

    def get_agent_response(self, uploaded_file_content, query):
        """Run PandasAI on the dataframe, render any matplotlib figure it
        produced, and return (response, StringIO of captured stdout)."""
        llm = OpenAI()

        # from modules.llm import ChatGLM

        # llm = ChatGLM()
        pandas_ai = PandasAI(llm, verbose=True)
        # Capture stdout because PandasAI prints its chain-of-thought there.
        old_stdout = sys.stdout
        sys.stdout = captured_output = StringIO()
        try:
            response = pandas_ai.run(data_frame=uploaded_file_content, prompt=query)
            fig = plt.gcf()
            if fig.get_axes():
                # Adjust the figure size
                fig.set_size_inches(12, 6)

                # Adjust the layout tightness
                plt.tight_layout()
                buf = BytesIO()
                fig.savefig(buf, format="png")
                buf.seek(0)
                st.image(buf, caption="Generated Plot")
        finally:
            # BUG FIX: restore stdout even when PandasAI raises; otherwise the
            # whole Streamlit session keeps writing into the capture buffer.
            sys.stdout = old_stdout
        return response, captured_output

    def process_agent_thoughts(self, captured_output):
        """Strip ANSI colour codes and stray chain banners from captured stdout."""
        thoughts = captured_output.getvalue()
        cleaned_thoughts = re.sub(r"\x1b\[[0-9;]*[a-zA-Z]", "", thoughts)
        cleaned_thoughts = re.sub(r"\[1m>", "", cleaned_thoughts)
        return cleaned_thoughts

    def display_agent_thoughts(self, cleaned_thoughts):
        # Collapsible view of the raw reasoning trace.
        with st.expander("Display the agent's thoughts"):
            st.write(cleaned_thoughts)

    def update_chat_history(self, query, result):
        # Transcript entries are (sender, text) tuples.
        st.session_state.chat_history.append(("user", query))
        st.session_state.chat_history.append(("agent", result))

    def display_chat_history(self):
        for i, (sender, message_text) in enumerate(st.session_state.chat_history):
            if sender == "user":
                message(message_text, is_user=True, key=f"{i}_user")
            else:
                message(message_text, key=f"{i}")
src/modules/sidebar.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
class Sidebar:
    """Sidebar widgets: about panel, chat reset, model and temperature pickers."""

    MODEL_OPTIONS = ["gpt-3.5-turbo", "gpt-4"]
    TEMPERATURE_MIN_VALUE = 0.0
    TEMPERATURE_MAX_VALUE = 1.0
    TEMPERATURE_DEFAULT_VALUE = 0.0
    TEMPERATURE_STEP = 0.01

    @staticmethod
    def about():
        """Render the 'About Robby' expander with the project blurbs."""
        panel = st.sidebar.expander("🧠 About Robby ")
        blurbs = (
            "#### Robby is an AI chatbot with a conversational memory, designed to allow users to discuss their data in a more intuitive way. 📄",
            "#### It uses large language models to provide users with natural language interactions about user data content. 🌐",
            "#### Powered by [Langchain](https://github.com/hwchase17/langchain), [OpenAI](https://platform.openai.com/docs/models/gpt-3-5) and [Streamlit](https://github.com/streamlit/streamlit) ⚡",
            "#### Source code: [yvann-hub/Robby-chatbot](https://github.com/yvann-hub/Robby-chatbot)",
        )
        for blurb in blurbs:
            panel.write(blurb)

    @staticmethod
    def reset_chat_button():
        """Offer a reset button; flag the session for a chat reset when pressed."""
        if st.button("Reset chat"):
            st.session_state["reset_chat"] = True
        st.session_state.setdefault("reset_chat", False)

    def model_selector(self):
        """Let the user pick the model, stashing the choice in session state."""
        st.session_state["model"] = st.selectbox(label="Model", options=self.MODEL_OPTIONS)

    def temperature_slider(self):
        """Expose a temperature slider, stashing its value in session state."""
        chosen = st.slider(
            label="Temperature",
            min_value=self.TEMPERATURE_MIN_VALUE,
            max_value=self.TEMPERATURE_MAX_VALUE,
            value=self.TEMPERATURE_DEFAULT_VALUE,
            step=self.TEMPERATURE_STEP,
        )
        st.session_state["temperature"] = chosen

    def show_options(self):
        """Group the tool widgets inside a collapsible sidebar expander."""
        with st.sidebar.expander("🛠️ Robby's Tools", expanded=False):

            self.reset_chat_button()
            self.model_selector()
            self.temperature_slider()
            st.session_state.setdefault("model", self.MODEL_OPTIONS[0])
            st.session_state.setdefault("temperature", self.TEMPERATURE_DEFAULT_VALUE)
src/modules/utils.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import streamlit as st
4
+ import pdfplumber
5
+
6
+ from modules.chatbot import Chatbot
7
+ from modules.embedder import Embedder
8
+
class Utilities:
    """Page-level helpers: API key loading, upload handling, chatbot setup."""

    @staticmethod
    def load_api_key():
        """
        Loads the OpenAI API key from the .env file or
        from the user's input and returns it
        """
        if not hasattr(st.session_state, "api_key"):
            st.session_state.api_key = None
        #you can define your API key in .env directly
        if os.path.exists(".env") and os.environ.get("OPENAI_API_KEY") is not None:
            user_api_key = os.environ["OPENAI_API_KEY"]
            st.sidebar.success("API key loaded from .env", icon="🚀")
        else:
            if st.session_state.api_key is not None:
                # Key entered earlier in this session — reuse it silently.
                user_api_key = st.session_state.api_key
                st.sidebar.success("API key loaded from previous input", icon="🚀")
            else:
                user_api_key = st.sidebar.text_input(
                    label="#### Your OpenAI API key 👇", placeholder="sk-...", type="password"
                )
                if user_api_key:
                    st.session_state.api_key = user_api_key

        return user_api_key


    @staticmethod
    def handle_upload(file_types):
        """
        Handles and display uploaded_file
        :param file_types: List of accepted file types, e.g., ["csv", "pdf", "txt"]
        """
        uploaded_file = st.sidebar.file_uploader("upload", type=file_types, label_visibility="collapsed")
        if uploaded_file is not None:

            def show_csv_file(uploaded_file):
                # Preview helper; currently unused (see commented branch below).
                file_container = st.expander("Your CSV file :")
                uploaded_file.seek(0)
                shows = pd.read_csv(uploaded_file)
                file_container.write(shows)

            def show_pdf_file(uploaded_file):
                # Extract and display all page text via pdfplumber.
                file_container = st.expander("Your PDF file :")
                with pdfplumber.open(uploaded_file) as pdf:
                    pdf_text = ""
                    for page in pdf.pages:
                        pdf_text += page.extract_text() + "\n\n"
                file_container.write(pdf_text)

            def show_txt_file(uploaded_file):
                file_container = st.expander("Your TXT file:")
                uploaded_file.seek(0)
                content = uploaded_file.read().decode("utf-8")
                file_container.write(content)

            def get_file_extension(uploaded_file):
                return os.path.splitext(uploaded_file)[1].lower()

            file_extension = get_file_extension(uploaded_file.name)

            # Show the contents of the file based on its extension
            #if file_extension == ".csv" :
            #    show_csv_file(uploaded_file)
            if file_extension== ".pdf" :
                show_pdf_file(uploaded_file)
            elif file_extension== ".txt" :
                show_txt_file(uploaded_file)

        else:
            # No file selected: flag the session so the chat resets.
            st.session_state["reset_chat"] = True

        #print(uploaded_file)
        return uploaded_file

    @staticmethod
    def setup_chatbot(uploaded_file, model, temperature):
        """
        Sets up the chatbot with the uploaded file, model, and temperature
        """
        embeds = Embedder()

        with st.spinner("Processing..."):
            uploaded_file.seek(0)
            file = uploaded_file.read()
            # Get the document embeddings for the uploaded file
            vectors = embeds.getDocEmbeds(file, uploaded_file.name)

            # Create a Chatbot instance with the specified model and temperature
            chatbot = Chatbot(model, temperature,vectors)
        st.session_state["ready"] = True

        return chatbot
src/pages/1_📄Robby-Chat.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Robby-Chat page: conversational QA over an uploaded PDF/TXT/CSV file."""
import os
import streamlit as st
from io import StringIO
import re
import sys
from modules.history import ChatHistory
from modules.layout import Layout
from modules.utils import Utilities
from modules.sidebar import Sidebar

#To be able to update the changes made to modules in localhost (press r)
def reload_module(module_name):
    import importlib
    import sys
    if module_name in sys.modules:
        importlib.reload(sys.modules[module_name])
    return sys.modules[module_name]

history_module = reload_module('modules.history')
layout_module = reload_module('modules.layout')
utils_module = reload_module('modules.utils')
sidebar_module = reload_module('modules.sidebar')

# Re-bind the classes from the freshly reloaded modules.
ChatHistory = history_module.ChatHistory
Layout = layout_module.Layout
Utilities = utils_module.Utilities
Sidebar = sidebar_module.Sidebar

st.set_page_config(layout="wide", page_icon="💬", page_title="Robby | Chat-Bot 🤖")

# Instantiate the main components
layout, sidebar, utils = Layout(), Sidebar(), Utilities()

layout.show_header("PDF, TXT, CSV")

user_api_key = utils.load_api_key()

if not user_api_key:
    layout.show_api_key_missing()
else:
    os.environ["OPENAI_API_KEY"] = user_api_key

    uploaded_file = utils.handle_upload(["pdf", "txt", "csv"])

    if uploaded_file:

        # Configure the sidebar
        sidebar.show_options()
        sidebar.about()

        # Initialize chat history
        history = ChatHistory()
        try:
            chatbot = utils.setup_chatbot(
                uploaded_file, st.session_state["model"], st.session_state["temperature"]
            )
            st.session_state["chatbot"] = chatbot

            if st.session_state["ready"]:
                # Create containers for chat responses and user prompts
                response_container, prompt_container = st.container(), st.container()

                with prompt_container:
                    # Display the prompt form
                    is_ready, user_input = layout.prompt_form()

                    # Initialize the chat history
                    history.initialize(uploaded_file)

                    # Reset the chat history if button clicked
                    if st.session_state["reset_chat"]:
                        history.reset(uploaded_file)

                    if is_ready:
                        # Update the chat history and display the chat messages
                        history.append("user", user_input)

                        # Capture stdout to grab the chain's verbose trace.
                        # NOTE(review): if conversational_chat raises, stdout
                        # is only restored by falling into the outer except —
                        # it stays redirected for the rest of that run; a
                        # try/finally here would be safer. Confirm intent.
                        old_stdout = sys.stdout
                        sys.stdout = captured_output = StringIO()

                        output = st.session_state["chatbot"].conversational_chat(user_input)

                        sys.stdout = old_stdout

                        history.append("assistant", output)

                        # Clean up the agent's thoughts to remove unwanted characters
                        thoughts = captured_output.getvalue()
                        cleaned_thoughts = re.sub(r'\x1b\[[0-9;]*[a-zA-Z]', '', thoughts)
                        cleaned_thoughts = re.sub(r'\[1m>', '', cleaned_thoughts)

                        # Display the agent's thoughts
                        with st.expander("Display the agent's thoughts"):
                            st.write(cleaned_thoughts)

                history.generate_messages(response_container)
        except Exception as e:
            # Broad catch at the page boundary: surface any failure in the UI.
            st.error(f"Error: {str(e)}")
src/pages/2_📊 Robby-Sheet (beta).py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Robby-Sheet page: question a CSV/Excel dataframe through PandasAI."""
import os
import importlib
import sys
import pandas as pd
import streamlit as st
from io import BytesIO
from modules.robby_sheet.table_tool import PandasAgent
from modules.layout import Layout
from modules.utils import Utilities
from modules.sidebar import Sidebar

def reload_module(module_name):
    """For update changes
    made to modules in localhost (press r)"""

    if module_name in sys.modules:
        importlib.reload(sys.modules[module_name])
    return sys.modules[module_name]

table_tool_module = reload_module('modules.robby_sheet.table_tool')
layout_module = reload_module('modules.layout')
utils_module = reload_module('modules.utils')
sidebar_module = reload_module('modules.sidebar')


st.set_page_config(layout="wide", page_icon="💬", page_title="Robby | Chat-Bot 🤖")

layout, sidebar, utils = Layout(), Sidebar(), Utilities()

layout.show_header("CSV, Excel")

user_api_key = utils.load_api_key()
# NOTE(review): the env var is set before the emptiness check below — confirm
# that load_api_key never returns None here (an empty string is tolerated).
os.environ["OPENAI_API_KEY"] = user_api_key


if not user_api_key:
    layout.show_api_key_missing()

else:
    st.session_state.setdefault("reset_chat", False)

    uploaded_file = utils.handle_upload(["csv", "xlsx"])

    if uploaded_file:
        sidebar.about()

        # Copy the upload into memory so pandas can re-read it freely.
        uploaded_file_content = BytesIO(uploaded_file.getvalue())
        # Pick the reader from the upload's MIME type (xlsx or legacy xls).
        if uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" or uploaded_file.type == "application/vnd.ms-excel":
            df = pd.read_excel(uploaded_file_content)
        else:
            df = pd.read_csv(uploaded_file_content)

        st.session_state.df = df

        if "chat_history" not in st.session_state:
            st.session_state["chat_history"] = []
        csv_agent = PandasAgent()

        with st.form(key="query"):

            query = st.text_input("Ask [PandasAI](https://github.com/gventuri/pandas-ai) (look the pandas-AI read-me for how use it)", value="", type="default",
                placeholder="e-g : How many rows ? "
            )
            submitted_query = st.form_submit_button("Submit")
            reset_chat_button = st.form_submit_button("Reset Chat")
            if reset_chat_button:
                st.session_state["chat_history"] = []
            if submitted_query:
                # Run the agent, then show its reasoning and the transcript.
                result, captured_output = csv_agent.get_agent_response(df, query)
                cleaned_thoughts = csv_agent.process_agent_thoughts(captured_output)
                csv_agent.display_agent_thoughts(cleaned_thoughts)
                csv_agent.update_chat_history(query, result)
                csv_agent.display_chat_history()
        if st.session_state.df is not None:
            st.subheader("Current dataframe:")
            st.write(st.session_state.df)
src/pages/3_🎬 Robby-Youtube.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import re
4
+ from modules.layout import Layout
5
+ from modules.utils import Utilities
6
+ from modules.sidebar import Sidebar
7
+ from youtube_transcript_api import YouTubeTranscriptApi
8
+ from langchain.chains.summarize import load_summarize_chain
9
+ from langchain.chains import AnalyzeDocumentChain
10
+ from youtube_transcript_api import YouTubeTranscriptApi
11
+ from langchain.llms import OpenAI
12
+ import os
13
+ from langchain.text_splitter import CharacterTextSplitter
14
+
15
# Page chrome shared by all Robby pages: wide layout plus chat branding.
st.set_page_config(layout="wide", page_icon="💬", page_title="Robby | Chat-Bot 🤖")

# Instantiate the main components
layout = Layout()
sidebar = Sidebar()
utils = Utilities()

st.markdown(
    f"""
    <h1 style='text-align: center;'> Ask Robby to summarize youtube video ! 😁</h1>
    """,
    unsafe_allow_html=True,
)

user_api_key = utils.load_api_key()

sidebar.about()

if user_api_key:
    # Downstream OpenAI/LangChain calls read the key from the environment.
    os.environ["OPENAI_API_KEY"] = user_api_key

    script_docs = []
else:
    layout.show_api_key_missing()
39
def get_youtube_id(url):
    """Extract the video ID from a YouTube URL.

    Supports both the ``watch?v=<id>`` form and the ``youtu.be/<id>``
    short-link form.  Returns the ID string, or None when the URL
    contains no recognizable video ID.
    """
    # Standard watch URL: everything after `v=` up to the next URL delimiter.
    match = re.search(r"(?<=v=)[^&#]+", url)
    if match:
        return match.group()
    # Short-link form.  BUG FIX: the dot in `youtu.be` is now escaped so the
    # pattern matches only the real host, not look-alikes such as `youtuXbe/`.
    match = re.search(r"(?<=youtu\.be/)[^&#]+", url)
    if match:
        return match.group()
    return None
49
+
50
# Summarization flow.  Gated on the API key, mirroring the page's key check
# above (the original nested this under the same `else:` branch).
if user_api_key:
    video_url = st.text_input(placeholder="Enter Youtube Video URL", label_visibility="hidden", label=" ")
    if video_url:
        video_id = get_youtube_id(video_url)

        # BUG FIX: get_youtube_id returns None (never "") when no ID is
        # found, so the original `video_id != ""` check accepted every URL
        # and crashed inside get_transcript on bad input.
        if video_id:
            # Fetch the transcript in whichever supported language exists.
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id,
                languages=('en', 'fr', 'es', 'zh-cn', 'hi', 'ar', 'bn', 'ru', 'pt', 'sw'),
            )
            # Flatten transcript snippets into a single text body.
            final_string = "".join(item['text'] + " " for item in transcript)

            # Chunk the transcript so each piece fits the model context.
            text_splitter = CharacterTextSplitter()
            chunks = text_splitter.split_text(final_string)

            # Map-reduce summarization over the chunks.
            summary_chain = load_summarize_chain(
                OpenAI(temperature=0),
                chain_type="map_reduce",
                verbose=True,
            )
            summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain)

            answer = summarize_document_chain.run(chunks)

            st.subheader(answer)
        else:
            st.error("Could not extract a video ID from this URL.")
tuto_chatbot_csv.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#pip install streamlit langchain openai faiss-cpu tiktoken

import os
import tempfile

import streamlit as st
from streamlit_chat import message
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.vectorstores import FAISS


user_api_key = st.sidebar.text_input(
    label="#### Your OpenAI API key 👇",
    placeholder="Paste your openAI API key, sk-",
    type="password")

uploaded_file = st.sidebar.file_uploader("upload", type="csv")

if uploaded_file:
    # Persist the upload to disk because CSVLoader works on file paths,
    # not in-memory buffers.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_file_path = tmp_file.name

    try:
        loader = CSVLoader(file_path=tmp_file_path, encoding="utf-8")
        data = loader.load()
    finally:
        # BUG FIX: the temp file was never removed.  Streamlit re-runs this
        # script on every interaction, so the original leaked one temp file
        # per rerun.  The data is fully loaded above, so unlinking is safe.
        os.unlink(tmp_file_path)

    embeddings = OpenAIEmbeddings()
    vectors = FAISS.from_documents(data, embeddings)

    chain = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(temperature=0.0, model_name='gpt-3.5-turbo', openai_api_key=user_api_key),
        retriever=vectors.as_retriever())

    def conversational_chat(query):
        """Run one conversational turn against the CSV retriever and record it."""
        result = chain({"question": query, "chat_history": st.session_state['history']})
        st.session_state['history'].append((query, result["answer"]))

        return result["answer"]

    if 'history' not in st.session_state:
        st.session_state['history'] = []

    if 'generated' not in st.session_state:
        st.session_state['generated'] = ["Hello ! Ask me anything about " + uploaded_file.name + " 🤗"]

    if 'past' not in st.session_state:
        st.session_state['past'] = ["Hey ! 👋"]

    #container for the chat history
    response_container = st.container()
    #container for the user's text input
    container = st.container()

    with container:
        with st.form(key='my_form', clear_on_submit=True):

            user_input = st.text_input("Query:", placeholder="Talk about your csv data here (:", key='input')
            submit_button = st.form_submit_button(label='Send')

        if submit_button and user_input:
            output = conversational_chat(user_input)

            st.session_state['past'].append(user_input)
            st.session_state['generated'].append(output)

    if st.session_state['generated']:
        with response_container:
            for i in range(len(st.session_state['generated'])):
                message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="big-smile")
                message(st.session_state["generated"][i], key=str(i), avatar_style="thumbs")

#streamlit run tuto_chatbot_csv.py