kheopss committed on
Commit bc65f06
1 Parent(s): d4cf559

Create app.py

Files changed (1)
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
+ import gradio as gr
+ import logging
+ import os
+ import sys
+
+ import pandas as pd
+ from IPython.display import display, HTML
+ from llama_index.core import Document, QueryBundle, VectorStoreIndex
+ from llama_index.core.memory import ChatMemoryBuffer
+ from llama_index.core.postprocessor import LLMRerank
+ from llama_index.core.retrievers import VectorIndexRetriever
+ from llama_index.llms.openai import OpenAI
+
+ # Load the JSON file and build one Document per row, keeping the file
+ # name and description as metadata.
+ file_path = 'response_metropol.json'
+ data = pd.read_json(file_path)
+ documents = [
+     Document(
+         text=row['values'],
+         metadata={"filename": row['file_name'], "description": row['file_description']},
+     )
+     for _, row in data.iterrows()
+ ]
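+ # Illustrative only: the loader above assumes each record of
+ # response_metropol.json carries 'values' (the document text) plus
+ # 'file_name' and 'file_description' fields, roughly along these lines
+ # (the actual file is not part of this commit):
+ # {"file_name": "agences.json",
+ #  "file_description": "description du fichier",
+ #  "values": "texte du document ..."}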
+
+ # The OpenAI API key must come from the environment (e.g. a Space
+ # secret named OPENAI_API_KEY); never hardcode it in source control.
+ assert "OPENAI_API_KEY" in os.environ, "OPENAI_API_KEY is not set"
+
+ # Build the vector index (this embeds the documents via the default
+ # OpenAI embedding model, so the key must be set first).
+ index = VectorStoreIndex.from_documents(documents)
+
+ def get_retrieved_nodes(
+     query_str, vector_top_k=10, reranker_top_n=5, with_reranker=False
+ ):
+     """Retrieve the top-k nodes for a query, optionally reranked by an LLM."""
+     query_bundle = QueryBundle(query_str)
+     # Configure the retriever.
+     retriever = VectorIndexRetriever(
+         index=index,
+         similarity_top_k=vector_top_k,
+     )
+     retrieved_nodes = retriever.retrieve(query_bundle)
+
+     if with_reranker:
+         # Configure the LLM-based reranker.
+         reranker = LLMRerank(
+             choice_batch_size=5,
+             top_n=reranker_top_n,
+         )
+         retrieved_nodes = reranker.postprocess_nodes(
+             retrieved_nodes, query_bundle
+         )
+
+     return retrieved_nodes
+
+
+ def pretty_print(df):
+     # Render a DataFrame as HTML (the HTML only renders in a notebook
+     # context; in a plain script this just prints the object's repr).
+     return display(HTML(df.to_html().replace("\\n", "")))
+
+
+ def visualize_retrieved_nodes(nodes) -> None:
+     result_dicts = []
+     for node in nodes:
+         result_dict = {"Score": node.score, "Text": node.node.get_text()}
+         result_dicts.append(result_dict)
+
+     pretty_print(pd.DataFrame(result_dicts))
+
+ # Smoke test: retrieve and rerank nodes for a sample French query
+ # ("which branches are adapted to people with reduced mobility").
+ new_nodes = get_retrieved_nodes(
+     "quelles sont les agences qui sont adaptées aux personnes à mobilité réduite",
+     vector_top_k=10,
+     reranker_top_n=10,
+     with_reranker=True,
+ )
+ visualize_retrieved_nodes(new_nodes)
+
+ # Log to stdout so the retrieval and reranking steps are visible.
+ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+ def get_all_text(new_nodes):
+     """
+     Take a list of nodes and return a single string containing the text
+     of each node, joined together.
+
+     Args:
+         new_nodes (list): A list of nodes from which text is to be extracted.
+
+     Returns:
+         str: A single string containing the text from each node, joined together.
+     """
+     texts = []
+     for node in new_nodes:
+         texts.append(node.get_text())
+     return ' '.join(texts)
+
+
+ get_texts = get_all_text(new_nodes)
+ print(get_texts)
+
+ memory = ChatMemoryBuffer.from_defaults(token_limit=6500)
+
+ chat_engine = index.as_chat_engine(
+     llm=OpenAI(temperature=0, model="gpt-4"),
+     chat_mode="context",
+     memory=memory,
+     system_prompt=(
+         "Assist public agents in responding to the residents and citizens "
+         "of the Nice metropolis by guiding them to the services that best "
+         "address their requests for assistance. Equip agents with the "
+         "information they need to direct citizens to the right services "
+         "efficiently and effectively. Answer in French."
+     ),
+     similarity_top_k=10,
+     node_postprocessors=[
+         LLMRerank(
+             choice_batch_size=5,
+             top_n=5,
+         )
+     ],
+     response_mode="tree_summarize",
+ )
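+ # Optional sanity check (illustrative; the question is a made-up
+ # example): `chat` returns a complete, non-streamed response, which is
+ # handy for debugging outside the Gradio UI.
+ # print(chat_engine.chat("Où renouveler une carte d'identité ?").response)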
+
+ def process(message, history):
+     # Stream the answer token by token, yielding the accumulated text so
+     # Gradio renders the reply incrementally.
+     response = chat_engine.stream_chat(message)
+     partial = ""
+     for token in response.response_gen:
+         partial += token
+         yield partial
+
+ iface = gr.ChatInterface(
+     fn=process,
+     title="Métropole Signature Expert",
+     description="Provide a question related to signing authorities and get a response.",
+     cache_examples=False,
+ )
+
+ iface.launch()