import os

import pandas as pd
from dotenv import load_dotenv
from langchain_groq.chat_models import ChatGroq
from pandasai import Agent, SmartDataframe
from pandasai.llm import HuggingFaceTextGen  # used only by the commented-out HF inference path below
from PIL import Image

load_dotenv()
Groq_Token = os.environ["GROQ_API_KEY"]

# Short aliases for the Groq-hosted models this app can use.
models = {"mixtral": "mixtral-8x7b-32768", "llama": "llama2-70b-4096", "gemma": "gemma-7b-it"}

# Hugging Face read token, only needed for the commented-out HF inference path.
hf_token = os.getenv("HF_READ")

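# Read the air-quality CSV and parse timestamps up front so every caller
# gets a dataframe ready for time-based operations.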
def preprocess_and_load_df(path: str) -> pd.DataFrame:
    df = pd.read_csv(path)
    df["Timestamp"] = pd.to_datetime(df["Timestamp"])
    return df

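# Build a pandasai Agent over `df` using a Groq-hosted chat model.
# `inference_server` is only consumed by the commented-out HuggingFaceTextGen
# path kept below for reference.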
def load_agent(df: pd.DataFrame, context: str, inference_server: str, name="mixtral") -> Agent:
    # Alternative: self-hosted HF text-generation inference.
    # llm = HuggingFaceTextGen(
    #     inference_server_url=inference_server,
    #     max_new_tokens=250,
    #     temperature=0.1,
    #     repetition_penalty=1.2,
    #     top_k=5,
    # )
    # llm.client.headers = {"Authorization": f"Bearer {hf_token}"}
    llm = ChatGroq(model=models[name], api_key=Groq_Token, temperature=0.1)

    agent = Agent(df, config={"llm": llm, "enable_cache": False, "options": {"wait_for_model": True}})
    agent.add_message(context)
    return agent

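# Same idea as load_agent, but wraps the dataframe in a SmartDataframe for
# one-off conversational queries.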
def load_smart_df(df: pd.DataFrame, inference_server: str, name="mixtral") -> SmartDataframe:
    # Alternative: self-hosted HF text-generation inference.
    # llm = HuggingFaceTextGen(inference_server_url=inference_server)
    # llm.client.headers = {"Authorization": f"Bearer {hf_token}"}
    llm = ChatGroq(model=models[name], api_key=Groq_Token, temperature=0.1)
    df = SmartDataframe(df, config={"llm": llm, "max_retries": 5, "enable_cache": False})
    return df

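# Wrap a raw user prompt in the chat-message dict shape used throughout the app.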
def get_from_user(prompt: str) -> dict:
    return {"role": "user", "content": prompt}

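# Run one chat turn against the agent and capture the code it generated and
# executed, plus the final prompt, for display and debugging.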
def ask_agent(agent: Agent, prompt: str) -> dict:
    response = agent.chat(prompt)
    gen_code = agent.last_code_generated
    ex_code = agent.last_code_executed
    last_prompt = agent.last_prompt
    return {"role": "assistant", "content": response, "gen_code": gen_code, "ex_code": ex_code, "last_prompt": last_prompt}

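# Format the generated code and prompt as collapsible HTML <details> blocks
# for rendering in Streamlit markdown.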
def decorate_with_code(response: dict) -> str:
    return f"""<details>
<summary>Generated Code</summary>

```python
{response["gen_code"]}
```
</details>

<details>
<summary>Prompt</summary>

{response["last_prompt"]}
</details>
"""

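# Render a chat message in Streamlit. If the message content is a path to an
# image (e.g. a saved plot), display the image; otherwise render it as markdown.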
def show_response(st, response):
    with st.chat_message(response["role"]):
        try:
            image = Image.open(response["content"])
            if "gen_code" in response:
                st.markdown(decorate_with_code(response), unsafe_allow_html=True)
            st.image(image)
            return {"is_image": True}
        except Exception:
            if "gen_code" in response:
                display_content = decorate_with_code(response) + f"\n\n{response['content']}"
            else:
                display_content = response["content"]
            st.markdown(display_content, unsafe_allow_html=True)
            return {"is_image": False}

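# Answer a free-form question by asking the LLM to complete a small pandas
# script against Data.csv, then executing that script locally.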
def ask_question(model_name: str, question: str) -> dict:
    llm = ChatGroq(model=models[model_name], api_key=Groq_Token, temperature=0.1)

    # Load a small sample of the data only to surface the column dtypes in the prompt.
    df_check = pd.read_csv("Data.csv")
    df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
    df_check = df_check.head(5)

    new_line = "\n"

    template = f"""```python
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

# df.dtypes
{new_line.join(map(lambda x: '# ' + x, str(df_check.dtypes).split(new_line)))}

# {question.strip()}
# <your code here>
```"""

    query = f"""I have a pandas dataframe of daily PM2.5 and PM10 readings.
* `pollution` generally means `PM2.5`.
* Save the result in a variable `answer` and make it global.
* If the result is a plot, save it to disk and store the path in `answer`. Example: `answer='plot.png'`
* If the result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`

Complete the following code.

{template}
"""

    answer = llm.invoke(query)

    # Stitch the scaffold together with the model's completion. The model is
    # asked to reply with a fenced ```python block; fall back to the raw reply
    # if it does not.
    scaffold = template.split("```python")[1].split("```")[0]
    reply = answer.content
    completion = reply.split("```python")[1].split("```")[0] if "```python" in reply else reply
    code = scaffold + "\n" + completion

    # Execute the generated code; per the prompt it should set a global `answer`.
    exec_scope = {}
    exec(code, exec_scope)
    result = exec_scope.get("answer", "")

    return {"role": "assistant", "content": result, "gen_code": code, "ex_code": code, "last_prompt": question}