safety-card / app.py
max-lakera's picture
adding in MC template + parsing (#2)
4a394a0
raw
history blame
3.52 kB
# %%
from jinja2 import Environment, FileSystemLoader
import pandas as pd
import gradio as gr
# Metrics table loaded once at import time from a path relative to the working
# directory; md_builder selects rows from it by the "friendly_name" column.
df = pd.read_csv("./data.csv")
def parse_into_jinja_markdown(
    model_name,
    performance,
    accuracy,
    Precision,
    Recall,
    Robustness,
    Fairness,
    Failure_Clusters,
):
    """Render ``mc_template.md`` with the given model name and metric values.

    The template is looked up through a ``FileSystemLoader`` rooted at ``.``
    with autoescaping enabled. Each argument is handed to the template under
    the variable name shown in the mapping below.

    Returns:
        The rendered template text.
    """
    # Template variable name -> value supplied by the caller.
    template_context = {
        "model_id": model_name,
        "accuracy": accuracy,
        "Precision": Precision,
        "Recall": Recall,
        "Robustness": Robustness,
        "Fairness": Fairness,
        "Performance": performance,
        "Failure_Cluster": Failure_Clusters,
    }

    jinja_env = Environment(loader=FileSystemLoader("."), autoescape=True)
    card_template = jinja_env.get_template("mc_template.md")
    return card_template.render(**template_context)
def md_builder(model, dataset, displayed_metrics):
    """Build the rendered safety card for one model.

    Args:
        model: Value compared against the ``friendly_name`` column of the
            module-level ``df`` to select the model's row.
        dataset: Dataset name chosen in the UI. Not used here; kept because
            the Gradio interface passes three inputs to this function.
        displayed_metrics: Collection of metric labels to include. Recognised
            labels are "Performance", "Accuracy", "Precision", "Recall",
            "Robustness", "Fairness" and "Failure Clusters".

    Returns:
        The text produced by ``parse_into_jinja_markdown``.

    Raises:
        ValueError: If no row of ``df`` has ``friendly_name == model``.
    """
    row = df[df["friendly_name"] == model]
    if row.empty:
        # Fail with a readable message instead of an IndexError from .values[0].
        raise ValueError(f"No entry for model {model!r} in the metrics table")

    def first(column):
        # Value of `column` in the first matching row.
        return row[column].values[0]

    # Every template field defaults to an empty string so that metrics the
    # user did not select render as blank instead of raising
    # UnboundLocalError when they are passed to the template below.
    perform_val = ""
    accuracy_val = ""
    precision_val = ""
    recall_val = ""
    robustness_val = ""
    fairness_val = ""
    fail_cluster = ""

    if "Performance" in displayed_metrics:
        perform_val = f"\nPerformance: `{first('performance')}`"
    if "Accuracy" in displayed_metrics:
        accuracy_val = f"\nAccuracy: `{first('accuracy')}`"
    if "Precision" in displayed_metrics:
        precision_val = f"\nPrecision: `{first('precision_weighted')}`"
    if "Recall" in displayed_metrics:
        recall_val = f"\nRecall: `{first('recall_weighted')}`"
    if "Robustness" in displayed_metrics:
        # The displayed figure is 100 minus the stored "robustness" value.
        robustness_val = f"\nRobustness: `{100 - first('robustness')}`"
    if "Fairness" in displayed_metrics:
        # Hard-coded 0: no column of df is read for this metric.
        fairness_val = f"\nFairness: `{0}`"
    if "Failure Clusters" in displayed_metrics:
        cl_count = first("cluster_count")
        fail_cluster = (
            f"\nTop failures: {first('top_failure_cluster')}"
            f"(+{cl_count - 1} others)(details for all {cl_count} clusters)"
        )

    return parse_into_jinja_markdown(
        model,
        perform_val,
        accuracy_val,
        precision_val,
        recall_val,
        robustness_val,
        fairness_val,
        fail_cluster,
    )
# Input widgets, listed in the positional order md_builder receives them:
# (model, dataset, displayed_metrics).
model_input = gr.Dropdown(
    choices=list(df["friendly_name"]),
    label="Model",
    value="ViT",
    info="Select a model to use for testing.",
)

dataset_input = gr.Dropdown(
    choices=["marmal88/skin_cancer"],
    value="marmal88/skin_cancer",
    label="Dataset",
    info="Select the sampling dataset to use for testing.",
)

metrics_input = gr.CheckboxGroup(
    choices=[
        "Performance",
        "Accuracy",
        "Precision",
        "Recall",
        "Robustness",
        "Fairness",
        "Failure Clusters",
    ],
    value=["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
    label="Metrics",
    info="Select displayed metrics.",
)

# The string returned by md_builder is shown through a markdown output.
iface = gr.Interface(
    fn=md_builder,
    inputs=[model_input, dataset_input, metrics_input],
    outputs="markdown",
    examples=[
        [
            "ViT",
            "marmal88/skin_cancer",
            ["Accuracy", "Robustness", "Fairness", "Failure Clusters"],
        ],
    ],
)

iface.launch()
# %%