import functools

import gradio as gr
import numpy as np
import tensorflow as tf
import transformers

def encode(sentences, tokenizer, sequence_length):
    """Tokenize a batch of sentences into fixed-length TensorFlow tensors.

    Args:
        sentences: List of raw strings to tokenize.
        tokenizer: Object exposing a Hugging Face style ``batch_encode_plus``
            method (e.g. a ``transformers.BertTokenizer``).
        sequence_length: Target number of tokens per sentence; passed as
            ``max_length`` together with max-length padding and truncation.

    Returns:
        The value returned by ``tokenizer.batch_encode_plus``. Attention masks
        are requested and token type ids are suppressed.
    """
    return tokenizer.batch_encode_plus(
        sentences,
        max_length=sequence_length,  # set the length of the sequences
        add_special_tokens=True,  # add [CLS] and [SEP] tokens
        return_attention_mask=True,
        return_token_type_ids=False,  # not needed for this type of ML task
        # Explicit strategies replace the deprecated `pad_to_max_length=True`,
        # which also relied on an implicit truncation fallback.
        padding='max_length',  # pad shorter sequences up to max_length
        truncation=True,  # cut longer sequences down to max_length
        return_tensors='tf',
    )

# Load both saved models once at import time, in the same order as before
# (model_1 first, then model_2), so every request reuses the same objects.
hs_detection_model_1, hs_detection_model_2 = [
    tf.keras.models.load_model(model_dir, compile=True)
    for model_dir in ('./model_1', './model_2')
]

@functools.lru_cache(maxsize=None)
def _load_tokenizer(checkpoint):
    """Return the BERT tokenizer for ``checkpoint``, loading it only once per process."""
    return transformers.BertTokenizer.from_pretrained(checkpoint)


def model_inference(sentence):
    """Score one sentence with both hate-speech models.

    The sentence is tokenized separately for each model (cased tokenizer with
    300 tokens for model 1, uncased tokenizer with 512 tokens for model 2) and
    passed to the corresponding model's ``predict``.

    Args:
        sentence: The text to classify.

    Returns:
        A 2-tuple of dicts, one per model, each mapping the label
        ``'Hassrede'`` to the first predicted value converted to ``float``.
    """
    # Tokenizers are memoised: calling `from_pretrained` on every request
    # would re-read the vocabulary/config each time.
    encoded_model1_sentence = encode([sentence], _load_tokenizer('dbmdz/bert-base-german-cased'), 300)
    encoded_model2_sentence = encode([sentence], _load_tokenizer('dbmdz/bert-base-german-uncased'), 512)
    predictions_1 = hs_detection_model_1.predict(encoded_model1_sentence.values()).flatten()
    predictions_2 = hs_detection_model_2.predict(encoded_model2_sentence.values()).flatten()
    return {'Hassrede': float(predictions_1[0])}, {'Hassrede': float(predictions_2[0])}

# Heading text passed to gr.Interface as `title`.
title = "HS-Detector Demonstrator (deutsch)"
# HTML fragment passed to gr.Interface as `description`. It lays out two
# 50%-wide floated columns, one per model ("Ausgangsmodell" = baseline,
# "Challenger-Modell" = challenger), each listing the base checkpoint, the
# training dataset, the fine-tuning parameters and the evaluation metrics.
# The text is user-facing German and must be kept verbatim.
description = """
<div style="float: none; overflow: hidden;">
<div style="display:block; width:100%;">
<center>
<div style="width:50%; float: left; display: inline-block;">
    <h2>Ausgangsmodell</h2>
    <p>Modell: Bert ('dbmdz/bert-base-german-cased')</p>
    <p>Dataset: germeval18_hasoc19_rp21_combi_dataset <br/> (77.161 Einträge mit einem Hassrede-Anteil von 17,7%)</p>
    <p>Fine-Tuning Parameter: 2 Epochen, 300 Token pro Eintrag, 2e-5 LR</p>

    Evaluationsergebnisse:
    Balanced Accuracy: 0.756
    (Accuracy: 0.880)
    Binary F1-Score: 0.625
    Binary Precision: 0.699
    Binary Recall: 0.565
    MCC score: 0.559
    AUROC score: 0.756
</div>
<div style="width:50%; float: left; display: inline-block;">
    <h2>Challenger-Modell</h2>
    <p>Modell: Bert ('dbmdz/bert-base-german-uncased')</p>
    <p>Dataset: germeval18_hasoc19_rp21_combi_dataset_no-url_no-address  <br/> (~77.161 Einträge mit einem Hassrede-Anteil von 17,7%)</p>
    <p>Fine-Tuning Parameter: 2 Epochen, 512 Token pro Eintrag, 2e-5 LR</p>

    Evaluationsergebnisse:
    Balanced Accuracy: 0.749
    (Accuracy: 0.867)
    Binary F1-Score: 0.602
    Binary Precision: 0.642
    Binary Recall: 0.567
    MCC score: 0.524
    AUROC score: 0.749
</div>
</center>
</div>
</div>
"""
# <p>Dataset: germeval18_hasoc19_rp21_glasebach22_combi_dataset_no-addr.csv <br/> (84.239 Einträge mit einem Hassrede-Anteil von 18,2%)</p>
# Footer text passed to gr.Interface as `article` (user-facing German).
# Spelling fix: "entscheident" -> "entscheidend".
article = """Die Eingaben werden nicht geloggt. Klassifikator einfach ausprobieren. 
Unter dem Button 'Ersteller' kann inspiziert werden, welche Satz-Bestandteile für die Modelle vermutlich entscheidend waren. 
Dabei werden automatisiert Satzteile verändert und die Auswirkungen auf die jeweils abgefragten Predictions beobachtet."""

# Five-line text box in which the user enters the sentence to classify.
input_sentence_text = gr.inputs.Textbox(
    lines=5,
    placeholder="Geben Sie hier den Satz ein, der von den Modellen auf Hassrede geprüft werden soll.",
)

# One label widget per model, in the order model_inference returns its results.
output_predictions = [
    gr.outputs.Label(label=caption, num_top_classes=1)
    for caption in ("Prediction of initial model", "Prediction of challenging model")
]

# Assemble the demo page around model_inference and start serving it.
ui = gr.Interface(
    fn=model_inference,
    inputs=input_sentence_text,
    outputs=output_predictions,
    title=title,
    article=article,
    description=description,
    interpretation="default",
    flagging_options=["incorrect", "ambiguous", "other"],
    theme="huggingface",
    css=".confidence {color: black !important}",
)
ui.launch(enable_queue=True)