Spaces:
Running
Running
File size: 2,800 Bytes
9966b88 98cc895 f555c09 9c53030 7d8479e 9c53030 98cc895 9966b88 98cc895 9c53030 0bbc8ff 98cc895 9c53030 0bbc8ff 9c53030 0bbc8ff 9c53030 0bbc8ff 98cc895 9c53030 0bbc8ff 9c53030 98cc895 9c53030 98cc895 9c53030 0bbc8ff ee5fabd 0bbc8ff ee5fabd 0bbc8ff 9c53030 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# Sentence splitting: a blank English spaCy pipeline whose only added
# component is the rule-based "sentencizer". predict_doc() uses it to break
# an essay into sentences.
from spacy.lang.en import English
nlp = English()
nlp.add_pipe("sentencizer")
import pandas as pd
import gradio as gr
from transformers import pipeline
from gradio.themes.utils.colors import red, green
# Sentence-level text classifier, built once at import time and reused by
# predict_one_sent() for every sentence.
detector = pipeline(task='text-classification', model='SJTU-CL/RoBERTa-large-ArguGPT-sent')
# Highlight colours for the labelled-sentence view. Keys are the bucket
# labels '0%', '10%', ..., '100%' assigned in predict_doc(); values run from
# the strongest green (lowest bucket) through the palest shades at the
# middle to the strongest red (highest bucket).
color_map = dict(zip(
    (f'{pct}%' for pct in range(0, 101, 10)),
    (
        green.c400,
        green.c300,
        green.c200,
        green.c100,
        green.c50,
        red.c50,
        red.c100,
        red.c200,
        red.c300,
        red.c400,
        red.c500,
    ),
))
# (exclusive upper bound, colour-map key) for each highlight bucket, in
# ascending order. Anything not below the last bound falls into '100%'.
_SCORE_BUCKETS = (
    (0.1, '0%'),
    (0.2, '10%'),
    (0.3, '20%'),
    (0.4, '30%'),
    (0.5, '40%'),
    (0.6, '50%'),
    (0.7, '60%'),
    (0.8, '70%'),
    (0.9, '80%'),
    (1, '90%'),
)


def _bucket_label(prob):
    """Return the colour-map key ('0%' ... '100%') for a probability."""
    for upper_bound, key in _SCORE_BUCKETS:
        if prob < upper_bound:
            return key
    return '100%'


def predict_doc(doc):
    """Score every sentence of an essay and summarise the result.

    The essay is split into sentences with the module-level ``nlp``
    pipeline, each sentence is scored with ``predict_one_sent``, and the
    per-sentence results are written to ``result.csv``.

    Args:
        doc: The essay text. ``None``, empty, or whitespace-only input is
            treated as "nothing to analyse".

    Returns:
        A 4-tuple ``(summary, highlighted, table, csv_path)``:

        * ``summary`` - one-sentence verdict string.
        * ``highlighted`` - list of ``(sentence, bucket_key)`` pairs, where
          ``bucket_key`` is one of the ``color_map`` keys.
        * ``table`` - ``pandas.DataFrame`` with columns ``sentence``,
          ``label`` (``'Human'``/``'Machine'``) and ``score`` (probability
          rounded to 4 decimals).
        * ``csv_path`` - ``'result.csv'``, or ``None`` when there was
          nothing to analyse (no file is written in that case).
    """
    has_text = bool(doc and doc.strip())
    sents = [s.text for s in nlp(doc).sents] if has_text else []

    if not sents:
        # Without this guard the mean of an empty score column is NaN,
        # `NaN <= 0.5` is False, and the essay would be reported as
        # machine-written with probability nan.
        empty_table = pd.DataFrame(columns=['sentence', 'label', 'score'])
        return 'No text to analyse. Please enter an essay.', [], empty_table, None

    probs = [predict_one_sent(sent) for sent in sents]

    df = pd.DataFrame({
        'sentence': sents,
        'label': ['Human' if p <= 0.5 else 'Machine' for p in probs],
        'score': [round(p, 4) for p in probs],
    })
    df.to_csv('result.csv')

    # Highlighting uses the unrounded probability so bucket edges are exact.
    res = [(sent, _bucket_label(p)) for sent, p in zip(sents, probs)]

    # The overall verdict is the mean of the (rounded) sentence scores.
    overall_score = df['score'].mean()
    overall_label = 'Human' if overall_score <= 0.5 else 'Machine'
    sum_str = f'The essay is probably written by {overall_label}. The probability of being generated by AI is {overall_score}'
    return sum_str, res, df, 'result.csv'
def predict_one_sent(sent):
    """Return the probability that ``sent`` is machine-generated.

    The classifier returns its chosen label with that label's score, so the
    score is flipped when the chosen label is ``LABEL_0``:

        LABEL_1, 0.66 -> 0.66
        LABEL_0, 0.66 -> 0.34

    Args:
        sent: A single sentence of text.

    Returns:
        A float; callers treat values above 0.5 as 'Machine'.
    """
    # Truncate to the tokenizer's maximum length: text with no
    # sentence-final punctuation reaches this function as one very long
    # "sentence", which could otherwise exceed the model's input window.
    top = detector(sent, truncation=True)[0]
    score = top['score']
    return 1 - score if top['label'] == 'LABEL_0' else score
# Gradio UI: essay input and trigger button, a highlighted per-sentence view,
# a text summary, a downloadable CSV and a table of sentence scores.
# NOTE(review): the original indentation was lost, so the Row/Column nesting
# below is a reconstruction - confirm against the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                lines=5,
                label='Essay input',
                info='Please enter the essay in the textbox',
            )
            btn = gr.Button('Predict who writes this essay!')
        # Sentences coloured by probability bucket; keys come from color_map.
        sent_res = gr.HighlightedText(label='Labeled Result', color_map=color_map)
    with gr.Row():
        summary = gr.Text(label='Result summary')
        csv_f = gr.File(label='CSV file storing data with all sentences.')
    tab = gr.Dataframe(label='Table with Probability Score', row_count=100)
    # The order of `outputs` must match the tuple returned by predict_doc.
    btn.click(
        predict_doc,
        inputs=[text_in],
        outputs=[summary, sent_res, tab, csv_f],
        api_name='predict_doc',
    )

# Only start the server when run as a script, so importing this module
# (e.g. to reuse predict_doc) does not launch the app.
if __name__ == '__main__':
    demo.launch()
|