import jieba
import sacrebleu
import streamlit as st
from bert_score import score as bert_score


def calculate_bleu(translations, references):
    """Return the corpus-level BLEU score computed by sacreBLEU.

    Args:
        translations: List of hypothesis strings, one per segment.
        references: List of reference strings aligned with ``translations``.

    Returns:
        The ``score`` attribute of the sacreBLEU result.
    """
    # sacreBLEU takes a list of reference streams; there is exactly one here.
    return sacrebleu.corpus_bleu(translations, [references]).score


def calculate_ter(translations, references):
    """Return the corpus-level TER score computed by sacreBLEU.

    Args:
        translations: List of hypothesis strings, one per segment.
        references: List of reference strings aligned with ``translations``.

    Returns:
        The ``score`` attribute of the sacreBLEU result.
    """
    # sacreBLEU takes a list of reference streams; there is exactly one here.
    return sacrebleu.corpus_ter(translations, [references]).score


def calculate_chrf(translations, references):
    """Return the corpus-level chrF score computed by sacreBLEU.

    Args:
        translations: List of hypothesis strings, one per segment.
        references: List of reference strings aligned with ``translations``.

    Returns:
        The ``score`` attribute of the sacreBLEU result.
    """
    # sacreBLEU takes a list of reference streams; there is exactly one here.
    return sacrebleu.corpus_chrf(translations, [references]).score


def calculate_bertscore(translations, references, lang):
    """Return the mean BERTScore F1 over all translation/reference pairs.

    Args:
        translations: List of candidate strings.
        references: List of reference strings aligned with ``translations``.
        lang: Language code forwarded to ``bert_score`` as ``lang``.

    Returns:
        The mean of the F1 values, converted to a plain Python number.
    """
    # bert_score returns (precision, recall, F1); only F1 is reported.
    _, _, f1 = bert_score(translations, references, lang=lang)
    return f1.mean().item()


# Page header: title plus a one-line description of what the app computes.
st.title("Machine Translation Quality Evaluation")
st.write(
    "Input the translated text and the reference translation "
    "to compute BLEU, TER, CHRF, and BERTScore metrics."
)

# Display name -> language code for the language pickers.
# Insertion order is the order in which the names are offered to the user.
# "Other" is a placeholder entry: its value is not a real language code.
languages = {
    "English": "en",
    "Chinese": "zh",
    "French": "fr",
    "German": "de",
    "Spanish": "es",
    "Russian": "ru",
    "Japanese": "ja",
    "Korean": "ko",
    "Arabic": "ar",
    "Italian": "it",
    "Dutch": "nl",
    "Portuguese": "pt",
    "Turkish": "tr",
    "Polish": "pl",
    "Czech": "cs",
    "Swedish": "sv",
    "Danish": "da",
    "Finnish": "fi",
    "Greek": "el",
    "Hungarian": "hu",
    "Indonesian": "id",
    "Norwegian": "no",
    "Romanian": "ro",
    "Thai": "th",
    "Vietnamese": "vi",
    "Hebrew": "he",
    "Hindi": "hi",
    "Bengali": "bn",
    "Tamil": "ta",
    "Urdu": "ur",
    "Other": "other",
}

# Language pickers; the options are the display names (keys) of `languages`.
source_lang = st.selectbox("Select Source Language", list(languages))
target_lang = st.selectbox("Select Target Language", list(languages))

# "Other" has no predefined code, so the user types one; any other choice is
# looked up in `languages`. A typed code is stripped and lower-cased so that
# input such as " ZH" still matches later comparisons against "zh".
if source_lang == "Other":
    source_lang_code = st.text_input(
        "Enter Source Language Code (ISO 639-1):",
        value=languages[source_lang],
    ).strip().lower()
else:
    source_lang_code = languages[source_lang]

if target_lang == "Other":
    target_lang_code = st.text_input(
        "Enter Target Language Code (ISO 639-1):",
        value=languages[target_lang],
    ).strip().lower()
else:
    target_lang_code = languages[target_lang]

# Free-text inputs for the translated text and its reference translation.
translation_input = st.text_area("Translated Text", height=200)
reference_input = st.text_area("Reference Translation", height=200)

# Compute and display all four metrics when the user clicks "Evaluate".
if st.button("Evaluate"):
    # Strip before validating so whitespace-only input is rejected instead of
    # being scored as an empty string.
    translation_text = translation_input.strip()
    reference_text = reference_input.strip()

    if translation_text and reference_text:
        # Each text area is scored as a single segment.
        translations = [translation_text]
        references = [reference_text]

        # Both scored texts are in the target language, so only the target
        # code decides whether jieba segmentation is applied. The segmented
        # words are joined with spaces before scoring.
        if target_lang_code == "zh":
            translations = [" ".join(jieba.cut(text)) for text in translations]
            references = [" ".join(jieba.cut(text)) for text in references]

        bleu_score = calculate_bleu(translations, references)
        ter_score = calculate_ter(translations, references)
        chrf_score = calculate_chrf(translations, references)
        bertscore = calculate_bertscore(translations, references, target_lang_code)

        st.write(f"**BLEU Score:** {bleu_score:.2f}")
        st.write(f"**TER Score:** {ter_score:.2f}")
        st.write(f"**CHRF Score:** {chrf_score:.2f}")
        st.write(f"**BERTScore:** {bertscore:.2f}")
    else:
        st.error("Please provide both translated text and reference translation.")