Spaces:

SanctiMoly
/

SanctiMolyTopic

Runtime error

App Files Community

alex6095 committed on Dec 12, 2021

Commit

c1eabe2

•

1 Parent(s): 43b149d

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -7

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import torch
 import re
 import streamlit as st
 from transformers import DistilBertForSequenceClassification
 from tokenization_kobert import KoBertTokenizer
@@ -8,12 +9,15 @@ from tokenization_kobert import KoBertTokenizer
 tokenizer = KoBertTokenizer.from_pretrained('monologg/distilkobert')
 @st.cache(allow_output_mutation=True)
 def get_model():
-    model = DistilBertForSequenceClassification.from_pretrained('alex6095/SanctiMolyTopic', problem_type="multi_label_classification", num_labels=9)
     model.eval()
     return model
 class RegexSubstitution(object):
     """Regex substitution class for transform"""
@@ -23,10 +27,10 @@ class RegexSubstitution(object):
         else:
             self.regex = re.compile(regex)
         self.sub = sub
     def __call__(self, target):
         if isinstance(target, list):
-            return [ self.regex.sub(self.sub, self.regex.sub(self.sub, string)) for string in target ]
         else:
             return self.regex.sub(self.sub, self.regex.sub(self.sub, target))
@@ -41,21 +45,23 @@ topics_raw = ['IT/과학', '경제', '문화', '미용/건강', '사회', '생
 model = get_model()
-st.title("Topic estimate Model Test")
 text = st.text_area("Input news :", value=default_text)
 st.markdown("## Original News Data")
 st.write(text)
 if text:
-    st.markdown("## Predict Topic")
     with st.spinner('processing..'):
         text = RegexSubstitution(r'\([^()]+\)|[<>\'"△▲□■]')(text)
         encoded_dict = tokenizer(
             text=text,
             add_special_tokens=True,
-            max_length = 512,
             truncation=True,
             return_tensors='pt',
             return_length=True
@@ -68,4 +74,13 @@ if text:
         _, preds = torch.max(outputs.logits, 1)
-    st.write(topics_raw[preds.squeeze(0)])

 import torch
 import re
 import streamlit as st
+import pandas as pd
 from transformers import DistilBertForSequenceClassification
 from tokenization_kobert import KoBertTokenizer
 tokenizer = KoBertTokenizer.from_pretrained('monologg/distilkobert')
 @st.cache(allow_output_mutation=True)
 def get_model():
+    model = DistilBertForSequenceClassification.from_pretrained(
+        'alex6095/SanctiMolyTopic', problem_type="multi_label_classification", num_labels=9)
     model.eval()
     return model
 class RegexSubstitution(object):
     """Regex substitution class for transform"""
         else:
             self.regex = re.compile(regex)
         self.sub = sub
     def __call__(self, target):
         if isinstance(target, list):
+            return [self.regex.sub(self.sub, self.regex.sub(self.sub, string)) for string in target]
         else:
             return self.regex.sub(self.sub, self.regex.sub(self.sub, target))
 model = get_model()
+st.title("News Topic Classification")
 text = st.text_area("Input news :", value=default_text)
 st.markdown("## Original News Data")
 st.write(text)
+st.markdown("## Predict Topic")
+col1, col2 = st.columns(2)
 if text:
     with st.spinner('processing..'):
         text = RegexSubstitution(r'\([^()]+\)|[<>\'"△▲□■]')(text)
         encoded_dict = tokenizer(
             text=text,
             add_special_tokens=True,
+            max_length=512,
             truncation=True,
             return_tensors='pt',
             return_length=True
         _, preds = torch.max(outputs.logits, 1)
+    col1.write(topics_raw[preds.squeeze(0)])
+    softmax = torch.nn.Softmax(dim=1)
+    prob = softmax(outputs.logits).squeeze(0).detach()
+    chart_data = pd.DataFrame({
+        'Topic': topics_raw,
+        'Probability': prob
+    })
+    chart_data = chart_data.set_index('Topic')
+    col2.bar_chart(chart_data)