Spaces:

TeresaK
/

cpv_test

Runtime error

App Files Community

leavoigt committed on Sep 25, 2023

Commit

48bf795

•

1 Parent(s): e5fe546

Rename appStore/sdg_analysis.py to appStore/vulnerability_analysis.py

Browse files

Files changed (1) hide show

appStore/{sdg_analysis.py → vulnerability_analysis.py} +23 -24

appStore/{sdg_analysis.py → vulnerability_analysis.py} RENAMED Viewed

@@ -10,9 +10,8 @@ import pandas as pd
 import streamlit as st
 from st_aggrid import AgGrid
 from st_aggrid.shared import ColumnsAutoSizeMode
-from utils.sdg_classifier import sdg_classification
-from utils.sdg_classifier import runSDGPreprocessingPipeline, load_sdgClassifier
-from utils.keyword_extraction import textrank
 import logging
 logger = logging.getLogger(__name__)
 from utils.checkconfig import getconfig
@@ -20,21 +19,21 @@ from utils.checkconfig import getconfig
 # Declare all the necessary variables
 config = getconfig('paramconfig.cfg')
-model_name = config.get('sdg','MODEL')
-split_by = config.get('sdg','SPLIT_BY')
-split_length = int(config.get('sdg','SPLIT_LENGTH'))
-split_overlap = int(config.get('sdg','SPLIT_OVERLAP'))
-remove_punc = bool(int(config.get('sdg','REMOVE_PUNC')))
-split_respect_sentence_boundary = bool(int(config.get('sdg','RESPECT_SENTENCE_BOUNDARY')))
-threshold = float(config.get('sdg','THRESHOLD'))
-top_n = int(config.get('sdg','TOP_KEY'))
 def app():
     #### APP INFO #####
     with st.container():
-        st.markdown("<h1 style='text-align: center; color: black;'> SDG Classification and Keyphrase Extraction </h1>", unsafe_allow_html=True)
         st.write(' ')
         st.write(' ')
@@ -106,14 +105,14 @@ def app():
     ### Main app code ###
     with st.container():
-        if st.button("RUN SDG Analysis"):
             if 'filepath' in st.session_state:
                 file_name = st.session_state['filename']
                 file_path = st.session_state['filepath']
-                classifier = load_sdgClassifier(classifier_name=model_name)
-                st.session_state['sdg_classifier'] = classifier
-                all_documents = runSDGPreprocessingPipeline(file_name= file_name,
                                         file_path= file_path, split_by= split_by,
                                         split_length= split_length,
                 split_respect_sentence_boundary= split_respect_sentence_boundary,
@@ -124,18 +123,18 @@ def app():
                 else:
                     warning_msg = ""
-                with st.spinner("Running SDG Classification{}".format(warning_msg)):
-                    df, x = sdg_classification(haystack_doc=all_documents['documents'],
                                                 threshold= threshold)
                     df = df.drop(['Relevancy'], axis = 1)
-                    sdg_labels = x.SDG.unique()
                     textrank_keyword_list = []
                     for label in sdg_labels:
-                        sdgdata = " ".join(df[df.SDG == label].text.to_list())
                         textranklist_ = textrank(textdata=sdgdata, words= top_n)
                         if len(textranklist_) > 0:
-                            textrank_keyword_list.append({'SDG':label, 'TextRank Keywords':",".join(textranklist_)})
                     textrank_keywords_df = pd.DataFrame(textrank_keyword_list)
@@ -151,7 +150,7 @@ def app():
                     # fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
-                    st.markdown("#### Anything related to SDGs? ####")
                     c4, c5, c6 = st.columns([1,2,2])
@@ -162,13 +161,13 @@ def app():
                         labeldf = "<br>".join(labeldf)
                         st.markdown(labeldf, unsafe_allow_html=True)
                     st.write("")
-                    st.markdown("###### What keywords are present under SDG classified text? ######")
                     AgGrid(textrank_keywords_df, reload_data = False,
                             update_mode="value_changed",
                     columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
                     st.write("")
-                    st.markdown("###### Top few SDG Classified paragraph/text results ######")
                     AgGrid(df, reload_data = False, update_mode="value_changed",
                     columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)

 import streamlit as st
 from st_aggrid import AgGrid
 from st_aggrid.shared import ColumnsAutoSizeMode
+from utils.vulnerability_classifier import vulnerability_classification
+from utils.vulnerability_classifier import runPreprocessingPipeline, load_Classifier
 import logging
 logger = logging.getLogger(__name__)
 from utils.checkconfig import getconfig
 # Declare all the necessary variables
 config = getconfig('paramconfig.cfg')
+model_name = config.get('vulnerability','MODEL')
+split_by = config.get('vulnerability','SPLIT_BY')
+split_length = int(config.get('vulnerability','SPLIT_LENGTH'))
+split_overlap = int(config.get('vulnerability','SPLIT_OVERLAP'))
+remove_punc = bool(int(config.get('vulnerability','REMOVE_PUNC')))
+split_respect_sentence_boundary = bool(int(config.get('vulnerability','RESPECT_SENTENCE_BOUNDARY')))
+threshold = float(config.get('vulnerability','THRESHOLD'))
+top_n = int(config.get('vulnerability','TOP_KEY'))
 def app():
     #### APP INFO #####
     with st.container():
+        st.markdown("<h1 style='text-align: center; color: black;'> Vulnerability Classification  </h1>", unsafe_allow_html=True)
         st.write(' ')
         st.write(' ')
     ### Main app code ###
     with st.container():
+        if st.button("RUN Vulnerability Analysis"):
             if 'filepath' in st.session_state:
                 file_name = st.session_state['filename']
                 file_path = st.session_state['filepath']
+                classifier = load_Classifier(classifier_name=model_name)
+                st.session_state['vulnerability_classifier'] = classifier
+                all_documents = runPreprocessingPipeline(file_name= file_name,
                                         file_path= file_path, split_by= split_by,
                                         split_length= split_length,
                 split_respect_sentence_boundary= split_respect_sentence_boundary,
                 else:
                     warning_msg = ""
+                with st.spinner("Running Classification{}".format(warning_msg)):
+                    df, x = vulnerability_classification(haystack_doc=all_documents['documents'],
                                                 threshold= threshold)
                     df = df.drop(['Relevancy'], axis = 1)
+                    vulnerability_labels = x.vulnerability.unique()
                     textrank_keyword_list = []
                     for label in sdg_labels:
+                        vulnerability_data = " ".join(df[df.vulnerability == label].text.to_list())
                         textranklist_ = textrank(textdata=sdgdata, words= top_n)
                         if len(textranklist_) > 0:
+                            textrank_keyword_list.append({'Vulnerability':label, 'TextRank Keywords':",".join(textranklist_)})
                     textrank_keywords_df = pd.DataFrame(textrank_keyword_list)
                     # fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
+                    st.markdown("#### Anything related to Vulnerabilities? ####")
                     c4, c5, c6 = st.columns([1,2,2])
                         labeldf = "<br>".join(labeldf)
                         st.markdown(labeldf, unsafe_allow_html=True)
                     st.write("")
+                    st.markdown("###### What keywords are present under vulnerability classified text? ######")
                     AgGrid(textrank_keywords_df, reload_data = False,
                             update_mode="value_changed",
                     columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
                     st.write("")
+                    st.markdown("###### Top few vulnerability Classified paragraph/text results ######")
                     AgGrid(df, reload_data = False, update_mode="value_changed",
                     columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)