Rename appStore/sdg_analysis.py to appStore/vulnerability_analysis.py
Browse files
appStore/{sdg_analysis.py → vulnerability_analysis.py}
RENAMED
@@ -10,9 +10,8 @@ import pandas as pd
|
|
10 |
import streamlit as st
|
11 |
from st_aggrid import AgGrid
|
12 |
from st_aggrid.shared import ColumnsAutoSizeMode
|
13 |
-
from utils.
|
14 |
-
from utils.
|
15 |
-
from utils.keyword_extraction import textrank
|
16 |
import logging
|
17 |
logger = logging.getLogger(__name__)
|
18 |
from utils.checkconfig import getconfig
|
@@ -20,21 +19,21 @@ from utils.checkconfig import getconfig
|
|
20 |
|
21 |
# Declare all the necessary variables
|
22 |
config = getconfig('paramconfig.cfg')
|
23 |
-
model_name = config.get('
|
24 |
-
split_by = config.get('
|
25 |
-
split_length = int(config.get('
|
26 |
-
split_overlap = int(config.get('
|
27 |
-
remove_punc = bool(int(config.get('
|
28 |
-
split_respect_sentence_boundary = bool(int(config.get('
|
29 |
-
threshold = float(config.get('
|
30 |
-
top_n = int(config.get('
|
31 |
|
32 |
|
33 |
def app():
|
34 |
|
35 |
#### APP INFO #####
|
36 |
with st.container():
|
37 |
-
st.markdown("<h1 style='text-align: center; color: black;'>
|
38 |
st.write(' ')
|
39 |
st.write(' ')
|
40 |
|
@@ -106,14 +105,14 @@ def app():
|
|
106 |
|
107 |
### Main app code ###
|
108 |
with st.container():
|
109 |
-
if st.button("RUN
|
110 |
|
111 |
if 'filepath' in st.session_state:
|
112 |
file_name = st.session_state['filename']
|
113 |
file_path = st.session_state['filepath']
|
114 |
-
classifier =
|
115 |
-
st.session_state['
|
116 |
-
all_documents =
|
117 |
file_path= file_path, split_by= split_by,
|
118 |
split_length= split_length,
|
119 |
split_respect_sentence_boundary= split_respect_sentence_boundary,
|
@@ -124,18 +123,18 @@ def app():
|
|
124 |
else:
|
125 |
warning_msg = ""
|
126 |
|
127 |
-
with st.spinner("Running
|
128 |
|
129 |
-
df, x =
|
130 |
threshold= threshold)
|
131 |
df = df.drop(['Relevancy'], axis = 1)
|
132 |
-
|
133 |
textrank_keyword_list = []
|
134 |
for label in sdg_labels:
|
135 |
-
|
136 |
textranklist_ = textrank(textdata=sdgdata, words= top_n)
|
137 |
if len(textranklist_) > 0:
|
138 |
-
textrank_keyword_list.append({'
|
139 |
textrank_keywords_df = pd.DataFrame(textrank_keyword_list)
|
140 |
|
141 |
|
@@ -151,7 +150,7 @@ def app():
|
|
151 |
# fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
|
152 |
|
153 |
|
154 |
-
st.markdown("#### Anything related to
|
155 |
|
156 |
c4, c5, c6 = st.columns([1,2,2])
|
157 |
|
@@ -162,13 +161,13 @@ def app():
|
|
162 |
labeldf = "<br>".join(labeldf)
|
163 |
st.markdown(labeldf, unsafe_allow_html=True)
|
164 |
st.write("")
|
165 |
-
st.markdown("###### What keywords are present under
|
166 |
|
167 |
AgGrid(textrank_keywords_df, reload_data = False,
|
168 |
update_mode="value_changed",
|
169 |
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|
170 |
st.write("")
|
171 |
-
st.markdown("###### Top few
|
172 |
|
173 |
AgGrid(df, reload_data = False, update_mode="value_changed",
|
174 |
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|
|
|
10 |
import streamlit as st
|
11 |
from st_aggrid import AgGrid
|
12 |
from st_aggrid.shared import ColumnsAutoSizeMode
|
13 |
+
from utils.vulnerability_classifier import vulnerability_classification
|
14 |
+
from utils.vulnerability_classifier import runPreprocessingPipeline, load_Classifier
|
|
|
15 |
import logging
|
16 |
logger = logging.getLogger(__name__)
|
17 |
from utils.checkconfig import getconfig
|
|
|
19 |
|
20 |
# Declare all the necessary variables
|
21 |
config = getconfig('paramconfig.cfg')
|
22 |
+
model_name = config.get('vulnerability','MODEL')
|
23 |
+
split_by = config.get('vulnerability','SPLIT_BY')
|
24 |
+
split_length = int(config.get('vulnerability','SPLIT_LENGTH'))
|
25 |
+
split_overlap = int(config.get('vulnerability','SPLIT_OVERLAP'))
|
26 |
+
remove_punc = bool(int(config.get('vulnerability','REMOVE_PUNC')))
|
27 |
+
split_respect_sentence_boundary = bool(int(config.get('vulnerability','RESPECT_SENTENCE_BOUNDARY')))
|
28 |
+
threshold = float(config.get('vulnerability','THRESHOLD'))
|
29 |
+
top_n = int(config.get('vulnerability','TOP_KEY'))
|
30 |
|
31 |
|
32 |
def app():
|
33 |
|
34 |
#### APP INFO #####
|
35 |
with st.container():
|
36 |
+
st.markdown("<h1 style='text-align: center; color: black;'> Vulnerability Classification </h1>", unsafe_allow_html=True)
|
37 |
st.write(' ')
|
38 |
st.write(' ')
|
39 |
|
|
|
105 |
|
106 |
### Main app code ###
|
107 |
with st.container():
|
108 |
+
if st.button("RUN Vulnerability Analysis"):
|
109 |
|
110 |
if 'filepath' in st.session_state:
|
111 |
file_name = st.session_state['filename']
|
112 |
file_path = st.session_state['filepath']
|
113 |
+
classifier = load_Classifier(classifier_name=model_name)
|
114 |
+
st.session_state['vulnerability_classifier'] = classifier
|
115 |
+
all_documents = runPreprocessingPipeline(file_name= file_name,
|
116 |
file_path= file_path, split_by= split_by,
|
117 |
split_length= split_length,
|
118 |
split_respect_sentence_boundary= split_respect_sentence_boundary,
|
|
|
123 |
else:
|
124 |
warning_msg = ""
|
125 |
|
126 |
+
with st.spinner("Running Classification{}".format(warning_msg)):
|
127 |
|
128 |
+
df, x = vulnerability_classification(haystack_doc=all_documents['documents'],
|
129 |
threshold= threshold)
|
130 |
df = df.drop(['Relevancy'], axis = 1)
|
131 |
+
vulnerability_labels = x.vulnerability.unique()
|
132 |
textrank_keyword_list = []
|
133 |
for label in sdg_labels:
|
134 |
+
vulnerability_data = " ".join(df[df.vulnerability == label].text.to_list())
|
135 |
textranklist_ = textrank(textdata=sdgdata, words= top_n)
|
136 |
if len(textranklist_) > 0:
|
137 |
+
textrank_keyword_list.append({'Vulnerability':label, 'TextRank Keywords':",".join(textranklist_)})
|
138 |
textrank_keywords_df = pd.DataFrame(textrank_keyword_list)
|
139 |
|
140 |
|
|
|
150 |
# fig.savefig('temp.png', bbox_inches='tight',dpi= 100)
|
151 |
|
152 |
|
153 |
+
st.markdown("#### Anything related to Vulnerabilities? ####")
|
154 |
|
155 |
c4, c5, c6 = st.columns([1,2,2])
|
156 |
|
|
|
161 |
labeldf = "<br>".join(labeldf)
|
162 |
st.markdown(labeldf, unsafe_allow_html=True)
|
163 |
st.write("")
|
164 |
+
st.markdown("###### What keywords are present under vulnerability classified text? ######")
|
165 |
|
166 |
AgGrid(textrank_keywords_df, reload_data = False,
|
167 |
update_mode="value_changed",
|
168 |
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|
169 |
st.write("")
|
170 |
+
st.markdown("###### Top few vulnerability Classified paragraph/text results ######")
|
171 |
|
172 |
AgGrid(df, reload_data = False, update_mode="value_changed",
|
173 |
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|