the-stack-inspection

Sleeping

App Files Files Community

loubnabnl HF staff committed on Feb 13, 2023

Commit

41e4b90

•

1 Parent(s): 28f08c2

add sliders

Browse files

Files changed (1) hide show

app.py +17 -35

app.py CHANGED Viewed

@@ -27,7 +27,6 @@ def load_data(language, ext):
     )
     return ds
 col1, col2, _ = st.columns([1, 1, 4])
 with col1:
     chosen_language = st.sidebar.selectbox(
@@ -39,30 +38,26 @@ with col2:
     )
 st.sidebar.header("Filters")
-not_lexable = st.sidebar.checkbox("Not lexable?")
-low_alphanum = st.sidebar.checkbox("Low alphanum count?")
-long_lines = st.sidebar.checkbox("Long lines?")
-# load the dataset and get indexes of non lexable files
 samples = load_data(chosen_language, chosen_ext)
 if not_lexable:
     samples = samples.filter(lambda x: not x["lexable"])
-if low_alphanum:
-    samples = samples.filter(lambda x: x["low_alphanum"])
-if long_lines:
-    samples = samples.filter(lambda x: x["long_lines"])
 max_docs = len(samples)
 samples = samples.add_column("idx", range(len(samples)))
-# info about extension
-# st.sidebar.markdown("### Information about the extension:")
-# text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
-# {df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
-# are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
-# st.sidebar.markdown(text)
 if max_docs > 0:
     col_1, _ = st.columns([3, 3])
@@ -74,26 +69,13 @@ if max_docs > 0:
             value=0,
             step=1,
         )
     # info about the chosen example
     example = samples[index_example]
-    # st.markdown("#### Information about the chosen example:")
-    # text_alpha = "**has**" if example["long_lines"] else "doesn't have"
-    # text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
-    # text_lexer = "is" if example["lexable"] else "**isn't**"
-    # st.markdown(
-    #     f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
-    #     {text_lines} very long lines,  and {text_lexer} lexable."
-    # )
-    # display file content
     st.markdown("#### File content:")
-    if not example["lexable"]:
-        st.write(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
-        st.text(example['content'])
     else:
-        st.code(example["content"], language=chosen_language)

     )
     return ds
 col1, col2, _ = st.columns([1, 1, 4])
 with col1:
     chosen_language = st.sidebar.selectbox(
     )
 st.sidebar.header("Filters")
+not_lexable = st.sidebar.checkbox("Not lexable")
+min_alphanum = st.sidebar.slider("Minimum alphanumeric fraction", 0.0, 1.0, 0.25)
+max_line_length = st.sidebar.slider("Maximum line length", 0, 1000, 100)
+max_mean_line_length = st.sidebar.slider("Maximum average line length", 0, 2000, 1000)
+st.sidebar.markdown("Printed files have `max_line_length`  and `average_line_length` larger than the selected values.\
+`alphanumeric_fraction` is smaller than the selected value.")
 samples = load_data(chosen_language, chosen_ext)
+samples = samples.filter(
+    lambda x: x["alphanum_fraction"] < min_alphanum
+    and x["max_line_length"] > max_line_length
+    and x["avg_line_length"] > max_mean_line_length
+)
 if not_lexable:
     samples = samples.filter(lambda x: not x["lexable"])
 max_docs = len(samples)
 samples = samples.add_column("idx", range(len(samples)))
 if max_docs > 0:
     col_1, _ = st.columns([3, 3])
             value=0,
             step=1,
         )
     # info about the chosen example
     example = samples[index_example]
     st.markdown("#### File content:")
+    if example["lexable"]:
+        st.code(example["content"], language=chosen_language)
     else:
+        st.text(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
+        st.text(str(example["content"]))