loubnabnl HF staff committed on
Commit
41e4b90
1 Parent(s): 28f08c2

add sliders

Browse files
Files changed (1) hide show
  1. app.py +17 -35
app.py CHANGED
@@ -27,7 +27,6 @@ def load_data(language, ext):
27
  )
28
  return ds
29
 
30
-
31
  col1, col2, _ = st.columns([1, 1, 4])
32
  with col1:
33
  chosen_language = st.sidebar.selectbox(
@@ -39,30 +38,26 @@ with col2:
39
  )
40
 
41
  st.sidebar.header("Filters")
42
- not_lexable = st.sidebar.checkbox("Not lexable?")
43
- low_alphanum = st.sidebar.checkbox("Low alphanum count?")
44
- long_lines = st.sidebar.checkbox("Long lines?")
45
-
 
 
46
 
47
- # load the dataset and get indexes of non lexable files
48
  samples = load_data(chosen_language, chosen_ext)
49
 
 
 
 
 
 
50
  if not_lexable:
51
  samples = samples.filter(lambda x: not x["lexable"])
52
- if low_alphanum:
53
- samples = samples.filter(lambda x: x["low_alphanum"])
54
- if long_lines:
55
- samples = samples.filter(lambda x: x["long_lines"])
56
 
57
  max_docs = len(samples)
58
  samples = samples.add_column("idx", range(len(samples)))
59
 
60
- # info about extension
61
- # st.sidebar.markdown("### Information about the extension:")
62
- # text = f"Extension {chosen_ext} has {max_docs} files, {df[df['extension'] == chosen_ext]['low_alphanum_count'].values[0]} with very low alphanumeric ratio, \
63
- # {df[df['extension'] == chosen_ext]['long_lines_count'].values[0]} with very long lines, and {df[df['extension'] == chosen_ext]['non_lexable_count'].values[0]} \
64
- # are not lexable.\n These files are at indexes:\n {indexes_not_lexed}."
65
- # st.sidebar.markdown(text)
66
 
67
  if max_docs > 0:
68
  col_1, _ = st.columns([3, 3])
@@ -74,26 +69,13 @@ if max_docs > 0:
74
  value=0,
75
  step=1,
76
  )
77
-
78
-
79
  # info about the chosen example
80
  example = samples[index_example]
81
-
82
- # st.markdown("#### Information about the chosen example:")
83
- # text_alpha = "**has**" if example["long_lines"] else "doesn't have"
84
- # text_lines = "**has**" if example["low_alphanum"] else "doesn't have"
85
- # text_lexer = "is" if example["lexable"] else "**isn't**"
86
-
87
- # st.markdown(
88
- # f"Example {index_example} {text_alpha} a very low alphanumeric ratio, \
89
- # {text_lines} very long lines, and {text_lexer} lexable."
90
- # )
91
-
92
-
93
- # display file content
94
  st.markdown("#### File content:")
95
- if not example["lexable"]:
96
- st.write(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
97
- st.text(example['content'])
98
  else:
99
- st.code(example["content"], language=chosen_language)
 
 
27
  )
28
  return ds
29
 
 
30
  col1, col2, _ = st.columns([1, 1, 4])
31
  with col1:
32
  chosen_language = st.sidebar.selectbox(
 
38
  )
39
 
40
  st.sidebar.header("Filters")
41
+ not_lexable = st.sidebar.checkbox("Not lexable")
42
+ min_alphanum = st.sidebar.slider("Minimum alphanumeric fraction", 0.0, 1.0, 0.25)
43
+ max_line_length = st.sidebar.slider("Maximum line length", 0, 1000, 100)
44
+ max_mean_line_length = st.sidebar.slider("Maximum average line length", 0, 2000, 1000)
45
+ st.sidebar.markdown("Printed files have `max_line_length` and `average_line_length` larger than the selected values.\
46
+ `alphanumeric_fraction` is smaller than the selected value.")
47
 
 
48
  samples = load_data(chosen_language, chosen_ext)
49
 
50
+ samples = samples.filter(
51
+ lambda x: x["alphanum_fraction"] < min_alphanum
52
+ and x["max_line_length"] > max_line_length
53
+ and x["avg_line_length"] > max_mean_line_length
54
+ )
55
  if not_lexable:
56
  samples = samples.filter(lambda x: not x["lexable"])
 
 
 
 
57
 
58
  max_docs = len(samples)
59
  samples = samples.add_column("idx", range(len(samples)))
60
 
 
 
 
 
 
 
61
 
62
  if max_docs > 0:
63
  col_1, _ = st.columns([3, 3])
 
69
  value=0,
70
  step=1,
71
  )
72
+
 
73
  # info about the chosen example
74
  example = samples[index_example]
75
+
 
 
 
 
 
 
 
 
 
 
 
 
76
  st.markdown("#### File content:")
77
+ if example["lexable"]:
78
+ st.code(example["content"], language=chosen_language)
 
79
  else:
80
+ st.text(f"File can't be lexed so we remove syntax highlighting.\nContent:\n")
81
+ st.text(str(example["content"]))