jgyasu committed on
Commit
4f150bd
1 Parent(s): d3347e0

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +3 -1
  2. highlighter.py +44 -0
  3. lcs.py +16 -0
  4. sampling_methods.py +32 -1
README.md CHANGED
@@ -1,6 +1,8 @@
1
  ---
2
- title: AIISC Watermarking Model
3
  app_file: app.py
4
  sdk: gradio
5
  sdk_version: 4.36.0
6
  ---
 
 
 
1
  ---
2
+ title: aiisc-watermarking-model
3
  app_file: app.py
4
  sdk: gradio
5
  sdk_version: 4.36.0
6
  ---
7
+
8
+ Clone the repository and ``cd`` into it. Run ``gradio app.py`` to start the server.
highlighter.py CHANGED
@@ -38,6 +38,50 @@ def highlight_common_words(common_words, sentences, title):
38
  </div>
39
  '''
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  import re
43
 
 
38
  </div>
39
  '''
40
 
41
+ # import re
42
+
43
+ # def highlight_common_words_dict(common_words, sentences, title, bg_color):
44
+ # color_map = {}
45
+ # color_index = 0
46
+ # highlighted_html = []
47
+
48
+ # for idx, (sentence, score) in enumerate(sentences.items(), start=1):
49
+ # sentence_with_idx = f"{idx}. {sentence}"
50
+ # highlighted_sentence = sentence_with_idx
51
+
52
+ # for index, word in common_words:
53
+ # if word not in color_map:
54
+ # color_map[word] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
55
+ # color_index += 1
56
+ # escaped_word = re.escape(word)
57
+ # pattern = rf'\b{escaped_word}\b'
58
+ # highlighted_sentence = re.sub(
59
+ # pattern,
60
+ # lambda m, idx=index, color=color_map[word]: (
61
+ # f'<span style="background-color: {color}; font-weight: bold;'
62
+ # f' padding: 1px 2px; border-radius: 2px; position: relative;">'
63
+ # f'<span style="background-color: black; color: white; border-radius: 50%;'
64
+ # f' padding: 1px 3px; margin-right: 3px; font-size: 0.8em;">{idx}</span>'
65
+ # f'{m.group(0)}'
66
+ # f'</span>'
67
+ # ),
68
+ # highlighted_sentence,
69
+ # flags=re.IGNORECASE
70
+ # )
71
+ # highlighted_html.append(
72
+ # f'<div style="margin-bottom: 5px;">'
73
+ # f'{highlighted_sentence}'
74
+ # f'<div style="display: inline-block; margin-left: 5px; border: 1px solid #ddd; padding: 3px 5px; border-radius: 3px; background-color: white; font-size: 0.9em;">'
75
+ # f'Entailment Score: {score}</div></div>'
76
+ # )
77
+
78
+ # final_html = "<br>".join(highlighted_html)
79
+ # return f'''
80
+ # <div style="border: solid 1px #; padding: 16px; background-color: {bg_color}; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 8px;">
81
+ # <h3 style="margin-top: 0; font-size: 1em; color: #111827;">{title}</h3>
82
+ # <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 8px;">{final_html}</div>
83
+ # </div>
84
+ # '''
85
 
86
  import re
87
 
lcs.py CHANGED
@@ -39,3 +39,19 @@ def find_common_subsequences(sentence, str_list):
39
 
40
  return indexed_common_grams
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  return indexed_common_grams
41
 
42
# Example usage.
#
# Guarded by ``__main__`` so the demo only runs when this file is executed
# directly (``python lcs.py``). Without the guard, every ``import lcs`` would
# run the comparison below and print its result as an import side effect.
if __name__ == "__main__":
    # Reference sentence that each paraphrase below is compared against.
    sentence = "Billie Eilish, Charli XCX and Lorde are among a group of young female pop stars who are revealing, in their music, the pressure they have felt to look thin in a time of especially punishing beauty standards."
    # Candidate paraphrases; the odd spellings/accents are part of the sample
    # data and are kept verbatim.
    str_list = [
        'Young female pop stars, such as Billie Eilish, Charli XCX, and Lorde, are using their music to showcase how they have been subjected to harsh beauty standards in recent times.',
        'A group of young female pop stars, such as Billie Eilish, Charli XCX, and Lorde, are using their music to showcase how they have been subjected to harsh beauty standards in recent times.',
        'The music of a group of young female pop stars, such as Billie Eilish, Charli XCX and Lorde, is revealing the pressure to appear slim in an age where beauty is highly regulated.',
        'Through their songs, young female pop stars like Billie Eilish, Charli XCX and Lordé reveal the pressure they have been subjected to in order to appear attractive in an age of strict beauty standards.',
        'A number of female pop stars, including Billie Eilish, Charli XCx, and Lorde, are using their music to showcase the pressure they have experienced in order not to look unappealing in an age where beauty is highly valued.',
        "Some young female pop stars, such as Billie Eilish from the R&B Music Hall of Fame in Las Vegas and Charli XCX from Lorde from Manchester's Outer Banks, are using their music to showcase how they have been subjected to harsh beauty standards",
        'Among the group of young female pop stars who are using their music to showcase their unappealing appearance, are Billié Eilish and Charli XCX while Lorde is currently struggling to maintain her attractive looks.',
        'Young female pop icons such as Billie Eilish, Charliile XCX and Lorde are using their music to showcase the pressure they have been subjected too harshly to look attractive in an age where beauty is highly regulated.',
        'Billie Eilish, Charliile XCX and Lordé are just some of the young female pop stars who have been exposing their looks to music in an age where beauty standards are particularly hard to find.',
        'In the music industry, young female pop stars like Billie Eilish and Charliile XCx (with an average height of 160cm), as well as Lorde, are displaying how they have been subjected to harsh beauty standards.'
    ]

    print(find_common_subsequences(sentence, str_list))
sampling_methods.py CHANGED
@@ -129,4 +129,35 @@ def exponential_minimum_sampling(original_sentence, paraphrased_sentences):
129
  f"Paraphrased Sentence {idx+1}": sentence,
130
  "Common Substrings": common_substrings
131
  })
132
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  f"Paraphrased Sentence {idx+1}": sentence,
130
  "Common Substrings": common_substrings
131
  })
132
+ return results
133
+
134
+
135
+
136
+ #---------------------------------------------------------------------------
137
+ # Aryan's implementation; please refactor it as you see fit.
138
+
139
+ import torch
140
+ import random
141
+
142
def sample_word(words, logits, sampling_technique='inverse_transform', temperature=1.0):
    """Select one entry of ``words`` using the scores in ``logits``.

    Args:
        words: Indexable sequence of candidates; ``words[i]`` is scored by
            ``logits[i]``.
        logits: One-dimensional unnormalised scores (list or tensor). Integer
            scores are converted to floating point before use.
        sampling_technique: One of
            ``'inverse_transform'`` - draw a uniform number and take the first
            index whose cumulative softmax probability reaches it;
            ``'exponential_minimum'`` - draw one uniform ``u_i`` per candidate
            and take ``argmin(-log(u_i) / p_i)``, which selects index ``i``
            with probability ``p_i``;
            ``'temperature'`` - multinomial draw from
            ``softmax(logits / temperature)``;
            ``'greedy'`` - index of the largest logit.
        temperature: Divisor applied to the logits; only read by the
            ``'temperature'`` technique.

    Returns:
        The element of ``words`` at the selected index.

    Raises:
        ValueError: If ``sampling_technique`` is not one of the names above.
    """
    scores = torch.as_tensor(logits)
    if not scores.is_floating_point():
        # softmax is not defined for integer tensors.
        scores = scores.float()

    if sampling_technique == 'inverse_transform':
        probs = torch.softmax(scores, dim=-1)
        cumulative_probs = torch.cumsum(probs, dim=-1)
        random_prob = random.random()
        reached = torch.nonzero(cumulative_probs >= random_prob).flatten()
        # Rounding can leave the final cumulative value slightly below 1.0;
        # if the draw lands in that gap, fall back to the last index instead
        # of raising IndexError.
        sampled_index = int(reached[0]) if reached.numel() else probs.numel() - 1
    elif sampling_technique == 'exponential_minimum':
        probs = torch.softmax(scores, dim=-1)
        # Keep the uniforms strictly positive so log() stays finite.
        uniforms = torch.rand_like(probs).clamp_min(torch.finfo(probs.dtype).tiny)
        # -log(u_i) / p_i is an exponential variable with rate p_i; the index
        # of the smallest one is distributed according to probs. (Taking the
        # argmax of u_i / p_i would favour the least likely candidates.)
        sampled_index = int(torch.argmin(-torch.log(uniforms) / probs))
    elif sampling_technique == 'temperature':
        probs = torch.softmax(scores / temperature, dim=-1)
        sampled_index = torch.multinomial(probs, 1).item()
    elif sampling_technique == 'greedy':
        sampled_index = torch.argmax(scores).item()
    else:
        raise ValueError("Invalid sampling technique. Choose 'inverse_transform', 'exponential_minimum', 'temperature', or 'greedy'.")

    return words[sampled_index]