Petr Tsvetkov committed on
Commit
a3d6ea6
•
1 Parent(s): 9d14712

- Fix the grazie api

Browse files

- Compute the average CM lengths for the dataset and for the production prompt
- Update the charts

api_wrappers/grazie_wrapper.py CHANGED
@@ -11,7 +11,7 @@ import config
11
  client = GrazieApiGatewayClient(
12
  grazie_agent=GrazieAgent("grazie-toolformers", "v1.0"),
13
  url=GrazieApiGatewayUrls.STAGING,
14
- auth_type=AuthType.SERVICE,
15
  grazie_jwt_token=config.GRAZIE_API_JWT_TOKEN
16
  )
17
 
 
11
  client = GrazieApiGatewayClient(
12
  grazie_agent=GrazieAgent("grazie-toolformers", "v1.0"),
13
  url=GrazieApiGatewayUrls.STAGING,
14
+ auth_type=AuthType.APPLICATION,
15
  grazie_jwt_token=config.GRAZIE_API_JWT_TOKEN
16
  )
17
 
chart_processing.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
generated_message_length_comparison.ipynb CHANGED
@@ -15,23 +15,28 @@
15
  "id": "77d51d55b41735cf"
16
  },
17
  {
18
- "metadata": {},
 
 
 
 
 
19
  "cell_type": "code",
20
  "source": [
21
- "!pip install grazie-api-gateway-client\n",
22
- "!pip install tqdm\n",
23
- "!pip install pandas\n",
24
- "!pip install datasets"
25
  ],
26
  "id": "91fa273e8987f6f6",
27
  "outputs": [],
28
- "execution_count": null
29
  },
30
  {
31
  "metadata": {
32
  "ExecuteTime": {
33
- "end_time": "2024-05-31T16:15:11.790663Z",
34
- "start_time": "2024-05-31T16:15:11.777607Z"
35
  }
36
  },
37
  "cell_type": "code",
@@ -44,13 +49,13 @@
44
  ],
45
  "id": "ce11a4c781c152e",
46
  "outputs": [],
47
- "execution_count": 20
48
  },
49
  {
50
  "metadata": {
51
  "ExecuteTime": {
52
- "end_time": "2024-05-31T16:15:13.223782Z",
53
- "start_time": "2024-05-31T16:15:13.207891Z"
54
  }
55
  },
56
  "cell_type": "code",
@@ -62,25 +67,41 @@
62
  "\treturn PROD_PROMPT.replace(\"$diff\", diff).replace(\"$text\", \"\")\n",
63
  "\n",
64
  "def generate_commit_message_prod(diff):\n",
65
- "\tgenerate_for_prompt(prod_prompt(diff))"
66
  ],
67
  "id": "84a769c8765a7b64",
68
  "outputs": [],
69
- "execution_count": 21
70
  },
71
  {
72
- "metadata": {},
 
 
 
 
 
73
  "cell_type": "code",
74
  "source": "generate_commit_message_prod(\"TEST\")",
75
  "id": "af2f20def94b0490",
76
- "outputs": [],
77
- "execution_count": null
 
 
 
 
 
 
 
 
 
 
 
78
  },
79
  {
80
  "metadata": {
81
  "ExecuteTime": {
82
- "end_time": "2024-05-31T16:15:24.167706Z",
83
- "start_time": "2024-05-31T16:15:16.619781Z"
84
  }
85
  },
86
  "cell_type": "code",
@@ -90,6 +111,16 @@
90
  ],
91
  "id": "a49cabf576c9d692",
92
  "outputs": [
 
 
 
 
 
 
 
 
 
 
93
  {
94
  "data": {
95
  "text/plain": [
@@ -161,26 +192,39 @@
161
  "</div>"
162
  ]
163
  },
164
- "execution_count": 22,
165
  "metadata": {},
166
  "output_type": "execute_result"
167
  }
168
  ],
169
- "execution_count": 22
170
  },
171
  {
172
- "metadata": {},
 
 
 
 
 
173
  "cell_type": "code",
174
  "source": "DATA[\"prediction_prod\"] = DATA.progress_apply(lambda row: generate_commit_message_prod(str(row[\"diff\"])), axis=1)",
175
  "id": "9ded493e087f991d",
176
- "outputs": [],
177
- "execution_count": null
 
 
 
 
 
 
 
 
178
  },
179
  {
180
  "metadata": {
181
  "ExecuteTime": {
182
- "end_time": "2024-05-31T16:15:24.183660Z",
183
- "start_time": "2024-05-31T16:15:24.170584Z"
184
  }
185
  },
186
  "cell_type": "code",
@@ -198,26 +242,52 @@
198
  ]
199
  }
200
  ],
201
- "execution_count": 23
202
  },
203
  {
204
- "metadata": {},
 
 
 
 
 
205
  "cell_type": "code",
206
  "source": [
207
  "prod_avg_length = DATA[\"prediction_prod\"].str.len().mean()\n",
208
  "print(f\"Prod average length: {prod_avg_length}\")"
209
  ],
210
  "id": "ec8b4412410794a4",
211
- "outputs": [],
212
- "execution_count": null
 
 
 
 
 
 
 
 
213
  },
214
  {
215
- "metadata": {},
 
 
 
 
 
216
  "cell_type": "code",
217
  "source": "print(f\"Length ratio (current / prod): {current_avg_length / prod_avg_length})\")",
218
  "id": "10f087784896eca3",
219
- "outputs": [],
220
- "execution_count": null
 
 
 
 
 
 
 
 
221
  }
222
  ],
223
  "metadata": {
 
15
  "id": "77d51d55b41735cf"
16
  },
17
  {
18
+ "metadata": {
19
+ "ExecuteTime": {
20
+ "end_time": "2024-06-20T16:09:07.968406Z",
21
+ "start_time": "2024-06-20T16:09:07.955405Z"
22
+ }
23
+ },
24
  "cell_type": "code",
25
  "source": [
26
+ "# !pip install grazie-api-gateway-client\n",
27
+ "# !pip install tqdm\n",
28
+ "# !pip install pandas\n",
29
+ "# !pip install datasets"
30
  ],
31
  "id": "91fa273e8987f6f6",
32
  "outputs": [],
33
+ "execution_count": 1
34
  },
35
  {
36
  "metadata": {
37
  "ExecuteTime": {
38
+ "end_time": "2024-06-20T16:09:10.353479Z",
39
+ "start_time": "2024-06-20T16:09:07.970405Z"
40
  }
41
  },
42
  "cell_type": "code",
 
49
  ],
50
  "id": "ce11a4c781c152e",
51
  "outputs": [],
52
+ "execution_count": 2
53
  },
54
  {
55
  "metadata": {
56
  "ExecuteTime": {
57
+ "end_time": "2024-06-20T16:09:10.368996Z",
58
+ "start_time": "2024-06-20T16:09:10.354434Z"
59
  }
60
  },
61
  "cell_type": "code",
 
67
  "\treturn PROD_PROMPT.replace(\"$diff\", diff).replace(\"$text\", \"\")\n",
68
  "\n",
69
  "def generate_commit_message_prod(diff):\n",
70
+ "\treturn generate_for_prompt(prod_prompt(diff))"
71
  ],
72
  "id": "84a769c8765a7b64",
73
  "outputs": [],
74
+ "execution_count": 3
75
  },
76
  {
77
+ "metadata": {
78
+ "ExecuteTime": {
79
+ "end_time": "2024-06-20T16:09:10.384590Z",
80
+ "start_time": "2024-06-20T16:09:10.371410Z"
81
+ }
82
+ },
83
  "cell_type": "code",
84
  "source": "generate_commit_message_prod(\"TEST\")",
85
  "id": "af2f20def94b0490",
86
+ "outputs": [
87
+ {
88
+ "data": {
89
+ "text/plain": [
90
+ "\"Certainly! I'll need to see the specific code differences (diffs) you would like to have summarized into a commit message. Please provide the diffs so I can assist you properly.\""
91
+ ]
92
+ },
93
+ "execution_count": 4,
94
+ "metadata": {},
95
+ "output_type": "execute_result"
96
+ }
97
+ ],
98
+ "execution_count": 4
99
  },
100
  {
101
  "metadata": {
102
  "ExecuteTime": {
103
+ "end_time": "2024-06-20T16:09:22.224167Z",
104
+ "start_time": "2024-06-20T16:09:10.388409Z"
105
  }
106
  },
107
  "cell_type": "code",
 
111
  ],
112
  "id": "a49cabf576c9d692",
113
  "outputs": [
114
+ {
115
+ "name": "stderr",
116
+ "output_type": "stream",
117
+ "text": [
118
+ "Using the latest cached version of the dataset since JetBrains-Research/lca-commit-message-generation couldn't be found on the Hugging Face Hub\n",
119
+ "Found the latest cached dataset configuration 'commitchronicle-py-long' at cache\\JetBrains-Research___lca-commit-message-generation\\commitchronicle-py-long\\0.0.0\\58dcef83a63cccebacd3e786afd73181cc9175e5 (last modified on Sun Apr 7 11:16:22 2024).\n",
120
+ "Using the latest cached version of the dataset since JetBrains-Research/lca-results couldn't be found on the Hugging Face Hub\n",
121
+ "Found the latest cached dataset configuration 'cmg_gpt_4_0613' at cache\\JetBrains-Research___lca-results\\cmg_gpt_4_0613\\0.0.0\\4b56bbf7243da371b3e0a42a0c9db1f37af98c39 (last modified on Fri May 31 16:00:33 2024).\n"
122
+ ]
123
+ },
124
  {
125
  "data": {
126
  "text/plain": [
 
192
  "</div>"
193
  ]
194
  },
195
+ "execution_count": 5,
196
  "metadata": {},
197
  "output_type": "execute_result"
198
  }
199
  ],
200
+ "execution_count": 5
201
  },
202
  {
203
+ "metadata": {
204
+ "ExecuteTime": {
205
+ "end_time": "2024-06-20T16:21:20.410778Z",
206
+ "start_time": "2024-06-20T16:09:22.227258Z"
207
+ }
208
+ },
209
  "cell_type": "code",
210
  "source": "DATA[\"prediction_prod\"] = DATA.progress_apply(lambda row: generate_commit_message_prod(str(row[\"diff\"])), axis=1)",
211
  "id": "9ded493e087f991d",
212
+ "outputs": [
213
+ {
214
+ "name": "stderr",
215
+ "output_type": "stream",
216
+ "text": [
217
+ "100%|██████████| 163/163 [11:58<00:00, 4.41s/it]\n"
218
+ ]
219
+ }
220
+ ],
221
+ "execution_count": 6
222
  },
223
  {
224
  "metadata": {
225
  "ExecuteTime": {
226
+ "end_time": "2024-06-20T16:21:20.426781Z",
227
+ "start_time": "2024-06-20T16:21:20.414781Z"
228
  }
229
  },
230
  "cell_type": "code",
 
242
  ]
243
  }
244
  ],
245
+ "execution_count": 7
246
  },
247
  {
248
+ "metadata": {
249
+ "ExecuteTime": {
250
+ "end_time": "2024-06-20T16:21:20.442017Z",
251
+ "start_time": "2024-06-20T16:21:20.429913Z"
252
+ }
253
+ },
254
  "cell_type": "code",
255
  "source": [
256
  "prod_avg_length = DATA[\"prediction_prod\"].str.len().mean()\n",
257
  "print(f\"Prod average length: {prod_avg_length}\")"
258
  ],
259
  "id": "ec8b4412410794a4",
260
+ "outputs": [
261
+ {
262
+ "name": "stdout",
263
+ "output_type": "stream",
264
+ "text": [
265
+ "Prod average length: 352.88957055214723\n"
266
+ ]
267
+ }
268
+ ],
269
+ "execution_count": 8
270
  },
271
  {
272
+ "metadata": {
273
+ "ExecuteTime": {
274
+ "end_time": "2024-06-20T16:21:20.457884Z",
275
+ "start_time": "2024-06-20T16:21:20.444852Z"
276
+ }
277
+ },
278
  "cell_type": "code",
279
  "source": "print(f\"Length ratio (current / prod): {current_avg_length / prod_avg_length})\")",
280
  "id": "10f087784896eca3",
281
+ "outputs": [
282
+ {
283
+ "name": "stdout",
284
+ "output_type": "stream",
285
+ "text": [
286
+ "Length ratio (current / prod): 1.772691712591923)\n"
287
+ ]
288
+ }
289
+ ],
290
+ "execution_count": 9
291
  }
292
  ],
293
  "metadata": {