concedo committed on
Commit
54366a7
1 Parent(s): b8328d4

Clone From https://github.com/LostRuins/datasetexplorer

Browse files
Files changed (1) hide show
  1. index.html +1043 -18
index.html CHANGED
@@ -1,19 +1,1044 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <!--
4
+ This is a JSON Dataset Explorer (Viewer and Editor) made by Concedo/LostRuins
5
+ Please go to https://github.com/LostRuins/DatasetExplorer for updates
6
+ This software is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
7
+ -Concedo
8
+ -->
9
+
10
+ <head>
11
+ <style>
12
+ html {
13
+ font-family: sans-serif;
14
+ -ms-text-size-adjust: 100%;
15
+ -webkit-text-size-adjust: 100%;
16
+ font-size: 10px;
17
+ -webkit-tap-highlight-color: rgba(0, 0, 0, 0);
18
+ }
19
+
20
+ * {
21
+ -webkit-box-sizing: border-box;
22
+ -moz-box-sizing: border-box;
23
+ box-sizing: border-box
24
+ }
25
+
26
+ :after,
27
+ :before {
28
+ -webkit-box-sizing: border-box;
29
+ -moz-box-sizing: border-box;
30
+ box-sizing: border-box
31
+ }
32
+
33
+ .cb
34
+ {
35
+ width:18px;
36
+ height:18px;
37
+ }
38
+
39
+ body {
40
+ margin: 0;
41
+ font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
42
+ font-size: 15px;
43
+ line-height: 1.42857143;
44
+ color: #333;
45
+ background-color: #fff;
46
+ }
47
+
48
+ button,
49
+ input,
50
+ select,
51
+ textarea {
52
+ font-family: inherit;
53
+ font-size: inherit;
54
+ line-height: inherit;
55
+ }
56
+
57
+ a {
58
+ color: #337ab7;
59
+ text-decoration: none;
60
+ }
61
+
62
+ a:focus,
63
+ a:hover {
64
+ color: #23527c;
65
+ text-decoration: underline;
66
+ }
67
+
68
+ a:focus {
69
+ outline: 5px auto -webkit-focus-ring-color;
70
+ outline-offset: -2px;
71
+ }
72
+
73
+ img {
74
+ vertical-align: middle;
75
+ }
76
+
77
+ .nest1
78
+ {
79
+ color:rgb(89, 58, 202);
80
+ }
81
+
82
+ .nest2
83
+ {
84
+ color:rgb(197, 69, 69);
85
+ }
86
+
87
+ .unselectable {
88
+ -webkit-touch-callout: none !important;
89
+ -webkit-user-select: none !important;
90
+ -khtml-user-select: none !important;
91
+ -moz-user-select: none !important;
92
+ -ms-user-select: none !important;
93
+ user-select: none !important;
94
+ }
95
+
96
+ .navbar {
97
+ background-color: #333;
98
+ overflow: hidden;
99
+ position: relative;
100
+ top: 0;
101
+ width: 100%;
102
+ }
103
+
104
+ /* Links inside the navbar */
105
+ .navbar a {
106
+ float: left;
107
+ display: block;
108
+ color: white;
109
+ text-align: center;
110
+ padding: 14px 20px;
111
+ text-decoration: none;
112
+ }
113
+
114
+ /* Change color on hover */
115
+ .navbar a:hover {
116
+ background-color: #ddd;
117
+ color: black;
118
+ }
119
+
120
+ </style>
121
+ <title>Concedo JSON Dataset Explorer</title>
122
+
123
+ <script>
124
+ var db = [];
125
+ var lastleftitem = null;
126
+ var lastselectedidx = 0;
127
+ var currdbidxs = [];
128
+ var selecteddb = {};
129
+
130
/**
 * Escapes a value so it can be inserted into innerHTML as literal text.
 *
 * HTML metacharacters are replaced by entities, newlines become <br> and
 * tabs become four non-breaking spaces.
 *
 * @param {*} unsafe - Any value; it is converted to a string before escaping.
 * @returns {string} The escaped markup, or "" for null/undefined.
 */
function formatHtml(unsafe)
{
    // Only null/undefined collapse to "". Falsy data values such as 0 or
    // false are real content and must still be shown.
    if (unsafe == null) { return ""; }
    return String(unsafe)
        .replace(/&/g, "&amp;") // must run first so later entities are not double-escaped
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#039;")
        .replace(/\n/g, '<br>')
        .replace(/\t/g, '&nbsp;&nbsp;&nbsp;&nbsp;');
}
143
/**
 * Backslash-escapes every regular-expression metacharacter in a string so
 * the result matches the input literally when passed to `new RegExp`.
 *
 * @param {string} string - Text to escape.
 * @returns {string} The escaped text.
 */
function escapeRegExp(string) {
    const metachars = /[.*+?^${}()|[\]\\]/g;
    return string.replace(metachars, (hit) => `\\${hit}`);
}
146
/**
 * Reports whether a value is an array.
 *
 * Uses Array.isArray rather than comparing `constructor`, so an object
 * that merely has a `constructor` property is not mistaken for an array.
 *
 * @param {*} obj - Value to test; null and undefined are allowed.
 * @returns {boolean} True only for real arrays.
 */
function isArr(obj) {
    return Array.isArray(obj);
}
153
/**
 * Moves the selection to the list entry that follows the most recently
 * clicked one. Does nothing when no entry was clicked yet or when the
 * clicked entry has no following sibling.
 */
function nextitm()
{
    if (lastleftitem == null) { return; }
    const following = lastleftitem.nextSibling;
    if (!following) { return; }
    clickItem(following);
}
164
/**
 * Shows the dataset entry belonging to a list row and highlights that row.
 *
 * Reads the row's `data-index` attribute, stores it in `lastselectedidx`,
 * renders `db[index]` as HTML into the #contents pane, and moves the grey
 * highlight from the previously clicked row to this one.
 *
 * @param {Element} titleDiv - The list row that was activated.
 */
function clickItem(titleDiv)
{
    const idx = titleDiv.getAttribute('data-index');
    const pane = document.getElementById('contents');
    lastselectedidx = idx;
    pane.innerHTML = renderPage(db[idx], true).text;
    pane.scrollTop = 0;

    // Clear the old highlight before applying the new one, so clicking the
    // same row twice still leaves it highlighted.
    if (lastleftitem) {
        lastleftitem.style.backgroundColor = null;
    }
    titleDiv.style.backgroundColor = '#d3d3d3';
    lastleftitem = titleDiv;
}
177
/**
 * Counts the entries of `selecteddb` whose flag is truthy and writes
 * "<count> Selected" into the #selcounter element.
 */
function countsel()
{
    // Object.values avoids the undeclared loop variable of a bare
    // `for (k in ...)`, which leaks a global and throws in strict mode.
    const count = Object.values(selecteddb).filter(Boolean).length;
    document.getElementById("selcounter").innerText = `${count} Selected`;
}
183
/**
 * Copies the checked state of the checkbox `itm<idx>` into `selecteddb`.
 * Leaves `selecteddb` untouched when no such checkbox exists.
 *
 * @param {number|string} idx - Dataset index the checkbox belongs to.
 */
function selitm(idx)
{
    const checkbox = document.getElementById(`itm${idx}`);
    if (!checkbox) { return; }
    selecteddb[idx] = checkbox.checked;
}
191
/**
 * Checks or unchecks a positional range of the currently listed rows.
 *
 * The range comes from the #selectamt text box:
 *   - empty      : every listed row
 *   - "N"        : the first N rows
 *   - "A-B"      : rows at positions A (inclusive) to B (exclusive)
 * Both bounds are capped at the number of listed rows. A dashed value that
 * does not have exactly two non-empty parts falls back to every row.
 *
 * @param {boolean} isSel - True to check the rows, false to uncheck them.
 */
function selrange(isSel)
{
    const total = currdbidxs.length;
    const spec = document.getElementById("selectamt").value;
    let first = 0;
    let stop = total;

    if (spec != "") {
        if (!spec.includes("-")) {
            stop = Math.min(total, spec);
        } else {
            const bounds = spec.split("-");
            if (bounds.length == 2 && bounds[0] != "" && bounds[1] != "") {
                first = Math.min(bounds[0], total);
                stop = Math.min(bounds[1], total);
            }
        }
    }

    for (let pos = first; pos < stop; ++pos) {
        const dbidx = currdbidxs[pos];
        const checkbox = document.getElementById(`itm${dbidx}`);
        if (!checkbox) { continue; }
        checkbox.checked = isSel;
        selitm(dbidx);
    }
    countsel();
}
225
+
226
/**
 * Flips the checkbox of every currently listed row, records the new state
 * through selitm, then refreshes the selection counter.
 */
function invertsel()
{
    for (const dbidx of currdbidxs) {
        const checkbox = document.getElementById(`itm${dbidx}`);
        if (!checkbox) { continue; }
        checkbox.checked = !checkbox.checked;
        selitm(dbidx);
    }
    countsel();
}
239
+
240
+
241
/**
 * Counts every run of `n` consecutive whitespace-separated words in a text.
 *
 * @param {string} text - Text to analyse.
 * @param {number} n - Number of words per sequence.
 * @returns {{ng: string, cnt: number}[]} Sequences with their counts, most
 *   frequent first; equal counts keep first-seen order.
 */
function ngramParser(text, n) {
    const words = text.split(/\s+/).filter(word => word.length > 0);

    // A Map is used instead of a plain object because tokens such as
    // "constructor" or "toString" collide with inherited object properties
    // and would produce NaN or lost counts.
    const ngrams = new Map();
    for (let i = 0; i <= words.length - n; i++) {
        const ngram = words.slice(i, i + n).join(' ');
        ngrams.set(ngram, (ngrams.get(ngram) || 0) + 1);
    }

    return Array.from(ngrams.entries())
        .sort((a, b) => b[1] - a[1])
        .map(([ng, cnt]) => ({ ng, cnt }));
}
255
+
256
/**
 * Shows the most frequent 1- to 5-word sequences of all selected entries.
 *
 * The plain-text rendering of every entry flagged in `selecteddb` is joined
 * and passed to ngramParser; up to 100 results per sequence length are
 * written to the #contents pane. When nothing is selected, or the selection
 * renders to no text, an error message is shown instead.
 */
function displayngram()
{
    const contentsDiv = document.getElementById('contents');

    // Object.keys replaces a bare `for (k in ...)`, whose undeclared loop
    // variable leaked a global and throws in strict mode.
    const parts = [];
    for (const key of Object.keys(selecteddb)) {
        if (selecteddb[key]) {
            parts.push(renderPage(db[key], false).text);
        }
    }
    const fulltxt = parts.join('\n\n');

    if (fulltxt == "")
    {
        contentsDiv.innerText = "Error: You must select at least 1 item to calculate N-Grams!";
        contentsDiv.scrollTop = 0;
        return;
    }

    let pageText = "";
    for (let i = 1; i <= 5; ++i)
    {
        const res = ngramParser(fulltxt, i);
        const lim = Math.min(res.length, 100);
        pageText += `[ ${i} Word Sequences ]\n`;
        for (let j = 0; j < lim; ++j)
        {
            pageText += `${res[j].cnt} - ${res[j].ng}\n`;
        }
        pageText += `\n\n`;
    }
    contentsDiv.innerText = pageText;
    contentsDiv.scrollTop = 0;
}
290
+
291
/**
 * Saves the JSON typed into the #editbox textarea back into the dataset.
 *
 * The text is parsed and stored at `db[lastselectedidx]`, then the entry is
 * rendered again in the #contents pane. Nothing happens when the index is
 * out of range or the textarea is missing or empty. A parse failure is
 * logged to the console and the editor is left as it is.
 */
function endEditElem()
{
    const pane = document.getElementById('contents');
    if (lastselectedidx >= db.length) { return; }

    const editor = document.getElementById('editbox');
    if (!editor || !editor.value) { return; }

    try {
        db[lastselectedidx] = JSON.parse(editor.value);
        pane.innerHTML = renderPage(db[lastselectedidx], true).text;
        pane.scrollTop = 0;
    } catch (e) {
        console.log("Error decoding text: " + e);
    }
}
311
+
312
/**
 * Replaces the #contents pane with an editor for the current entry.
 *
 * The editor is a "Save Edits" button plus a textarea prefilled with the
 * pretty-printed JSON of `db[lastselectedidx]`. Does nothing when the
 * index is out of range.
 */
function editElem()
{
    if (lastselectedidx >= db.length) { return; }

    const pretty = JSON.stringify(db[lastselectedidx], null, 2);
    const pane = document.getElementById('contents');
    pane.innerHTML = `<button class="unselectable" onclick="return endEditElem()" style="float:right;">Save Edits</button><textarea style="line-height:1.1;width:100%;height:calc(100% - 54px);resize: none;" id="editbox" placeholder="" rows="15"></textarea>`;
    // The value is assigned as a property, not interpolated into the markup,
    // so the JSON text needs no HTML escaping.
    document.getElementById('editbox').value = pretty;
    pane.scrollTop = 0;
}
325
+
326
/**
 * Renders one dataset entry to text and gathers simple statistics.
 *
 * Each own key of `item` produces one section. A value that is an array is
 * flattened: dictionary elements contribute one sub-section per own key,
 * string elements contribute one line, and every other element (numbers,
 * booleans, null, nested arrays) is skipped. A value that is a dictionary
 * contributes one sub-section per own key. Any other value is printed
 * as-is.
 *
 * @param {*} item - The dataset entry, normally a parsed JSON object.
 * @param {boolean} useHtml - True for escaped HTML with an "Edit Item"
 *   button and coloured key labels; false for plain text containing only
 *   the values.
 * @returns {{text: string, nodes: number, words: number}} `text` is the
 *   rendering, `nodes` counts top-level keys plus nested dictionary keys,
 *   and `words` is countWords(text). `words` is only computed when a word
 *   filter is set or the list shows word counts; otherwise it is 0.
 */
function renderPage(item, useHtml) {
    // Call through the prototype so data with its own "hasOwnProperty" key
    // cannot break the check.
    const hasOwn = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);
    // typeof-based test: safe for null, unlike reading `.constructor`.
    const isDict = (v) => v !== null && typeof v === "object" && !Array.isArray(v);

    let tot = "";
    let nodecount = 0;
    let wordcount = 0;

    // Renders every own key of a nested dictionary and counts each as a node.
    const renderDict = (dict) => {
        let out = "";
        for (const subkey in dict) {
            if (!hasOwn(dict, subkey)) { continue; }
            const subval = dict[subkey];
            nodecount += 1;
            if (useHtml) {
                out += `<span class='nest2'>[[${formatHtml(subkey)}]]</span><br>${formatHtml(subval)}<br><br>`;
            } else {
                out += `\n\n${subval}`;
            }
        }
        return out;
    };

    // Flattens an array value; null elements are skipped instead of crashing.
    const renderArray = (arr) => {
        let out = "";
        for (const arritem of arr) {
            if (isDict(arritem)) {
                out += renderDict(arritem);
            } else if (typeof arritem === "string") {
                out += useHtml ? `<br>${formatHtml(arritem)}` : `\n${arritem}`;
            }
        }
        return out;
    };

    if (useHtml)
    {
        tot += `<button class="unselectable" onclick="return editElem()" style="float:right;">Edit Item</button>`;
    }

    for (const key in item) {
        if (!hasOwn(item, key)) { continue; }
        let val = item[key];
        let valmod = false; // true once val holds already-rendered text
        if (Array.isArray(val)) {
            val = renderArray(val);
            valmod = true;
        } else if (isDict(val)) {
            val = renderDict(val);
            valmod = true;
        }
        nodecount += 1;
        if (useHtml) {
            // Already-rendered text must not be escaped a second time.
            tot += `<span class='nest1'>[${formatHtml(key)}]</span><br>${valmod ? val : formatHtml(val)}<br><br>`;
        } else {
            tot += `\n\n${val}`;
        }
    }

    // Word counting is skipped unless a filter or the list display needs it.
    if (document.getElementById('minwords').value != "" || document.getElementById('maxwords').value != "" || document.getElementById('showdetail').value == "words")
    {
        wordcount = countWords(tot);
    }
    return { "text": tot, "nodes": nodecount, "words": wordcount };
}
399
+
400
/**
 * Resets the filter and selection inputs: empties every min/max box, the
 * search box and the select-range box, and unticks the case-sensitive and
 * regex checkboxes.
 */
function clearInputs()
{
    const textIds = [
        'minlen', 'maxlen',
        'minnodes', 'maxnodes',
        'minwords', 'maxwords',
        'minmatches', 'maxmatches',
        'search',
    ];
    for (const id of textIds) {
        document.getElementById(id).value = "";
    }
    for (const id of ['casesensitive', 'useregex']) {
        document.getElementById(id).checked = false;
    }
    document.getElementById('selectamt').value = "";
}
415
+
416
/**
 * Rebuilds the left-hand item list from `db` using the current filters.
 *
 * Every entry is rendered as plain text and dropped when it violates a
 * character, turn (node), word or search-match limit. Each surviving entry
 * gets a row with a checkbox, its dataset index and the value chosen in
 * #showdetail. Rows are optionally sorted by that value. `currdbidxs` is
 * replaced by the dataset indices in displayed order and #rescounter shows
 * the number of rows.
 *
 * An invalid regular expression in the search box is reported in the
 * #contents pane and the existing list is left unchanged.
 */
function displayTitles() {
    const field = (id) => document.getElementById(id).value;

    const tocDiv = document.getElementById('toc');
    const contentsDiv = document.getElementById('contents');
    const minlen = field('minlen');
    const maxlen = field('maxlen');
    const minnodes = field('minnodes');
    const maxnodes = field('maxnodes');
    const minwords = field('minwords');
    const maxwords = field('maxwords');
    const minmatches = field('minmatches');
    const maxmatches = field('maxmatches');
    const showdetail = field('showdetail');
    const search = field('search');

    // Compile the search expression once, before touching the list, so a bad
    // pattern is reported instead of throwing halfway through a rebuild.
    let searchexp = null;
    if (search != "") {
        const pattern = document.getElementById('useregex').checked ? search : escapeRegExp(search);
        const flags = `g${document.getElementById('casesensitive').checked ? "" : "i"}`;
        try {
            searchexp = new RegExp(pattern, flags);
        } catch (e) {
            contentsDiv.innerText = `Error: Invalid search pattern.\n${e.message}`;
            contentsDiv.scrollTop = 0;
            return;
        }
    }

    contentsDiv.innerText = "";
    tocDiv.innerText = ''; // Clear previous titles
    let ascindex = 0;
    currdbidxs = [];

    db.forEach((item, index) => {
        const rendered = renderPage(item, false);
        const len = rendered.text.length;
        let matchescount = 0;

        // Limits are the raw text-box strings; the comparison operators
        // coerce them to numbers, and a non-numeric limit never excludes.
        if (minlen != "" && len < minlen) { return; }
        if (maxlen != "" && len > maxlen) { return; }
        if (minnodes != "" && rendered.nodes < minnodes) { return; }
        if (maxnodes != "" && rendered.nodes > maxnodes) { return; }
        if (minwords != "" && rendered.words < minwords) { return; }
        if (maxwords != "" && rendered.words > maxwords) { return; }

        if (searchexp) {
            // String.prototype.match resets lastIndex on a global regex, so
            // the compiled expression is safe to reuse for every entry.
            const m = rendered.text.match(searchexp);
            if (!m) { return; }
            matchescount = m.length;
            if (minmatches != "" && matchescount < minmatches) { return; }
            if (maxmatches != "" && matchescount > maxmatches) { return; }
        }

        let shownvalue = "";
        switch (showdetail)
        {
            case "chars":
                shownvalue = len;
                break;
            case "words":
                shownvalue = rendered.words;
                break;
            case "turns":
                shownvalue = rendered.nodes;
                break;
            case "matches":
                shownvalue = matchescount;
                break;
        }

        const titleDiv = document.createElement('div');
        titleDiv.innerHTML = `<input class="cb" type="checkbox" id="itm${index}" onclick="selitm(${index});countsel();" ${selecteddb[index]?"checked":""}> ${index} - ${shownvalue}`;
        titleDiv.style.cursor = 'pointer';
        titleDiv.setAttribute('data-value', shownvalue);
        titleDiv.setAttribute('data-index', index);
        titleDiv.addEventListener('click', function () {
            clickItem(titleDiv);
        });
        tocDiv.appendChild(titleDiv);
        currdbidxs.push(index);
        ascindex += 1;
    });

    const sorter = field('sortorder');
    if (sorter == "1" || sorter == "2")
    {
        const direction = (sorter == "2") ? 1 : -1; // "2" ascending, "1" descending
        const shown = (el) => Number.parseInt(el.getAttribute('data-value'), 10);
        const childDivs = Array.from(tocDiv.children);
        childDivs.sort((a, b) => direction * (shown(a) - shown(b)));

        currdbidxs = [];
        childDivs.forEach(child => {
            tocDiv.appendChild(child); // re-appending moves the row into sorted position
            // Stored as a number so sorted and unsorted lists hold the same type.
            currdbidxs.push(Number(child.getAttribute('data-index')));
        });
    }

    document.getElementById('rescounter').innerText = `${ascindex} Results`;
}
550
+
551
+ var tempfileurl = null;
552
+ var tempfileobj = [];
553
/**
 * Removes every checked, currently listed entry from `db`.
 *
 * The checked entries are collected into `tempfileobj`. When none is
 * checked an error is shown in the #contents pane and `db` is unchanged.
 * Otherwise the entries are removed, the selection is cleared and the list
 * is rebuilt.
 */
function erasesel()
{
    const contentsDiv = document.getElementById('contents');
    tempfileobj = [];
    for (const ci of currdbidxs)
    {
        const box = document.getElementById(`itm${ci}`);
        if (box && box.checked)
        {
            tempfileobj.push(db[ci]);
        }
    }

    if (tempfileobj.length == 0)
    {
        contentsDiv.innerText = "Error: You must select at least 1 item to erase!";
        contentsDiv.scrollTop = 0;
        return;
    }

    // A Set gives constant-time membership tests with the same equality as
    // Array.includes, so a large erase no longer scans the selection once
    // per dataset entry.
    const doomed = new Set(tempfileobj);
    db = db.filter((el) => !doomed.has(el));
    selecteddb = {};
    countsel();
    displayTitles();
}
579
/**
 * Shrinks `db` to only the checked, currently listed entries.
 *
 * The checked entries are gathered into `tempfileobj`, which then becomes
 * the new `db`. When none is checked an error is shown in the #contents
 * pane and `db` is unchanged. Afterwards the selection is cleared and the
 * list is rebuilt.
 */
function prunesel()
{
    const pane = document.getElementById('contents');
    tempfileobj = [];
    currdbidxs.forEach((dbidx) => {
        const checkbox = document.getElementById(`itm${dbidx}`);
        if (checkbox && checkbox.checked) {
            tempfileobj.push(db[dbidx]);
        }
    });

    if (tempfileobj.length == 0) {
        pane.innerText = "Error: You must select at least 1 item to keep!";
        pane.scrollTop = 0;
        return;
    }

    db = tempfileobj;
    selecteddb = {};
    countsel();
    displayTitles();
}
605
+
606
/**
 * Downloads the checked, currently listed entries as "selected_dataset.json".
 *
 * The entries are gathered into `tempfileobj`, serialised as indented JSON
 * into a Blob and offered through the hidden #tempfile link. The previous
 * object URL, if any, is revoked first. When nothing is checked, or
 * building the file throws, a message is shown in the #contents pane.
 */
function exportsel()
{
    const pane = document.getElementById('contents');
    tempfileobj = [];
    currdbidxs.forEach((dbidx) => {
        const checkbox = document.getElementById(`itm${dbidx}`);
        if (checkbox && checkbox.checked) {
            tempfileobj.push(db[dbidx]);
        }
    });

    if (tempfileobj.length == 0) {
        pane.innerText = "Error: Please select at least 1 sample to export!";
        return;
    }

    const link = document.getElementById("tempfile");
    try {
        const payload = JSON.stringify(tempfileobj, null, 2);
        const blob = new Blob([payload], { type: 'application/json' });
        if (tempfileurl) {
            window.URL.revokeObjectURL(tempfileurl);
        }
        tempfileurl = window.URL.createObjectURL(blob);
        link.href = tempfileurl;
        link.target = '_blank';
        link.download = "selected_dataset.json";
        // The click is deferred by 20 ms rather than issued synchronously.
        setTimeout(function () { link.click(); }, 20);
    } catch (e) {
        pane.innerText = "File could not be saved. It might be too large.\nTry splitting into multiple files instead.\n\n"+e;
        console.log("Error saving: " + e);
    }
}
643
+
644
+ let lastfile = null;
645
/**
 * Loads the file held in `lastfile` into the dataset.
 *
 * Files up to 999 MB are read as text and parsed first as one JSON document
 * and, failing that, as JSON Lines. If both fail and the file looks like
 * JSON/JSONL, or if the file is larger than 999 MB, the file is instead
 * scanned in 32 MB batches for top-level `{...}` objects.
 *
 * The #appendfile mode decides whether the loaded entries replace `db`, are
 * appended to it, or are subtracted from it. #dedup optionally removes
 * duplicate entries and #elemrange limits which entries are taken. Progress
 * and errors are reported in the #contents pane.
 */
function loadfile()
{
    if (!lastfile)
    {
        return;
    }
    // Capture the file now: clearfile() can null `lastfile` while an
    // asynchronous load is still running.
    const file = lastfile;
    const mode = document.getElementById('appendfile').value;
    const replace_existing = (mode == "0");
    const merge_existing = (mode == "1");
    const subtract_existing = (mode == "2");
    const contentsDiv = document.getElementById('contents');
    const dedup = (document.getElementById('dedup').value == "1");
    const filesizemb = file.size / (1024 * 1024);
    const RANGE_LIMIT = 999999999;
    let newdb = [];

    // Parses #elemrange ("", "N" or "A-B") into a half-open [start, end) range.
    function readElemRange()
    {
        // A non-numeric bound falls back to its default instead of turning
        // into NaN, which would silently load nothing.
        const bound = (raw, fallback) => {
            const n = Math.min(raw, RANGE_LIMIT);
            return Number.isNaN(n) ? fallback : n;
        };
        const elemrange = document.getElementById('elemrange').value;
        let elemstart = 0;
        let elemend = RANGE_LIMIT;
        if (elemrange != "")
        {
            if (elemrange.includes("-"))
            {
                const svs = elemrange.split("-");
                if (svs.length == 2 && svs[0] != "" && svs[1] != "")
                {
                    elemstart = bound(svs[0], 0);
                    elemend = bound(svs[1], RANGE_LIMIT);
                }
            }
            else
            {
                elemend = bound(elemrange, RANGE_LIMIT);
            }
        }
        return { elemstart, elemend };
    }

    // Applies the import mode and dedup option to `newdb`, then refreshes the UI.
    function postLoad()
    {
        if (replace_existing)
        {
            db = newdb;
        }
        else if (merge_existing)
        {
            // A loop rather than push(...newdb): spreading a huge array can
            // exceed the engine's argument limit.
            for (const entry of newdb)
            {
                db.push(entry);
            }
        }
        else
        {
            // Subtract: entries are compared by their JSON text.
            const set = new Set(db.map(item => JSON.stringify(item)));
            for (const entry of newdb)
            {
                set.delete(JSON.stringify(entry));
            }
            db = Array.from(set).map(item => JSON.parse(item));
        }
        if (dedup && !subtract_existing)
        {
            const set = new Set(db.map(item => JSON.stringify(item)));
            db = Array.from(set).map(item => JSON.parse(item));
        }
        selecteddb = {};
        countsel();
        displayTitles();
        contentsDiv.innerText = "Dataset File Loaded\nSelect an item to browse contents.";
    }

    // Scans the file in batches for top-level {...} objects.
    async function streamJsonFile(file) {
        const CHUNK_SIZE = 32 * 1024 * 1024; // 32 MB chunks
        const decoder = new TextDecoder('utf-8');
        const reader = file.stream().getReader();
        const { elemstart, elemend } = readElemRange();
        let elemcounter = 0;

        let buffer = '';
        let accusize = 0;
        let accu = [];
        newdb = [];
        let lastloop = false;
        let streamdone = false;
        let errs = null;

        while (true) {
            const { done, value } = await reader.read();

            // Decode the chunk to a string
            const chunk = decoder.decode(value, { stream: true });
            accu.push(chunk);
            if (done) {
                streamdone = true;
                lastloop = true;
            }
            accusize += chunk.length;

            // Keep accumulating until a full batch is available.
            if (accusize < CHUNK_SIZE && !lastloop)
            {
                continue;
            }

            buffer += accu.join("");
            accu = [];
            accusize = 0;

            // Scan the buffer from the start with fresh parser state. The
            // buffer always begins outside any object because it was cut
            // right after the last complete one.
            let objstart = 0;
            let lastobjend = -1; // index of the last complete object's '}', -1 if none
            const bl = buffer.length;
            let inObject = false;
            let inString = false;
            let escapeNextChar = false;
            let curlyBraceCount = 0;
            for (let i = 0; i < bl; i++) {
                const char = buffer[i];

                if (char === '\\' && inString) {
                    escapeNextChar = !escapeNextChar;
                    continue;
                }
                if (char === '"' && !escapeNextChar) {
                    inString = !inString;
                }
                escapeNextChar = false;
                if (inString) {
                    continue;
                }

                if (char === '{') {
                    if (!inObject) {
                        inObject = true;
                        objstart = i;
                    }
                    curlyBraceCount++;
                } else if (char === '}') {
                    curlyBraceCount--;
                }

                // When curlyBraceCount returns to 0, we have a complete object
                if (inObject && curlyBraceCount === 0) {
                    const jsonstr = buffer.slice(objstart, i + 1);
                    lastobjend = i;
                    inObject = false;
                    try {
                        const jsonObject = JSON.parse(jsonstr);
                        if (elemcounter >= elemstart && elemcounter < elemend)
                        {
                            newdb.push(jsonObject);
                        }
                        ++elemcounter;
                    } catch (ex) {
                        errs = ex;
                        console.log(ex);
                    }
                }
            }

            // Drop everything up to the last complete object. Tracking the
            // end index with -1 as "none" also covers a complete object that
            // starts at buffer index 0, which would otherwise stay in the
            // buffer and be parsed again on the next batch.
            if (lastobjend >= 0)
            {
                buffer = buffer.slice(lastobjend + 1);
                if (newdb.length >= (elemend - elemstart))
                {
                    errs = `Loading stopped at ${newdb.length} items. Specified range exceeded.`;
                    lastloop = true;
                }
            }
            if (lastloop)
            {
                break;
            }
        }

        if (!streamdone)
        {
            // Stopped early: release the underlying file stream.
            await reader.cancel();
        }

        if (newdb.length > 0)
        {
            postLoad();
            if (errs)
            {
                contentsDiv.innerText = `Dataset partially loaded.\nInterruption occurred while loading dataset file: ${file.name}\n${errs}`;
            }
        } else {
            contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}`;
        }
    }

    // Starts the stream loader, shows a progress banner and reports failures.
    function startStream(banner)
    {
        const pending = streamJsonFile(file);
        contentsDiv.innerText = banner;
        pending.catch((err) => {
            console.log(err);
            contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}\n${err}`;
        });
    }

    const reader = new FileReader();
    reader.onload = function (e) {
        const content = e.target.result;
        let loadok = false;
        try
        {
            //try normal json
            const parsed = JSON.parse(content);
            // A document whose top level is not an array is treated as a
            // single entry, so the later slice() cannot fail.
            newdb = Array.isArray(parsed) ? parsed : [parsed];
            loadok = true;
        }
        catch (err)
        {
            console.log(err);
            //try jsonl
            try
            {
                if (content)
                {
                    const lines = content.split('\n');
                    newdb = lines.filter(line => line.trim()).map(line => JSON.parse(line));
                    loadok = true;
                }
            }
            catch (err2)
            {
                console.log(err2);
            }
        }

        if (loadok)
        {
            const { elemstart, elemend } = readElemRange();
            newdb = newdb.slice(elemstart, elemend);
            postLoad();
            return;
        }

        if (filesizemb > 0.01 && (file.type === "application/json" || file.name.endsWith(".jsonl")))
        {
            //try big json
            startStream(`Attempting Fallback Stream-Load of Massive File:\n${file.name} (${filesizemb.toFixed(0)} mb).\nPlease wait, this may take a long time...`);
        } else {
            contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}`;
        }
        contentsDiv.scrollTop = 0;
    };
    reader.onerror = function () {
        contentsDiv.innerText = `Error: Could not load dataset file: ${file.name}`;
        contentsDiv.scrollTop = 0;
    };

    if (filesizemb > 999) //files larger than 1gb just use streamload
    {
        //try big json
        startStream(`Attempting Stream-Load of Massive File:\n${file.name} (${filesizemb.toFixed(0)} mb)\nPlease wait, this may take a long time...`);
    } else {
        reader.readAsText(file);
    }
}
908
/**
 * Forgets the previously chosen file and empties the #pickfile input, so
 * choosing the same file again still fires a change event.
 */
function clearfile()
{
    const picker = document.getElementById('pickfile');
    lastfile = null;
    picker.value = "";
}
913
/**
 * Counts the words in a string.
 *
 * A word is a maximal run of letters, combining marks, digits or
 * underscores in any script. The earlier ASCII-only pattern split accented
 * words such as "café" and counted no words at all in non-Latin text.
 *
 * @param {string} str - Text to count; empty, null and undefined give 0.
 * @returns {number} Number of words found.
 */
function countWords(str) {
    if (!str) { return 0; }
    const wordPattern = /[\p{L}\p{M}\p{N}_]+/gu;
    const words = String(str).match(wordPattern);
    return words ? words.length : 0;
}
922
/**
 * Counts the matches of a regular-expression pattern in a string.
 *
 * Matching is global and case-insensitive unless the #casesensitive
 * checkbox is ticked.
 *
 * @param {string} str - Text to search; null and undefined count as empty.
 * @param {string} search - Regular-expression source. Escape it first when
 *   a literal match is wanted.
 * @returns {number} Number of matches.
 * @throws {SyntaxError} When `search` is not a valid regular expression.
 */
function countOccurances(str,search)
{
    const flags = `g${document.getElementById('casesensitive').checked?"":"i"}`;
    const exp = new RegExp(search, flags);
    // Match against the `str` parameter. The previous body referenced an
    // undefined name `temp` and therefore always threw.
    const haystack = (str == null) ? "" : String(str);
    return (haystack.match(exp) || []).length;
}
928
+
929
/**
 * Shows one of the four toolbar panels and hides the other three.
 *
 * @param {number|string} id - Panel number; the element shown is `panel<id>`.
 */
function togglepanel(id)
{
    const panels = ['panel1', 'panel2', 'panel3', 'panel4']
        .map((panelId) => document.getElementById(panelId));
    panels.forEach((panel) => {
        panel.style.display = "none";
    });
    document.getElementById(`panel${id}`).style.display = "block";
}
938
+
939
// Once the DOM is ready, load a dataset whenever the user picks a file:
// the first chosen file is stored in `lastfile` and passed to loadfile().
document.addEventListener("DOMContentLoaded", () => {
    const picker = document.getElementById('pickfile');
    picker.addEventListener('change', (event) => {
        const chosen = event.target.files;
        lastfile = chosen[0];
        loadfile();
    });
});
947
+
948
+ </script>
949
+ </head>
950
+
951
+ <body>
952
+ <div class="navbar">
953
+ <a href="#" onclick="togglepanel(1)">Importing</a>
954
+ <a href="#" onclick="togglepanel(2)">Filtering</a>
955
+ <a href="#" onclick="togglepanel(3)">Selecting</a>
956
+ <a href="#" onclick="togglepanel(4)">Exporting</a>
957
+ <a style="float: right;" href="https://github.com/LostRuins/DatasetExplorer">Concedo JSON Dataset Explorer</a>
958
+ </div>
959
+
960
+ <div style="display:flex;">
961
+ <div style="padding:4px">
962
+ <div id="panel1" style="padding:2px; display: block;">
963
+ <input onclick="clearfile()" style="width: 220px;" id="pickfile" name="file" type="file" accept=".json,.jsonl">
964
+ <a id="tempfile" href="#" style="display:none;"></a>
965
+ <select id="appendfile">
966
+ <option value="0">Replace Existing</option>
967
+ <option value="1">Merge with Existing</option>
968
+ <option value="2">Subtract from Existing</option>
969
+ </select>
970
+ <select id="dedup">
971
+ <option value="0">Allow Duplicates</option>
972
+ <option value="1">Remove Duplicates</option>
973
+ </select>
974
+ LoadRange:
975
+ <input type="text" style="width:84px" placeholder="(Range: All)" value="" id="elemrange">
976
+ </div>
977
+ <div id="panel2" style="display: none;">
978
+ <div style="padding:1px">
979
+ Chars: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minlen">
980
+ <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxlen">
981
+ Turns: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minnodes">
982
+ <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxnodes">
983
+ Words: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minwords">
984
+ <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxwords">
985
+ Matches: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minmatches">
986
+ <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxmatches">
987
+ </div>
988
+ <div style="padding:2px">
989
+ <input type="text" style="width:200px" placeholder="(Search)" value="" id="search">
990
+ <input type="checkbox" id="useregex">
991
+ <label for="useregex"> Uses Regex</label>
992
+ <input type="checkbox" id="casesensitive">
993
+ <label for="casesensitive"> Case Sensitive</label>
994
+ </div>
995
+ <div style="font-size: 15px;">
996
+ <select id="showdetail">
997
+ <option value="chars">Count Characters</option>
998
+ <option value="words">Count Words</option>
999
+ <option value="turns">Count Turns</option>
1000
+ <option value="matches">Count Matches</option>
1001
+ </select>
1002
+ <select id="sortorder" >
1003
+ <option value="0">No Sort</option>
1004
+ <option value="1">Sort Descending</option>
1005
+ <option value="2">Sort Ascending</option>
1006
+ </select>
1007
+ <span style="font-size: 15px;">
1008
+ <button id="filter" onclick="displayTitles()">Filter</button>
1009
+ <button onclick="clearInputs()">Clear Filters</button>
1010
+ </span>
1011
+ </div>
1012
+
1013
+ </div>
1014
+ <div id="panel3" style="padding:2px; display: none;">
1015
+ <div>
1016
+ <input type="text" style="width:84px" placeholder="(Range: All)" value="" id="selectamt">
1017
+ <button id="selectall" onclick="selrange(true)">Select Range</button>
1018
+ <button id="deselectall" onclick="selrange(false)">Deselect Range</button>
1019
+ <button id="invertsel" onclick="invertsel()">Invert Selection</button>
1020
+ </div><div style="padding: 2px; margin-top: 6px;">
1021
+ <button onclick="erasesel()">Erase Selected</button>
1022
+ <button onclick="prunesel()">Prune To Selected</button>
1023
+ <button id="ngram" onclick="displayngram()">Calc. N-Gram</button>
1024
+ </div>
1025
+ </div>
1026
+ <div id="panel4" style="padding:2px; display: none;">
1027
+ <button id="exportsel" onclick="exportsel()">JSON Export Selected</button>
1028
+ </div>
1029
+ </div>
1030
+
1031
+
1032
+ </div>
1033
+ <div style="display:flex;height:calc(100vh - 150px); font-size:20px; margin-top:2px">
1034
+ <div style="height:100%; width:220px; padding:2px; border-right:1px solid #ccc;">
1035
+ <div><span style="font-size: 13px;"> [ <span id="selcounter">0 Selected</span> / <span id="rescounter">0 Results</span> ] </span></div>
1036
+ <div id="toc" style="height:calc(100% - 60px); padding:4px; overflow:auto; width:100%;">No Items</div>
1037
+ <button id="nextitm" style="width: 100%; padding:2px" onclick="nextitm()">Next Item</button>
1038
+ </div>
1039
+
1040
+ <div id="contents" style="height:100%; overflow:auto; width:calc(100% - 220px); padding:8px;">No File Opened</div>
1041
+ </div>
1042
+ </body>
1043
+
1044
  </html>