concedo committed on
Commit
8703dfc
1 Parent(s): f8d3f0a

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +15 -1080
index.html CHANGED
@@ -1,5 +1,3 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
  <!--
4
  This is a JSON Dataset Explorer (Viewer and Editor) made by Concedo/LostRuins
5
  Please go to https://github.com/LostRuins/DatasetExplorer for updates
@@ -7,1082 +5,19 @@ This software is under the AGPL v3.0 License unless otherwise exempted. Please d
7
  -Concedo
8
  -->
9
 
10
- <head>
11
- <style>
12
- html {
13
- font-family: sans-serif;
14
- -ms-text-size-adjust: 100%;
15
- -webkit-text-size-adjust: 100%;
16
- font-size: 10px;
17
- -webkit-tap-highlight-color: rgba(0, 0, 0, 0);
18
- }
19
-
20
- * {
21
- -webkit-box-sizing: border-box;
22
- -moz-box-sizing: border-box;
23
- box-sizing: border-box
24
- }
25
-
26
- :after,
27
- :before {
28
- -webkit-box-sizing: border-box;
29
- -moz-box-sizing: border-box;
30
- box-sizing: border-box
31
- }
32
-
33
- .cb
34
- {
35
- width:18px;
36
- height:18px;
37
- }
38
-
39
- body {
40
  margin: 0;
41
- font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
42
- font-size: 15px;
43
- line-height: 1.42857143;
44
- color: #333;
45
- background-color: #fff;
46
- }
47
-
48
- button,
49
- input,
50
- select,
51
- textarea {
52
- font-family: inherit;
53
- font-size: inherit;
54
- line-height: inherit;
55
- }
56
-
57
- a {
58
- color: #337ab7;
59
- text-decoration: none;
60
- }
61
-
62
- a:focus,
63
- a:hover {
64
- color: #23527c;
65
- text-decoration: underline;
66
- }
67
-
68
- a:focus {
69
- outline: 5px auto -webkit-focus-ring-color;
70
- outline-offset: -2px;
71
- }
72
-
73
- img {
74
- vertical-align: middle;
75
- }
76
-
77
- .nest1
78
- {
79
- color:rgb(89, 58, 202);
80
- }
81
-
82
- .nest2
83
- {
84
- color:rgb(197, 69, 69);
85
- }
86
-
87
- .unselectable {
88
- -webkit-touch-callout: none !important;
89
- -webkit-user-select: none !important;
90
- -khtml-user-select: none !important;
91
- -moz-user-select: none !important;
92
- -ms-user-select: none !important;
93
- user-select: none !important;
94
- }
95
-
96
- .navbar {
97
- background-color: #333;
98
- overflow: hidden;
99
- position: relative;
100
- top: 0;
101
- width: 100%;
102
- }
103
-
104
- /* Links inside the navbar */
105
- .navbar a {
106
- float: left;
107
- display: block;
108
- color: white;
109
- text-align: center;
110
- padding: 12px 14px;
111
- text-decoration: none;
112
- font-size: 14px;
113
- }
114
-
115
- /* Change color on hover */
116
- .navbar a:hover {
117
- background-color: #ddd;
118
- color: black;
119
- }
120
-
121
- </style>
122
- <title>Concedo JSON Dataset Explorer</title>
123
-
124
- <script>
125
- var db = [];
126
- var lastleftitem = null;
127
- var lastselectedidx = 0;
128
- var currdbidxs = [];
129
- var selecteddb = {};
130
-
131
- function formatHtml(unsafe)
132
- {
133
- if(!unsafe){return "";}
134
- return unsafe.toString()
135
- .replace(/&/g, "&amp;")
136
- .replace(/</g, "&lt;")
137
- .replace(/>/g, "&gt;")
138
- .replace(/"/g, "&quot;")
139
- .replace(/'/g, "&#039;")
140
- .replace(/\n/g, '<br>')
141
- .replace(/\t/g, '&nbsp;&nbsp;&nbsp;&nbsp;');
142
-
143
- }
144
- function escapeRegExp(string) {
145
- return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
146
- }
147
- function isArr(obj) {
148
- // checks for null and undefined
149
- if (obj == null) {
150
- return false;
151
- }
152
- return (obj.constructor === Array)
153
- }
154
- function nextitm()
155
- {
156
- if(lastleftitem!=null)
157
- {
158
- let sib = lastleftitem.nextSibling;
159
- if(sib)
160
- {
161
- clickItem(sib);
162
- }
163
- }
164
- }
165
- function clickItem(titleDiv)
166
- {
167
- let index = titleDiv.getAttribute('data-index');
168
- const contentsDiv = document.getElementById('contents');
169
- lastselectedidx = index;
170
- contentsDiv.innerHTML = renderPage(db[index],true).text;
171
- contentsDiv.scrollTop = 0;
172
- if (lastleftitem) {
173
- lastleftitem.style.backgroundColor = null;
174
- }
175
- lastleftitem = titleDiv;
176
- titleDiv.style.backgroundColor = '#d3d3d3';
177
- }
178
- function countsel()
179
- {
180
- let count = 0;
181
- for (k in selecteddb) if (selecteddb[k]) ++count;
182
- document.getElementById("selcounter").innerText = `${count} Selected`;
183
- }
184
- function selitm(idx)
185
- {
186
- let box = document.getElementById(`itm${idx}`);
187
- if(box)
188
- {
189
- selecteddb[idx] = box.checked;
190
- }
191
- }
192
- function selrange(isSel)
193
- {
194
- let selstart = 0;
195
- let selend = currdbidxs.length;
196
- if(document.getElementById("selectamt").value!="")
197
- {
198
- let sv = document.getElementById("selectamt").value;
199
- if(sv.includes("-"))
200
- {
201
- let svs = sv.split("-");
202
- if(svs.length==2 && svs[0]!="" && svs[1]!="")
203
- {
204
- selstart = Math.min(svs[0],selend);
205
- selend = Math.min(svs[1],selend);
206
- }
207
- }
208
- else
209
- {
210
- selstart = 0;
211
- selend = Math.min(selend,sv);
212
- }
213
- }
214
-
215
- for(let i=selstart;i<selend;++i)
216
- {
217
- let box = document.getElementById(`itm${currdbidxs[i]}`);
218
- if(box)
219
- {
220
- box.checked = isSel;
221
- selitm(currdbidxs[i]);
222
- }
223
- }
224
- countsel();
225
- }
226
-
227
- function invertsel()
228
- {
229
- for(let i=0;i<currdbidxs.length;++i)
230
- {
231
- let box = document.getElementById(`itm${currdbidxs[i]}`);
232
- if(box)
233
- {
234
- box.checked = !box.checked;
235
- selitm(currdbidxs[i]);
236
- }
237
- }
238
- countsel();
239
- }
240
-
241
-
242
- function ngramParser(text, n) {
243
- const words = text.split(/\s+/).filter(word => word.length > 0);
244
- const ngrams = {};
245
- for (let i = 0; i <= words.length - n; i++) {
246
- const ngram = words.slice(i, i + n).join(' ');
247
- if (ngrams[ngram]) {
248
- ngrams[ngram]++;
249
- } else {
250
- ngrams[ngram] = 1;
251
- }
252
- }
253
- const sortedNgrams = Object.entries(ngrams).sort((a, b) => b[1] - a[1]);
254
- return sortedNgrams.map(entry => ({ ng: entry[0], cnt: entry[1] }));
255
- }
256
-
257
- function displayngram()
258
- {
259
- const contentsDiv = document.getElementById('contents');
260
- let fulltxt = "";
261
- let parts = [];
262
- for (k in selecteddb) {
263
- if (selecteddb[k]) {
264
- parts.push(renderPage(db[k],false).text);
265
- }
266
- }
267
- fulltxt = parts.join('\n\n');
268
-
269
- if(fulltxt=="")
270
- {
271
- contentsDiv.innerText = "Error: You must select at least 1 item to calculate N-Grams!";
272
- contentsDiv.scrollTop = 0;
273
- return;
274
- }
275
-
276
- let pageText = "";
277
- for(let i=1;i<=5;++i)
278
- {
279
- let res = ngramParser(fulltxt,i);
280
- let lim = Math.min(res.length,100);
281
- pageText += `[ ${i} Word Sequences ]\n`;
282
- for(let j=0;j<lim;++j)
283
- {
284
- pageText += `${res[j].cnt} - ${res[j].ng}\n`;
285
- }
286
- pageText += `\n\n`;
287
- }
288
- contentsDiv.innerText = pageText;
289
- contentsDiv.scrollTop = 0;
290
- }
291
-
292
- function endEditElem()
293
- {
294
- const contentsDiv = document.getElementById('contents');
295
- if(lastselectedidx >= db.length)
296
- {
297
- return;
298
- }
299
- let ebox = document.getElementById('editbox');
300
- if(ebox && ebox.value)
301
- {
302
- try {
303
- var newitem = JSON.parse(ebox.value);
304
- db[lastselectedidx] = newitem;
305
- contentsDiv.innerHTML = renderPage(db[lastselectedidx],true).text;
306
- contentsDiv.scrollTop = 0;
307
- } catch (e) {
308
- console.log("Error decoding text: " + e);
309
- }
310
- }
311
- }
312
-
313
- function editElem()
314
- {
315
- if(lastselectedidx >= db.length)
316
- {
317
- return;
318
- }
319
- let itm = db[lastselectedidx];
320
- let itmstr = JSON.stringify(itm,null,2);
321
- const contentsDiv = document.getElementById('contents');
322
- contentsDiv.innerHTML = `<button class="unselectable" onclick="return endEditElem()" style="float:right;">Save Edits</button><textarea style="line-height:1.1;width:100%;height:calc(100% - 54px);resize: none;" id="editbox" placeholder="" rows="15"></textarea>`;
323
- document.getElementById('editbox').value = itmstr;
324
- contentsDiv.scrollTop = 0;
325
- }
326
-
327
- function renderPage(item, useHtml) {
328
- let tot = "";
329
- let nodecount = 0;
330
- let wordcount = 0;
331
- if(useHtml)
332
- {
333
- tot += `<button class="unselectable" onclick="return editElem()" style="float:right;">Edit Item</button>`;
334
- }
335
- for (let key in item) {
336
- if (item.hasOwnProperty(key)) {
337
- let val = item[key];
338
- let valmod = false;
339
- if (isArr(val)) {
340
- let v2 = "";
341
- for (let key2 in val) {
342
- let arritem = val[key2];
343
- if (arritem.constructor == Object) { //dict test
344
- for (let key3 in arritem) {
345
- if (arritem.hasOwnProperty(key3)) {
346
- let val3 = arritem[key3];
347
- nodecount += 1;
348
- if (useHtml) {
349
- v2 += `<span class='nest2'>[[${formatHtml(key3)}]]</span><br>${formatHtml(val3)}<br><br>`;
350
- } else {
351
- v2 += `\n\n${val3}`;
352
- }
353
- }
354
- }
355
- }
356
- else if(arritem.constructor == String)
357
- {
358
- if (useHtml) {
359
- v2 += `<br>${formatHtml(arritem)}`;
360
- } else {
361
- v2 += `\n${arritem}`;
362
- }
363
- }
364
- }
365
- valmod = true;
366
- val = v2;
367
- }
368
- else if (val && val.constructor == Object) {
369
- let v2 = "";
370
- for (let key3 in val) {
371
- if (val.hasOwnProperty(key3)) {
372
- let val3 = val[key3];
373
- nodecount += 1;
374
- if (useHtml) {
375
- v2 += `<span class='nest2'>[[${formatHtml(key3)}]]</span><br>${formatHtml(val3)}<br><br>`;
376
- } else {
377
- v2 += `\n\n${val3}`;
378
- }
379
- }
380
- }
381
- valmod = true;
382
- val = v2;
383
- }
384
- nodecount += 1;
385
- if (useHtml) {
386
- tot += `<span class='nest1'>[${formatHtml(key)}]</span><br>${valmod ? val : formatHtml(val)}<br><br>`;
387
- } else {
388
- tot += `\n\n${val}`;
389
- }
390
-
391
- }
392
- }
393
-
394
- if(document.getElementById('minwords').value != "" || document.getElementById('maxwords').value != "" || document.getElementById('showdetail').value=="words")
395
- {
396
- wordcount = countWords(tot);
397
- }
398
- return {"text":tot,"nodes":nodecount,"words":wordcount};
399
- }
400
-
401
- function clearInputs()
402
- {
403
- document.getElementById('minlen').value = "";
404
- document.getElementById('maxlen').value = "";
405
- document.getElementById('minnodes').value = "";
406
- document.getElementById('maxnodes').value = "";
407
- document.getElementById('minwords').value = "";
408
- document.getElementById('maxwords').value = "";
409
- document.getElementById('minmatches').value = "";
410
- document.getElementById('maxmatches').value = "";
411
- document.getElementById('search').value = "";
412
- document.getElementById('casesensitive').checked = false;
413
- document.getElementById('useregex').checked = false;
414
- document.getElementById('selectamt').value = "";
415
- }
416
-
417
- function displayTitles() {
418
- const tocDiv = document.getElementById('toc');
419
- const contentsDiv = document.getElementById('contents');
420
- const minlen = document.getElementById('minlen').value;
421
- const maxlen = document.getElementById('maxlen').value;
422
- const minnodes = document.getElementById('minnodes').value;
423
- const maxnodes = document.getElementById('maxnodes').value;
424
- const minwords = document.getElementById('minwords').value;
425
- const maxwords = document.getElementById('maxwords').value;
426
- const minmatches = document.getElementById('minmatches').value;
427
- const maxmatches = document.getElementById('maxmatches').value;
428
- let search = document.getElementById('search').value;
429
-
430
- contentsDiv.innerText = "";
431
-
432
- tocDiv.innerText = ''; // Clear previous titles
433
- let ascindex = 0;
434
- currdbidxs = [];
435
-
436
- if (search != "") {
437
- if (!document.getElementById('useregex').checked) {
438
- search = escapeRegExp(search);
439
- }
440
- }
441
-
442
- db.forEach((item, index) => {
443
- const titleDiv = document.createElement('div');
444
-
445
- let rendered = renderPage(item,false);
446
- let len = rendered.text.length;
447
- let matchescount = 0;
448
-
449
- if (minlen != "") {
450
- if (len < minlen) {
451
- return;
452
- }
453
- }
454
- if (maxlen != "") {
455
- if (len > maxlen) {
456
- return;
457
- }
458
- }
459
- if (minnodes != "") {
460
- if (rendered.nodes < minnodes) {
461
- return;
462
- }
463
- }
464
- if (maxnodes != "") {
465
- if (rendered.nodes > maxnodes) {
466
- return;
467
- }
468
- }
469
- if (minwords != "") {
470
- if (rendered.words < minwords) {
471
- return;
472
- }
473
- }
474
- if (maxwords != "") {
475
- if (rendered.words > maxwords) {
476
- return;
477
- }
478
- }
479
- if (search != "") {
480
- var exp = new RegExp(search, `g${document.getElementById('casesensitive').checked?"":"i"}`);
481
- let m = rendered.text.match(exp);
482
- if(!m)
483
- {
484
- return;
485
- }
486
- matchescount = m.length;
487
- }
488
- if (search != "" && minmatches != "") {
489
- if (matchescount < minmatches) {
490
- return;
491
- }
492
- }
493
- if (search != "" && maxmatches != "") {
494
- if (matchescount > maxmatches) {
495
- return;
496
- }
497
- }
498
- let shownvalue = "";
499
- switch(document.getElementById('showdetail').value)
500
- {
501
- case "chars":
502
- shownvalue = len;
503
- break;
504
- case "words":
505
- shownvalue = rendered.words;
506
- break;
507
- case "turns":
508
- shownvalue = rendered.nodes;
509
- break;
510
- case "matches":
511
- shownvalue = matchescount;
512
- break;
513
- }
514
- titleDiv.innerHTML = `<input class="cb" type="checkbox" id="itm${index}" onclick="selitm(${index});countsel();" ${selecteddb[index]?"checked":""}> ${index} - ${shownvalue}`;
515
- titleDiv.style.cursor = 'pointer';
516
- titleDiv.setAttribute('data-value',shownvalue);
517
- titleDiv.setAttribute('data-index',index);
518
- titleDiv.addEventListener('click', function () {
519
- clickItem(titleDiv);
520
- });
521
- tocDiv.appendChild(titleDiv);
522
- currdbidxs.push(index);
523
- ascindex += 1;
524
- });
525
-
526
- let sorter = document.getElementById('sortorder').value;
527
- if(sorter=="1" || sorter=="2")
528
- {
529
- let asc = (sorter=="2");
530
- const childDivs = Array.from(tocDiv.children);
531
- if(asc)
532
- {
533
- childDivs.sort((a, b) => {
534
- return parseInt(a.getAttribute('data-value')) - parseInt(b.getAttribute('data-value'));
535
- });
536
- } else {
537
- childDivs.sort((b, a) => {
538
- return parseInt(a.getAttribute('data-value')) - parseInt(b.getAttribute('data-value'));
539
- });
540
- }
541
- currdbidxs = [];
542
- childDivs.forEach(child => {
543
- tocDiv.appendChild(child);
544
- currdbidxs.push(child.getAttribute('data-index'));
545
- });
546
- }
547
-
548
- document.getElementById('rescounter').innerText = `${ascindex} Results`;
549
-
550
- }
551
-
552
- var tempfileurl = null;
553
- var tempfileobj = [];
554
- function erasesel()
555
- {
556
- const contentsDiv = document.getElementById('contents');
557
- tempfileobj = [];
558
- for(let i=0;i<currdbidxs.length;++i)
559
- {
560
- let ci = currdbidxs[i];
561
- let box = document.getElementById(`itm${ci}`);
562
- if(box && box.checked)
563
- {
564
- tempfileobj.push(db[ci]);
565
- }
566
- }
567
-
568
- if(tempfileobj.length==0)
569
- {
570
- contentsDiv.innerText = "Error: You must select at least 1 item to erase!";
571
- contentsDiv.scrollTop = 0;
572
- return;
573
- }
574
-
575
- db = db.filter( ( el ) => !tempfileobj.includes( el ) );
576
- selecteddb = {};
577
- countsel();
578
- displayTitles();
579
- }
580
- function prunesel()
581
- {
582
- const contentsDiv = document.getElementById('contents');
583
- tempfileobj = [];
584
- for(let i=0;i<currdbidxs.length;++i)
585
- {
586
- let ci = currdbidxs[i];
587
- let box = document.getElementById(`itm${ci}`);
588
- if(box && box.checked)
589
- {
590
- tempfileobj.push(db[ci]);
591
- }
592
- }
593
-
594
- if(tempfileobj.length==0)
595
- {
596
- contentsDiv.innerText = "Error: You must select at least 1 item to keep!";
597
- contentsDiv.scrollTop = 0;
598
- return;
599
- }
600
-
601
- db = tempfileobj;
602
- selecteddb = {};
603
- countsel();
604
- displayTitles();
605
- }
606
-
607
- function shuffle(array) {
608
- for (let i = array.length - 1; i > 0; i--) {
609
- // Generate a random index
610
- const j = Math.floor(Math.random() * (i + 1));
611
- // Swap elements i and j
612
- [array[i], array[j]] = [array[j], array[i]];
613
- }
614
- return array;
615
- }
616
-
617
- function shufflesel()
618
- {
619
- const contentsDiv = document.getElementById('contents');
620
- tempfileobj = [];
621
- for(let i=0;i<currdbidxs.length;++i)
622
- {
623
- let ci = currdbidxs[i];
624
- let box = document.getElementById(`itm${ci}`);
625
- if(box && box.checked)
626
- {
627
- tempfileobj.push(db[ci]);
628
- }
629
- }
630
-
631
- if(tempfileobj.length==0)
632
- {
633
- contentsDiv.innerText = "Error: You must select at least 1 item to erase!";
634
- contentsDiv.scrollTop = 0;
635
- return;
636
- }
637
-
638
- db = db.filter( ( el ) => !tempfileobj.includes( el ) );
639
- shuffle(tempfileobj);
640
- for(let i=0;i<tempfileobj.length;++i)
641
- {
642
- db.push(tempfileobj[i]);
643
- }
644
- selecteddb = {};
645
- countsel();
646
- displayTitles();
647
- }
648
-
649
- function exportsel()
650
- {
651
- const contentsDiv = document.getElementById('contents');
652
- tempfileobj = [];
653
- for(let i=0;i<currdbidxs.length;++i)
654
- {
655
- let ci = currdbidxs[i];
656
- let box = document.getElementById(`itm${ci}`);
657
- if(box && box.checked)
658
- {
659
- tempfileobj.push(db[ci]);
660
- }
661
- }
662
-
663
- if(tempfileobj.length==0)
664
- {
665
- contentsDiv.innerText = "Error: Please select at least 1 sample to export!";
666
- return;
667
- }
668
-
669
- var a = document.getElementById("tempfile");
670
- var file = null;
671
- try {
672
- file = new Blob([JSON.stringify(tempfileobj,null,2)], { type: 'application/json' });
673
- if (tempfileurl) {
674
- window.URL.revokeObjectURL(tempfileurl);
675
- }
676
- tempfileurl = window.URL.createObjectURL(file);
677
- a.href = tempfileurl;
678
- a.target = '_blank';
679
- a.download = "selected_dataset.json";
680
- setTimeout(function(){a.click()},20);
681
- } catch (e) {
682
- contentsDiv.innerText = "File could not be saved. It might be too large.\nTry splitting into multiple files instead.\n\n"+e;
683
- console.log("Error saving: " + e);
684
- }
685
- }
686
-
687
- let lastfile = null;
688
- function loadfile()
689
- {
690
- if(!lastfile)
691
- {
692
- return;
693
- }
694
- const replace_existing = (document.getElementById('appendfile').value=="0");
695
- const merge_existing = (document.getElementById('appendfile').value=="1");
696
- const subtract_existing = (document.getElementById('appendfile').value=="2");
697
- const contentsDiv = document.getElementById('contents');
698
- const dedup = (document.getElementById('dedup').value=="1");
699
- let newdb = [];
700
- var filesizemb = lastfile.size/(1024*1024);
701
-
702
- function postLoad()
703
- {
704
- if(replace_existing)
705
- {
706
- db = newdb;
707
- }else if(merge_existing){
708
- for(let s in newdb)
709
- {
710
- db.push(newdb[s]);
711
- }
712
- }
713
- else
714
- {
715
- const set = new Set(db.map(item => JSON.stringify(item)));
716
- for(let s in newdb)
717
- {
718
- let newitm = JSON.stringify(newdb[s]);
719
- if(set.has(newitm))
720
- {
721
- set.delete(newitm);
722
- }
723
- }
724
- db = Array.from(set).map(item => JSON.parse(item));
725
- }
726
- if(dedup && !subtract_existing)
727
- {
728
- const set = new Set(db.map(item => JSON.stringify(item)));
729
- db = Array.from(set).map(item => JSON.parse(item));
730
- }
731
- selecteddb = {};
732
- countsel();
733
- displayTitles();
734
- contentsDiv.innerText = "Dataset File Loaded\nSelect an item to browse contents.";
735
- }
736
-
737
- async function streamJsonFile(file) {
738
- const CHUNK_SIZE = 32 * 1024 * 1024; // 32 MB chunks
739
- const decoder = new TextDecoder('utf-8');
740
- const reader = file.stream().getReader();
741
- const elemrange = (document.getElementById('elemrange').value);
742
- let elemstart = 0;
743
- let elemend = 999999999;
744
- let elemcounter = 0;
745
- if(elemrange!="")
746
- {
747
- if(elemrange.includes("-"))
748
- {
749
- let svs = elemrange.split("-");
750
- if(svs.length==2 && svs[0]!="" && svs[1]!="")
751
- {
752
- elemstart = Math.min(svs[0],elemend);
753
- elemend = Math.min(svs[1],elemend);
754
- }
755
- }
756
- else
757
- {
758
- elemstart = 0;
759
- elemend = Math.min(elemend,elemrange);
760
- }
761
- }
762
-
763
- let buffer = '';
764
- let accusize = 0;
765
- let accu = [];
766
- newdb = [];
767
- let lastloop = false;
768
- let errs = null;
769
-
770
- while (true) {
771
- const { done, value } = await reader.read();
772
-
773
- // Decode the chunk to a string
774
- const chunk = decoder.decode(value, { stream: true });
775
- accu.push(chunk);
776
- if (done) {
777
- lastloop = true;
778
- }
779
- accusize += chunk.length;
780
-
781
- if(accusize<CHUNK_SIZE && !lastloop)
782
- {
783
- continue;
784
- }
785
-
786
- buffer += accu.join("");
787
- accu = [];
788
- accusize = 0;
789
-
790
- // Process each character in the buffer
791
- let objstart = 0;
792
- let objend = 0;
793
- let bl = buffer.length;
794
- let inObject = false;
795
- let inString = false;
796
- let escapeNextChar = false;
797
- let curlyBraceCount = 0;
798
- for (let i = 0; i < bl; i++) {
799
- const char = buffer[i];
800
-
801
- if (char === '\\' && inString) {
802
- escapeNextChar = !escapeNextChar;
803
- continue;
804
- }
805
- if (char === '"' && !escapeNextChar) {
806
- inString = !inString;
807
- }
808
- escapeNextChar = false;
809
- if (inString) {
810
- continue;
811
- }
812
-
813
- if (char === '{') {
814
- if (!inObject) {
815
- inObject = true;
816
- objstart = i;
817
- }
818
- curlyBraceCount++;
819
- } else if (char === '}') {
820
- curlyBraceCount--;
821
- }
822
-
823
- // When curlyBraceCount returns to 0, we have a complete object
824
- if (inObject && curlyBraceCount === 0) {
825
- let jsonstr = buffer.slice(objstart, i + 1);
826
- objend = i;
827
- inObject = false;
828
- try{
829
- const jsonObject = JSON.parse(jsonstr);
830
- if(elemcounter>=elemstart && elemcounter<elemend)
831
- {
832
- newdb.push(jsonObject);
833
- }
834
- ++elemcounter;
835
- }catch(ex)
836
- {
837
- errs = ex;
838
- console.log(ex);
839
- }
840
- }
841
- }
842
- if(objstart>0 && objend>0)
843
- {
844
- inObject = false;
845
- buffer = buffer.slice(objend+1); // Remove the processed object from the buffer
846
- if(newdb.length>=(elemend-elemstart))
847
- {
848
- errs = `Loading stopped at ${newdb.length} items. Specified range exceeded.`
849
- lastloop = true;
850
- }
851
- }
852
- if(lastloop)
853
- {
854
- break;
855
- }
856
- }
857
-
858
- if(newdb.length>0)
859
- {
860
- postLoad();
861
- if(errs)
862
- {
863
- contentsDiv.innerText = `Dataset partially loaded.\nInterruption occurred while loading dataset file: ${lastfile.name}\n${errs}`;
864
- }
865
- }else{
866
- contentsDiv.innerText = `Error: Could not load dataset file: ${lastfile.name}`;
867
- }
868
-
869
- }
870
-
871
- const reader = new FileReader();
872
- reader.onload = function (e) {
873
- let loadok = false;
874
- try
875
- {
876
- //try normal json
877
- newdb = JSON.parse(e.target.result);
878
- loadok = true;
879
- }
880
- catch(err)
881
- {
882
- console.log(err);
883
- //try jsonl
884
- try
885
- {
886
- const content = e.target.result;
887
- if(content)
888
- {
889
- const lines = content.split('\n');
890
- newdb = lines.filter(line => line.trim()).map(line => JSON.parse(line));
891
- loadok = true;
892
- }
893
- }
894
- catch(err2)
895
- {
896
- console.log(err2);
897
- }
898
- }
899
-
900
- if(loadok)
901
- {
902
- const elemrange = (document.getElementById('elemrange').value);
903
- let elemstart = 0;
904
- let elemend = 999999999;
905
- let elemcounter = 0;
906
- if(elemrange!="")
907
- {
908
- if(elemrange.includes("-"))
909
- {
910
- let svs = elemrange.split("-");
911
- if(svs.length==2 && svs[0]!="" && svs[1]!="")
912
- {
913
- elemstart = Math.min(svs[0],elemend);
914
- elemend = Math.min(svs[1],elemend);
915
- }
916
- }
917
- else
918
- {
919
- elemstart = 0;
920
- elemend = Math.min(elemend,elemrange);
921
- }
922
- }
923
- newdb = newdb.slice(elemstart, elemend);
924
- postLoad();
925
- }
926
- else
927
- {
928
- if(filesizemb > 0.01 && (lastfile.type === "application/json" || lastfile.name.endsWith(".jsonl")))
929
- {
930
- //try big json
931
- streamJsonFile(lastfile);
932
- contentsDiv.innerText = `Attempting Fallback Stream-Load of Massive File:\n${lastfile.name} (${filesizemb.toFixed(0)} mb).\nPlease wait, this may take a long time...`;
933
- }else{
934
- contentsDiv.innerText = `Error: Could not load dataset file: ${lastfile.name}`;
935
- }
936
-
937
- contentsDiv.scrollTop = 0;
938
- }
939
- };
940
-
941
- if (filesizemb > 999) //files larger than 1gb just use streamload
942
- {
943
- //try big json
944
- streamJsonFile(lastfile);
945
- contentsDiv.innerText = `Attempting Stream-Load of Massive File:\n${lastfile.name} (${filesizemb.toFixed(0)} mb)\nPlease wait, this may take a long time...`;
946
- } else {
947
- reader.readAsText(lastfile);
948
- }
949
-
950
- }
951
- function clearfile()
952
- {
953
- lastfile = null;
954
- document.getElementById('pickfile').value = "";
955
- }
956
- function countWords(str) {
957
- if (str == "") { return 0; }
958
- const wordPattern = /[a-zA-Z0-9_]+/g;
959
- const words = str.match(wordPattern);
960
- if (!words) {
961
- return 0;
962
- }
963
- return words.length;
964
- }
965
- function countOccurances(str,search)
966
- {
967
- var exp = new RegExp(search, `g${document.getElementById('casesensitive').checked?"":"i"}`);
968
- var count = (temp.match(exp) || []).length;
969
- return count;
970
- }
971
-
972
- function togglepanel(id)
973
- {
974
- const p1 = document.getElementById('panel1');
975
- const p2 = document.getElementById('panel2');
976
- const p3 = document.getElementById('panel3');
977
- const p4 = document.getElementById('panel4');
978
- p1.style.display = p2.style.display = p3.style.display = p4.style.display = "none";
979
- document.getElementById(`panel${id}`).style.display = "block";
980
- }
981
-
982
- document.addEventListener("DOMContentLoaded", function () {
983
- const fileInput = document.getElementById('pickfile');
984
-
985
- fileInput.addEventListener('change', function (event) {
986
- lastfile = event.target.files[0];
987
- loadfile();
988
- });
989
- });
990
-
991
- </script>
992
- </head>
993
-
994
- <body>
995
- <div class="navbar">
996
- <a href="#" onclick="togglepanel(1)">Importing</a>
997
- <a href="#" onclick="togglepanel(2)">Filtering</a>
998
- <a href="#" onclick="togglepanel(3)">Selecting</a>
999
- <a href="#" onclick="togglepanel(4)">Exporting</a>
1000
- <a style="float: right;" href="https://github.com/LostRuins/DatasetExplorer">Concedo JSON Dataset Explorer</a>
1001
- </div>
1002
-
1003
- <div style="display:flex;">
1004
- <div style="padding:4px">
1005
- <div id="panel1" style="padding:2px; display: block;">
1006
- <input onclick="clearfile()" style="width: 220px;" id="pickfile" name="file" type="file" accept=".json,.jsonl">
1007
- <a id="tempfile" href="#" style="display:none;"></a>
1008
- <select id="appendfile">
1009
- <option value="0">Replace Existing</option>
1010
- <option value="1">Merge with Existing</option>
1011
- <option value="2">Subtract from Existing</option>
1012
- </select>
1013
- <select id="dedup">
1014
- <option value="0">Allow Duplicates</option>
1015
- <option value="1">Remove Duplicates</option>
1016
- </select>
1017
- LoadRange:
1018
- <input type="text" style="width:84px" placeholder="(Range: All)" value="" id="elemrange">
1019
- </div>
1020
- <div id="panel2" style="display: none;">
1021
- <div style="padding:1px">
1022
- Chars: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minlen">
1023
- <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxlen">
1024
- Turns: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minnodes">
1025
- <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxnodes">
1026
- Words: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minwords">
1027
- <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxwords">
1028
- Matches: <input type="text" style="width:50px" placeholder="(Min)" value="" id="minmatches">
1029
- <input type="text" style="width:50px" placeholder="(Max)" value="" id="maxmatches">
1030
- </div>
1031
- <div style="padding:2px">
1032
- <input type="text" style="width:200px" placeholder="(Search)" value="" id="search">
1033
- <input type="checkbox" id="useregex">
1034
- <label for="useregex"> Uses Regex</label>
1035
- <input type="checkbox" id="casesensitive">
1036
- <label for="casesensitive"> Case Sensitive</label>
1037
- </div>
1038
- <div style="font-size: 15px;">
1039
- <select id="showdetail">
1040
- <option value="chars">Count Characters</option>
1041
- <option value="words">Count Words</option>
1042
- <option value="turns">Count Turns</option>
1043
- <option value="matches">Count Matches</option>
1044
- </select>
1045
- <select id="sortorder" >
1046
- <option value="0">No Sort</option>
1047
- <option value="1">Sort Descending</option>
1048
- <option value="2">Sort Ascending</option>
1049
- </select>
1050
- <span style="font-size: 15px;">
1051
- <button id="filter" onclick="displayTitles()">Filter</button>
1052
- <button onclick="clearInputs()">Clear Filters</button>
1053
- </span>
1054
- </div>
1055
-
1056
- </div>
1057
- <div id="panel3" style="padding:2px; display: none;">
1058
- <div>
1059
- <input type="text" style="width:84px" placeholder="(Range: All)" value="" id="selectamt">
1060
- <button id="selectall" onclick="selrange(true)">Select Range</button>
1061
- <button id="deselectall" onclick="selrange(false)">Deselect Range</button>
1062
- <button id="invertsel" onclick="invertsel()">Invert Selection</button>
1063
- </div><div style="padding: 2px; margin-top: 6px;">
1064
- <button onclick="erasesel()">Erase Selected</button>
1065
- <button onclick="prunesel()">Prune To Selected</button>
1066
- <button onclick="shufflesel()">Shuffle Selected</button>
1067
- <button id="ngram" onclick="displayngram()">Calc. N-Gram</button>
1068
- </div>
1069
- </div>
1070
- <div id="panel4" style="padding:2px; display: none;">
1071
- <button id="exportsel" onclick="exportsel()">JSON Export Selected</button>
1072
- </div>
1073
- </div>
1074
-
1075
-
1076
- </div>
1077
- <div style="display:flex;height:calc(100vh - 150px); font-size:20px; margin-top:2px">
1078
- <div style="height:100%; width:220px; padding:2px; border-right:1px solid #ccc;">
1079
- <div><span style="font-size: 13px;"> [ <span id="selcounter">0 Selected</span> / <span id="rescounter">0 Results</span> ] </span></div>
1080
- <div id="toc" style="height:calc(100% - 60px); padding:4px; overflow:auto; width:100%;">No Items</div>
1081
- <button id="nextitm" style="width: 100%; padding:2px" onclick="nextitm()">Next Item</button>
1082
- </div>
1083
-
1084
- <div id="contents" style="height:100%; overflow:auto; width:calc(100% - 220px); padding:8px;">No File Opened</div>
1085
- </div>
1086
- </body>
1087
-
1088
- </html>
 
 
 
1
  <!--
2
  This is a JSON Dataset Explorer (Viewer and Editor) made by Concedo/LostRuins
3
  Please go to https://github.com/LostRuins/DatasetExplorer for updates
 
5
  -Concedo
6
  -->
7
 
8
+ <iframe
9
+ src="https://lostruins.github.io/datasetexplorer/"
10
+ style="
11
+ position: fixed;
12
+ top: 0px;
13
+ bottom: 0px;
14
+ right: 0px;
15
+ width: 100%;
16
+ border: none;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  margin: 0;
18
+ padding: 0;
19
+ overflow: hidden;
20
+ z-index: 999999;
21
+ height: 100%;
22
+ ">
23
+ </iframe>