gregH's picture
Update assets/js/calibration.js
af2b329 verified
raw
history blame
7.1 kB
$(document).ready(function(){
// Swap the intro illustration when a not-yet-selected option <span> is clicked.
$('#jailbreak-intro').on('click', 'span', function(event) {
  event.preventDefault();
  const clicked = $(this);
  // Clicking the option that is already active changes nothing.
  if (clicked.hasClass('selected')) {
    return;
  }
  // Move the 'selected' marker from the label spans to the clicked one.
  $('#jailbreak-intro-label > span').removeClass('selected');
  clicked.addClass('selected');
  // The option's text is used as the suffix of the image file name.
  const optionName = clicked.text();
  const introImage = document.getElementById("jailbreak-intro-img");
  introImage.src = `images/metrics/intro-${optionName}.png`;
});
// Reveal the formula panel referenced by the clicked link and hide the others.
$('#refusal-loss-formula-list').on('click', 'a', function(event) {
  event.preventDefault();
  const link = $(this);
  // The active link is already showing its panel; nothing to do.
  if (link.hasClass('selected')) {
    return;
  }
  // Collapse every panel and clear the previous selection (200 ms animation).
  $('.formula').hide(200);
  $('.formula-list > a').removeClass('selected');
  link.addClass('selected');
  // The link's href attribute is passed straight to jQuery as the selector
  // of the panel to show (presumably an "#id" fragment — confirm in the markup).
  const panelSelector = link.attr('href');
  $(panelSelector).show(200);
});
// Image element whose `src` the radio buttons and sliders below re-point
// at the figure matching the current selection.
const diagram = document.getElementById("reliability-diagram");
// Parsed results table read by updateFigurePath(); it stays undefined until
// the request below has completed successfully.
let jailbreak_results;
$.getJSON("demo_results/jailbreak_defense_results.jsonl", function(data) {
  jailbreak_results = data;
}).fail(function(jqXHR, textStatus, errorThrown) {
  // Without this handler a missing file or a JSON parse error (the response is
  // parsed as one JSON document despite the .jsonl extension) would go
  // unnoticed and leave the table undefined with no hint as to why.
  console.error(
    "Failed to load demo_results/jailbreak_defense_results.jsonl:",
    textStatus,
    errorThrown
  );
});
// Picking another model only requires pointing the diagram at its figure.
$('input[type=radio][name=models]').change(function() {
  diagram.src = updateFigurePath();
});
// Picking another defense also toggles the threshold sliders: the 'ppl'
// defense enables only the ppl slider, 'gradient_cuff' enables only the
// gradient-norm slider, and any other defense disables both.
$('input[type=radio][name=defense]').change(function() {
  const defense = this.value;
  $("#ppl-slider").slider(defense === 'ppl' ? 'enable' : 'disable');
  $("#gradient-norm-slider").slider(defense === 'gradient_cuff' ? 'enable' : 'disable');
  diagram.src = updateFigurePath();
});
// Gradient-norm threshold slider (0..800 in steps of 50, starting at 100).
// Its value is mirrored as text into #gradient-norm-threshold.
const gradientNormLabel = $("#gradient-norm-threshold");
$("#gradient-norm-slider").slider({
  step: 50,
  min: 0,
  max: 800,
  value: 100,
  // Show the initial value as soon as the widget is created.
  create() {
    gradientNormLabel.text($(this).slider("value"));
  },
  // While dragging, show the value reported by the event and refresh the
  // figure for that same value.
  slide(event, ui) {
    gradientNormLabel.text(ui.value);
    diagram.src = updateFigurePath(ui.value);
  },
});
// PPL threshold slider (0..200 in steps of 20, starting at 100).
// Its value is mirrored as text into #ppl-threshold.
const pplLabel = $("#ppl-threshold");
$("#ppl-slider").slider({
  step: 20,
  min: 0,
  max: 200,
  value: 100,
  // Show the initial value as soon as the widget is created.
  create() {
    pplLabel.text($(this).slider("value"));
  },
  // While dragging, show the value reported by the event and refresh the
  // figure; the gradient-norm threshold is read from its own slider so that
  // only the ppl argument comes from this event.
  slide(event, ui) {
    pplLabel.text(ui.value);
    const gradientNormThreshold = $('#gradient-norm-slider').slider("option", "value");
    diagram.src = updateFigurePath(gradientNormThreshold, ui.value);
  },
});
/**
 * Refresh the two metric read-outs (#asr-value, #brr-value) for the currently
 * checked model/defense radio buttons and return the path of the matching
 * result figure.
 *
 * The 'ppl' defense is keyed by `ppl_threshold`, 'gradient_cuff' by
 * `gradient_norm_threshold`; every other defense has no threshold level in the
 * results table or in the figure file name.
 *
 * If the results table has not been loaded yet, or lacks an entry for the
 * current selection, both read-outs show "N/A" and the figure path is still
 * returned, so callers can keep updating the image instead of hitting a
 * TypeError.
 *
 * @param {number} [gradient_norm_threshold] - defaults to the current value of #gradient-norm-slider.
 * @param {number} [ppl_threshold] - defaults to the current value of #ppl-slider.
 * @returns {string} Relative path of the figure for the current selection.
 */
function updateFigurePath(gradient_norm_threshold=$('#gradient-norm-slider').slider("option", "value"),ppl_threshold=$('#ppl-slider').slider("option", "value")) {
  const curModel = $("input[type='radio'][name='models']:checked").val();
  const curDefense = $("input[type='radio'][name='defense']:checked").val();
  const asrBoard = document.getElementById("asr-value");
  const brrBoard = document.getElementById("brr-value");

  // Only two defenses are parameterised by a threshold; null means "no threshold level".
  let threshold = null;
  if (curDefense === "ppl") {
    threshold = ppl_threshold.toString();
  } else if (curDefense === "gradient_cuff") {
    threshold = gradient_norm_threshold.toString();
  }

  const figurePath = threshold === null
    ? `demo_results/${curDefense}_${curModel}.png`
    : `demo_results/${curDefense}_${curModel}_threshold_${threshold}.png`;

  // jailbreak_results is filled in asynchronously, so it may still be undefined here.
  const defenseResults = jailbreak_results == null ? undefined : jailbreak_results[curDefense];
  const modelResults = defenseResults == null ? undefined : defenseResults[curModel];
  const metrics = modelResults == null ? null : computeDefenseMetrics(modelResults, threshold);

  if (metrics === null) {
    asrBoard.innerText = "N/A";
    brrBoard.innerText = "N/A";
  } else {
    asrBoard.innerText = parseFloat(metrics.asr).toFixed(3);
    brrBoard.innerText = parseFloat(metrics.brr).toFixed(3);
  }
  return figurePath;
}

/**
 * Derive the two displayed metrics from one model's results.
 *
 * `asr` averages the "tpr" values over six items: five individual attacks plus
 * the mean of the four "lrl_*" entries, which together count as a single
 * sixth item. `brr` is the "fpr" value stored with the "pair" attack.
 *
 * @param {Object} modelResults - per-attack results for one defense/model pair.
 * @param {?string} threshold - threshold key to descend into, or null when the
 *     attack entries hold "tpr"/"fpr" directly.
 * @returns {?{asr: number, brr: number}} null when any required value is missing.
 */
function computeDefenseMetrics(modelResults, threshold) {
  const individualAttacks = ["adv_harmful_behavior", "autodan", "pair", "tap", "base64"];
  const lowResourceAttacks = ["lrl_de", "lrl_fr", "lrl_zh-CN", "lrl_sv"];

  // Resolve the {tpr, fpr} record of one attack, descending into the threshold level if there is one.
  const entryFor = function(attack) {
    const perAttack = modelResults[attack];
    if (perAttack == null || threshold === null) {
      return perAttack;
    }
    return perAttack[threshold];
  };
  const tprOf = function(attack) {
    const entry = entryFor(attack);
    return entry == null ? undefined : entry.tpr;
  };

  const individualTprs = individualAttacks.map(tprOf);
  const lowResourceTprs = lowResourceAttacks.map(tprOf);
  const pairEntry = entryFor("pair");
  const brr = pairEntry == null ? undefined : pairEntry.fpr;

  const isMissing = (value) => value == null;
  if (individualTprs.some(isMissing) || lowResourceTprs.some(isMissing) || isMissing(brr)) {
    return null;
  }

  const add = (total, value) => total + value;
  const lowResourceMean = lowResourceTprs.reduce(add, 0) / 4;
  const asr = (individualTprs.reduce(add, 0) + lowResourceMean) / 6;
  return { asr, brr };
}
});