events

Running

File size: 8,737 Bytes

ce78ec6
 
b3feaa3
51294aa
b3feaa3
 
 
 
 
 
 
 
 
3287a07
b3feaa3
3287a07
b3feaa3
f0edf49
b3feaa3
3287a07
ce78ec6
 
bdaa284
ce78ec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3feaa3
ce78ec6
 
 
278e21a
b3feaa3
 
 
3287a07
a072676
b3feaa3
3287a07
 
b3feaa3
3287a07
 
b3feaa3
 
3287a07
 
ce78ec6
b3feaa3
 
90c9489
ff079ce
b3feaa3
 
 
 
 
ff079ce
b3feaa3
ff079ce
 
 
 
 
 
 
 
 
 
 
90c9489
b3feaa3
90c9489
 
 
b3feaa3

import torch
import sys
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import json

# Single source of truth for the checkpoint, so the tokenizer and the model
# weights are always loaded from the same repository.
MODEL_ID = 'google/gemma-2-2b-it'

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Configure 4-bit quantization using BitsAndBytesConfig: weights are stored in
# the "nf4" 4-bit format and computation is carried out in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
)

# Load the model with the quantization configuration; device_map="auto"
# delegates device placement of the weights to the library.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=quantization_config,
)



# Define the (Spanish) few-shot prompt that asks the model for a JSON document
# describing nested events.
#
# The worked example is built from Python data and serialized with json.dumps
# so that it is always valid JSON. Sibling events are listed in an "event"
# array: repeating the "event" key inside one object is collapsed to the last
# value by json.loads, so an example written that way silently loses events
# when parsed and teaches the model to produce output with the same problem.
def _example_event(number, name, description, probability, duration_days, children=()):
    """Build one example event dict; children, if any, go under "subevents"."""
    event = {
        "event_number": number,
        "name": name,
        "description": description,
        "probability": probability,
        "duration_days": duration_days,
    }
    if children:
        event["subevents"] = {"event": list(children)}
    return event


_example_tree = _example_event(
    1, "conflict_start", "Tensions escalate between Iran and Israel", 70, 30,
    children=[
        _example_event(
            2, "diplomatic_failure", "Diplomatic negotiations fail", 60, 15,
            children=[
                _example_event(
                    3, "military_clash", "Initial military clash at the border", 50, 10,
                    children=[
                        _example_event(
                            4, "escalation", "Conflict escalates into full-scale war", 40, 180,
                            children=[
                                _example_event(
                                    5, "regional_involvement",
                                    "Other Middle Eastern countries get involved", 30, 365,
                                    children=[
                                        _example_event(
                                            6, "ceasefire",
                                            "International powers broker a ceasefire", 20, 30,
                                        ),
                                        _example_event(
                                            7, "prolonged_conflict",
                                            "Conflict continues for over a year", 50, 365,
                                        ),
                                    ],
                                ),
                                _example_event(
                                    8, "international_intervention",
                                    "UN or other international organizations intervene", 25, 60,
                                ),
                            ],
                        ),
                        _example_event(
                            9, "containment",
                            "Conflict is contained and doesn't escalate", 30, 90,
                        ),
                    ],
                ),
                _example_event(
                    10, "sanctions", "Increased sanctions on Iran", 70, 180,
                    children=[
                        _example_event(
                            11, "iran_retaliates", "Iran retaliates with cyberattacks", 40, 60,
                        ),
                        _example_event(
                            12, "israel_response",
                            "Israel responds with targeted airstrikes", 50, 60,
                        ),
                    ],
                ),
            ],
        ),
        _example_event(
            13, "diplomatic_success", "Successful diplomatic negotiations", 40, 30,
            children=[
                _example_event(
                    14, "peace_agreement", "Iran and Israel sign a peace agreement", 20, 60,
                ),
                _example_event(
                    15, "temporary_truce", "A temporary truce is established", 30, 30,
                ),
            ],
        ),
    ],
)

prompt = (
    "Genera un JSON que describa una serie de eventos consecutivos en un formato similar al siguiente:\n\n"
    + json.dumps({"events": {"event": _example_tree}}, indent=2)
    + "\n\n"
    + "Ahora, genera un JSON similar con eventos anidados, pero cambia los detalles y números para hacer que sea con el input que viene a continuacion, respondiendo solo el JSON empezando con <json>:"
)


def _extract_json_payload(generated_text):
    """Return the JSON text that precedes ``</json>``, or None if that tag is absent.

    An opening ``<json>`` tag is skipped when present but is not required.
    Anything before the first ``{`` or after the last ``}`` is discarded.
    """
    _, open_tag, after_open = generated_text.partition("<json>")
    body = after_open if open_tag else generated_text

    body, close_tag, _ = body.partition("</json>")
    if not close_tag:
        return None

    body = body.strip()

    # Remove any leading/trailing non-JSON characters (if present)
    first_brace_index = body.find("{")
    if first_brace_index > 0:
        body = body[first_brace_index:]

    last_brace_index = body.rfind("}")
    if last_brace_index != -1:
        body = body[:last_brace_index + 1]

    return body


def generate(event):
    """Generate a nested-events JSON document for *event* and parse it.

    The module-level ``prompt`` and *event* are sent to the model as a single
    user turn. Only the newly generated tokens are searched for the JSON
    payload, because the prompt itself contains the text ``<json>`` and may
    contain arbitrary user text.

    Args:
        event: Text describing the scenario; it is appended to the prompt.

    Returns:
        The parsed JSON value on success. On failure, a string starting with
        ``"Error:"``: either the closing ``</json>`` tag was not generated, or
        the extracted text was not valid JSON.
    """
    combined_input = f"{prompt} {event}"  # Combine prompt and event
    prompt_msg = [{'role': 'user', 'content': combined_input}]

    # return_dict=True also yields the attention mask, which is forwarded to
    # generate() together with the input ids.
    inputs = tokenizer.apply_chat_template(
        prompt_msg,
        add_generation_prompt=True,
        return_tensors='pt',
        return_dict=True,
    ).to(model.device)
    prompt_token_count = inputs["input_ids"].shape[-1]

    tokens = model.generate(
        **inputs,
        max_new_tokens=1024,
        temperature=0.5,
        do_sample=True,
    )

    # Log the full exchange (prompt + completion, special tokens included).
    print(tokenizer.decode(tokens[0], skip_special_tokens=False))

    # The returned sequence starts with the prompt tokens; slicing by the
    # prompt's token count isolates the completion without having to
    # reconstruct the chat template as a string.
    generated_text = tokenizer.decode(
        tokens[0][prompt_token_count:],
        skip_special_tokens=True,
    )

    json_string = _extract_json_payload(generated_text)
    if json_string is None:
        return "Error: <json> or </json> not found in generated output"

    # Load JSON without validation
    try:
        return json.loads(json_string)
    except json.JSONDecodeError as e:
        return f"Error: Invalid JSON - {e}"