from functools import lru_cache

import torch
from transformers import AutoProcessor, AutoModelForPreTraining

# Hugging Face checkpoint used for both the processor and the model.
_MODEL_ID = "google/paligemma-3b-pt-224"

# Instruction sent to the model together with the two images.
_PROMPT = (
    "check whether both molecules have the same chemical structure. "
    "if yes, output correct and if not, output incorrect"
)


@lru_cache(maxsize=1)
def _load_paligemma():
    """Load the processor and model once and reuse them on later calls."""
    processor = AutoProcessor.from_pretrained(_MODEL_ID)
    model = AutoModelForPreTraining.from_pretrained(_MODEL_ID)
    return processor, model


def response(image2, image) -> str:
    """Ask the PaliGemma checkpoint whether two molecule images match.

    Both images are sent to the model with a fixed prompt asking it to
    output "correct" when the chemical structures are the same and
    "incorrect" otherwise. The reply is free-form generated text, so it is
    not guaranteed to be exactly one of those two words.

    Args:
        image2: The second image; it is passed to the processor *after*
            ``image``.
        image: The first image passed to the processor.
            Both are presumably PIL images or another input type the
            processor accepts -- TODO confirm against callers.

    Returns:
        The generated continuation only (prompt tokens removed), decoded
        with special tokens skipped.
    """
    # Loading the checkpoint is expensive; the cached helper does it only
    # on the first call instead of on every call.
    processor, model = _load_paligemma()

    # NOTE(review): one text prompt is paired with a flat list of two
    # images. Depending on the installed transformers version the
    # processor may expect one image (or one nested list of images) per
    # prompt -- confirm this call shape against the version in use.
    model_inputs = processor(
        text=_PROMPT,
        images=[image, image2],
        return_tensors="pt",
    )
    input_len = model_inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        # Greedy decoding (no sampling), capped at 100 new tokens.
        generation = model.generate(
            **model_inputs, max_new_tokens=100, do_sample=False
        )

    # generate() returns prompt + continuation; keep only the continuation
    # of the first (and only) sequence in the batch.
    new_tokens = generation[0][input_len:]
    return processor.decode(new_tokens, skip_special_tokens=True)