"""Gradio demo: fine-tune DistilBERT on IMDB and serve a sentiment classifier.

Running this module loads the IMDB dataset, tokenizes it, briefly fine-tunes
``distilbert-base-uncased`` for two-class sequence classification, and then
launches a Gradio text interface backed by the resulting model.
"""

import gradio as gr
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
    pipeline,
)

imdb = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")


def preprocess_function(examples):
    """Tokenize the ``"text"`` column of a batch, truncating long inputs.

    Used with ``Dataset.map(..., batched=True)``, so ``examples["text"]`` is
    passed to the tokenizer as a whole batch. Padding is not applied here; it
    is left to the data collator at batch-assembly time.
    """
    return tokenizer(examples["text"], truncation=True)


tokenized_imdb = imdb.map(preprocess_function, batched=True)

# Pads each batch dynamically instead of padding the whole dataset up front.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
)

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # NOTE(review): 0.01 epochs is only a small fraction of one pass over the
    # training split -- presumably chosen to keep demo start-up short; confirm
    # before relying on the model's predictions.
    num_train_epochs=0.01,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_imdb["train"],
    eval_dataset=tokenized_imdb["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

# Build the inference pipeline once, after training. The model and tokenizer
# do not change afterwards, so rebuilding it on every request is wasted work.
sentiment_pipe = pipeline("sentiment-analysis", tokenizer=tokenizer, model=model)


def greet(text: str) -> str:
    """Classify ``text`` and return the predicted label.

    Args:
        text: The sentence (or passage) entered in the Gradio textbox.

    Returns:
        The ``"label"`` field of the first prediction produced by the
        sentiment-analysis pipeline.
    """
    # NOTE(review): no id2label mapping is configured on the model, so the
    # label is presumably a generic name rather than "positive"/"negative" --
    # confirm whether a friendlier mapping is wanted.
    return sentiment_pipe(text)[0]["label"]


# ``gr.inputs`` is the legacy component namespace; newer Gradio releases expose
# the component as ``gr.Textbox``. Prefer the new name and fall back to the
# legacy one so the script works with either API generation.
_textbox_cls = getattr(gr, "Textbox", None)
if _textbox_cls is None:
    _textbox_cls = gr.inputs.Textbox

iface = gr.Interface(
    fn=greet,
    inputs=_textbox_cls(placeholder="Please enter the sentence...", lines=5),
    outputs="text",
)
iface.launch()