File size: 506 Bytes
928b43c 68731ca 928b43c e765db8 68731ca 928b43c 68731ca e765db8 928b43c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import os
from datasets import load_dataset
CACHE_DIR = 'cache'
N_SAMPLES = 15
REMOVED_COMMITS = ['9cc896202dc38d962c01aa2637dbc5bbc3e3dd9b']
def load_data():
df = load_dataset("petrtsv-jb/commit-rewriting-samples",
split="train",
token=os.environ.get('HF_REWRITING_TOKEN'),
cache_dir=CACHE_DIR).to_pandas()
removed_idx = df['hash'].isin(REMOVED_COMMITS)
df = df[~removed_idx]
return df.to_dict('records')[:N_SAMPLES]
|