File size: 1,175 Bytes
305e536 aab3281 305e536 a8a595d 0c136d8 a8a595d 0c136d8 305e536 5f3a4af 305e536 aab3281 5f3a4af 6676c5a 5f3a4af f5faae7 aab3281 e027012 0c136d8 e027012 6676c5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import os
from pathlib import Path
# ---------------------------------------------------------------------------
# General
# ---------------------------------------------------------------------------
# Fixed integer shared across the project; presumably a random seed
# (its consumers are not visible in this file).
RANDOM_STATE = 42

# ---------------------------------------------------------------------------
# Grazie API
# ---------------------------------------------------------------------------
# Read from the environment; evaluates to None when the variable is unset.
GRAZIE_API_JWT_TOKEN = os.getenv("GRAZIE_API_JWT_TOKEN")
GRAZIE_TIMEOUT_SEC = 1.0

# ---------------------------------------------------------------------------
# Hugging Face
# ---------------------------------------------------------------------------
# Read from the environment; evaluates to None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Raw commit-message rewriting dataset.
HF_RAW_DATASET_NAME = "petrtsv-jb/commit-msg-rewriting"
HF_RAW_DATASET_SPLIT = "train"

# Full commits dataset (name / sub-name / split).
HF_FULL_COMMITS_DATASET_NAME = (
    "JetBrains-Research/lca-commit-message-generation"
)
HF_FULL_COMMITS_DATASET_SUBNAME = "commitchronicle-py-long"
HF_FULL_COMMITS_DATASET_SPLIT = "test"

# Predictions dataset (name / sub-name / split).
HF_PREDICTIONS_DATASET_NAME = "JetBrains-Research/lca-results"
HF_PREDICTIONS_DATASET_SUBNAME = "cmg_gpt_4_0613"
HF_PREDICTIONS_DATASET_SPLIT = "test"

# Synthetic commit-message rewriting dataset.
HF_SYNTHETIC_DATASET_NAME = "petrtsv-jb/synthetic-commit-msg-rewriting"
HF_SYNTHETIC_DATASET_SPLIT = "train"

# ---------------------------------------------------------------------------
# LLM
# ---------------------------------------------------------------------------
LLM_MODEL = "gpt-4-1106-preview"

# ---------------------------------------------------------------------------
# Working directories
# ---------------------------------------------------------------------------
# Both paths are relative, so they resolve against the current working
# directory. Each directory is created at import time if it does not exist.
CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(exist_ok=True)

OUTPUT_DIR = Path("output")
OUTPUT_DIR.mkdir(exist_ok=True)

# ---------------------------------------------------------------------------
# Artifact paths (all located inside OUTPUT_DIR)
# ---------------------------------------------------------------------------
END_TO_START_ARTIFACT = OUTPUT_DIR.joinpath("end_to_start.csv")
START_TO_END_ARTIFACT = OUTPUT_DIR.joinpath("start_to_end.csv")
SYNTHETIC_DATASET_ARTIFACT = OUTPUT_DIR.joinpath("synthetic.csv")
METRICS_CORRELATIONS_ARTIFACT = OUTPUT_DIR.joinpath("metrics_correlations.csv")
DATA_FOR_LABELING_ARTIFACT = OUTPUT_DIR.joinpath("data_for_labeling.csv")
|