waidhoferj committed
Commit 0030bc6 • 1 Parent(s): 4b8361a
.gitattributes CHANGED
@@ -1,2 +1,3 @@
  *.wav filter=lfs diff=lfs merge=lfs -text
  *.pt filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,15 +1,10 @@
  __pycache__
  .DS_Store
- data/samples
- data/spotify-samples
- data/samples-backup.zip
- data/songs.csv
- data/songs_original.csv
+ data
  logs
  gradio_cached_examples
  explore.ipynb
  scrapers/auth
  lightning_logs
- data/backup_1.csv
- data/backup.csv
- data/*.zip
+ .lr_find_*
+ .cache
README.md CHANGED
@@ -11,3 +11,11 @@ pinned: false
  ---

  # Dance Classifier
+
+ Classifies the dance style that best accompanies a provided song. Users record or upload an audio clip, and the model returns a list of matching dance styles.
+
+ ## Getting Started
+
+ 1. Install dependencies: `conda env create --file environment.yml`
+ 2. Activate the environment: `conda activate dancer-net`
+ 3. Start the demo application: `python app.py`
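For readers who want to drive the model without the Gradio app, here is a minimal sketch of programmatic use. It assumes the shipped checkpoint and `models/config/dance-predictor.yaml` are present, and that `DancePredictor` accepts the config keys as constructor arguments (an assumption, not confirmed by this commit):

```python
import yaml
import numpy as np
import torchaudio
from models.residual import DancePredictor

# Assumption: the constructor takes the same keys that appear in the YAML config.
with open("models/config/dance-predictor.yaml") as f:
    config = yaml.safe_load(f)
predictor = DancePredictor(**config)

# __call__ casts its input to int16, so scale float audio into int16 range first.
waveform, sample_rate = torchaudio.load("assets/song-samples/the_long_day_is_over.wav")
pcm = (waveform.numpy() * np.iinfo(np.int16).max).astype(np.int16)

scores = predictor(pcm, sample_rate)  # dict mapping dance styles to confidences
print(sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:3])
```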
TODO.md ADDED
@@ -0,0 +1,10 @@
+ - ✅ Ensure app.py audio input sounds like training data
+ - Verify that the training spectrogram matches the predict spectrogram
+ - Count number of example misses in dataset loading
+ - Verify windowing and jitter params in Song Dataset
+ - Create an attention-based network
+ - ✅ Increase parameter count in network
+ - Verify that labels really match what is on the music4dance site
+ - Read the Medium series about audio DL
+ - double check \_rectify_duration
+ - Filter out songs that have only one vote
assets/song-samples/take_it_to_the_limit.wav DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c69e0eeb4321c44daaaaf95dd596b1d813b9f7e9b5ef4ac5ae9fe11878d4b13b
- size 5292082
assets/song-samples/{alejandro.wav → the_long_day_is_over.wav} RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:85f9a65fc4adb1fc0cbdbfafb7f7268a0934d97a120110d3f3a43375e59cba54
- size 5292078
+ oid sha256:c8f957921bbd5c322f67748aca228dd7ebf9af005692c57d1050299861883214
+ size 5290062
audio_utils.py ADDED
@@ -0,0 +1,42 @@
+ import librosa
+ from IPython.display import Audio, display
+ import matplotlib.pyplot as plt
+ import torch
+ SAMPLE_RIR_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/room-response/rm1/impulse/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo.wav"
+
+ SAMPLE_NOISE_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/distractors/rm1/babb/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav"
+
+ def plot_spectrogram(spec, title=None, ylabel="freq_bin", aspect="auto", xmax=None):
+     spec = spec.squeeze(0)
+     spec = spec.numpy()
+     fig, axs = plt.subplots(1, 1)
+     axs.set_title(title or "Spectrogram (db)")
+     axs.set_ylabel(ylabel)
+     axs.set_xlabel("frame")
+     im = axs.imshow(librosa.power_to_db(spec), origin="lower", aspect=aspect)
+     if xmax:
+         axs.set_xlim((0, xmax))
+     fig.colorbar(im, ax=axs)
+     plt.show(block=False)
+
+ def play_audio(waveform, sample_rate):
+     waveform = waveform.numpy()
+
+     num_channels, num_frames = waveform.shape
+     if num_channels == 1:
+         display(Audio(waveform[0], rate=sample_rate))
+     elif num_channels == 2:
+         display(Audio((waveform[0], waveform[1]), rate=sample_rate))
+     else:
+         raise ValueError("Waveform with more than 2 channels are not supported.")
+
+ def get_rir_sample(path, resample=None, processed=False):
+     rir_raw, sample_rate = torch.load(path)
+     if not processed:
+         return rir_raw, sample_rate
+     rir = rir_raw[:, int(sample_rate*1.01):int(sample_rate*1.3)]
+     rir = rir / torch.norm(rir, p=2)
+     rir = torch.flip(rir, [1])
+     return rir, sample_rate
+
+
environment.yml CHANGED
@@ -15,6 +15,9 @@ dependencies:
  - requests
  - bidict
  - tqdm
+ - pytorch-lightning
+ - rich
  - pip
  - gradio
+ - wakepy
  prefix: /opt/homebrew/Caskroom/miniforge/base/envs/dancer-net
models/config/dance-predictor.yaml CHANGED
@@ -1,20 +1,15 @@
- weight_path: lightning_logs/version_0/checkpoints/epoch=5-step=870.ckpt
+ weight_path: models/weights/ResidualDancer/weights.ckpt
  expected_duration: 6
- threshold: 0.5
+ threshold: 0.4
  resample_frequency: 16000
  device: cpu
  labels:
  - Argentine Tango
- - Balboa
  - Bachata
- - Blues
  - Cha Cha
- - Cumbia
- - Carolina Shag
  - East Coast Swing
  - Hustle
  - Jive
- - Lindy Hop
  - Quickstep
  - Rumba
  - Slow Foxtrot
@@ -23,4 +18,3 @@ labels:
  - Slow Waltz
  - Tango (Ballroom)
  - Viennese Waltz
- - West Coast Swing
models/config/train.yaml CHANGED
@@ -1,23 +1,46 @@
- device: mps
- seed: 42
- dance_ids:
- - ATN
- - BBA
- - BCH
- - BLU
- - CHA
- - CMB
- - CSG
- - ECS
- - HST
- - JIV
- - LHP
- - QST
- - RMB
- - SFT
- - SLS
- - SMB
- - SWZ
- - TGO
- - VWZ
- - WCS
+ global:
+   device: mps
+   seed: 42
+   dance_ids:
+     - ATN
+     - BCH
+     - CHA
+     - ECS
+     - HST
+     - JIV
+     - QST
+     - RMB
+     - SFT
+     - SLS
+     - SMB
+     - SWZ
+     - TGO
+     - VWZ
+     - WCS
+ data_module:
+   batch_size: 1024
+   num_workers: 10
+   min_votes: 2
+   song_data_path: data/songs_cleaned.csv
+   song_audio_path: data/samples
+   dataset_kwargs:
+     audio_window_duration: 6
+     audio_window_jitter: 1.5
+     audio_pipeline_kwargs:
+       mask_count: 0 # Don't mask the data
+       snr_mean: 15.0 # Pretty much eliminate the noise
+       freq_mask_size: 10
+       time_mask_size: 80
+
+ trainer:
+   log_every_n_steps: 15
+   accelerator: gpu
+   max_epochs: 50
+   min_epochs: 5
+   fast_dev_run: False
+   track_grad_norm: 2
+   # gradient_clip_val: 0.5
+ training_environment:
+   learning_rate: 0.0033
+ model:
+   n_channels: 128
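As a quick reference, the nested layout above is what `get_config` in `train.py` consumes; a short sketch of which sections feed which components:

```python
import yaml

# Load the nested training config introduced in this commit.
with open("models/config/train.yaml") as f:
    config = yaml.safe_load(f)

dance_ids = config["global"]["dance_ids"]   # 15 target classes after the pruning above
dm_kwargs = config["data_module"]           # passed to DanceDataModule(target_classes=dance_ids, **dm_kwargs)
trainer_kwargs = config["trainer"]          # passed to pytorch_lightning.Trainer(**trainer_kwargs)
print(len(dance_ids), dm_kwargs["batch_size"], trainer_kwargs["max_epochs"])  # 15 1024 50
```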
models/residual.py CHANGED
@@ -7,7 +7,7 @@ import numpy as np
  import torchaudio
  import yaml
  from .utils import calculate_metrics
- from preprocessing.pipelines import AudioPipeline
+ from preprocessing.pipelines import WaveformPreprocessing, AudioToSpectrogram

  # Architecture based on: https://github.com/minzwon/sota-music-tagging-models/blob/36aa13b7205ff156cf4dcab60fd69957da453151/training/model.py

@@ -15,6 +15,9 @@ class ResidualDancer(nn.Module):
      def __init__(self,n_channels=128, n_classes=50):
          super().__init__()

+         self.n_channels = n_channels
+         self.n_classes = n_classes
+
          # Spectrogram
          self.spec_bn = nn.BatchNorm2d(1)

@@ -33,7 +36,7 @@ class ResidualDancer(nn.Module):
          self.dense1 = nn.Linear(n_channels*4, n_channels*4)
          self.bn = nn.BatchNorm1d(n_channels*4)
          self.dense2 = nn.Linear(n_channels*4, n_classes)
-         self.dropout = nn.Dropout(0.3)
+         self.dropout = nn.Dropout(0.2)

      def forward(self, x):
          x = self.spec_bn(x)
@@ -88,34 +91,51 @@ class ResBlock(nn.Module):

  class TrainingEnvironment(pl.LightningModule):

-     def __init__(self, model: nn.Module, criterion: nn.Module, learning_rate=1e-4, *args, **kwargs):
+     def __init__(self, model: nn.Module, criterion: nn.Module, config:dict, learning_rate=1e-4, *args, **kwargs):
          super().__init__(*args, **kwargs)
          self.model = model
          self.criterion = criterion
          self.learning_rate = learning_rate
+         self.config=config
+         self.save_hyperparameters({
+             "model": type(model).__name__,
+             "loss": type(criterion).__name__,
+             "config": config,
+             **kwargs
+         })

      def training_step(self, batch: tuple[torch.Tensor, torch.TensorType], batch_index: int) -> torch.Tensor:
          features, labels = batch
          outputs = self.model(features)
          loss = self.criterion(outputs, labels)
-         batch_metrics = calculate_metrics(outputs, labels)
-         self.log_dict(batch_metrics)
+         metrics = calculate_metrics(outputs, labels, prefix="train/", multi_label=True)
+         self.log_dict(metrics, prog_bar=True)
+         # Log spectrograms
+         if batch_index % 100 == 0:
+             tensorboard = self.logger.experiment
+             img_index = torch.randint(0, len(features), (1,)).item()
+             img = features[img_index][0]
+             img = (img - img.min()) / (img.max() - img.min())
+             tensorboard.add_image(f"batch: {batch_index}, element: {img_index}", img, 0, dataformats='HW')
          return loss

+
      def validation_step(self, batch:tuple[torch.Tensor, torch.TensorType], batch_index:int):
          x, y = batch
          preds = self.model(x)
-         metrics = calculate_metrics(preds, y, prefix="val_")
-         metrics["val_loss"] = self.criterion(preds, y)
-         self.log_dict(metrics)
+         metrics = calculate_metrics(preds, y, prefix="val/", multi_label=True)
+         metrics["val/loss"] = self.criterion(preds, y)
+         self.log_dict(metrics,prog_bar=True)

      def test_step(self, batch:tuple[torch.Tensor, torch.TensorType], batch_index:int):
          x, y = batch
          preds = self.model(x)
-         self.log_dict(calculate_metrics(preds, y, prefix="test_"))
+         self.log_dict(calculate_metrics(preds, y, prefix="test/", multi_label=True), prog_bar=True)

      def configure_optimizers(self):
-         return torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
+         optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
+         # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min') {"scheduler": scheduler, "monitor": "val/loss"}
+         return [optimizer]


  class DancePredictor:
@@ -133,7 +153,8 @@ class DancePredictor:
          self.expected_duration = expected_duration
          self.threshold = threshold
          self.resample_frequency = resample_frequency
-         self.audio_pipeline = AudioPipeline(input_freq=self.resample_frequency)
+         self.preprocess_waveform = WaveformPreprocessing(resample_frequency * expected_duration)
+         self.audio_to_spectrogram = AudioToSpectrogram(resample_frequency)
          self.labels = np.array(labels)
          self.device = device
          self.model = self.get_model(weight_path)
@@ -155,20 +176,16 @@ class DancePredictor:

      @torch.no_grad()
      def __call__(self, waveform: np.ndarray, sample_rate:int) -> dict[str,float]:
-         min_sample_len = sample_rate * self.expected_duration
-         if min_sample_len > len(waveform):
-             raise Exception("You must record for at least 6 seconds")
-         if len(waveform.shape) > 1 and waveform.shape[1] > 1:
+         if len(waveform.shape) > 1 and waveform.shape[1] < waveform.shape[0]:
              waveform = waveform.transpose(1,0)
-             waveform = waveform.mean(axis=0, keepdims=True)
-         else:
+         elif len(waveform.shape) == 1:
              waveform = np.expand_dims(waveform, 0)
-         waveform = waveform[: ,:min_sample_len]
          waveform = torch.from_numpy(waveform.astype("int16"))
          waveform = torchaudio.functional.apply_codec(waveform,sample_rate, "wav", channels_first=True)

          waveform = torchaudio.functional.resample(waveform, sample_rate,self.resample_frequency)
-         spectrogram = self.audio_pipeline(waveform)
+         waveform = self.preprocess_waveform(waveform)
+         spectrogram = self.audio_to_spectrogram(waveform)
          spectrogram = spectrogram.unsqueeze(0).to(self.device)

          results = self.model(spectrogram)
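The reshaping branch that replaces the old minimum-length check can be illustrated in isolation (toy shapes; the predictor performs this inside `__call__`):

```python
import numpy as np

# Gradio-style stereo clip, shaped (samples, channels); mono input would be 1-D.
clip = np.zeros((3 * 44100, 2), dtype=np.int16)

if len(clip.shape) > 1 and clip.shape[1] < clip.shape[0]:
    clip = clip.transpose(1, 0)        # channels-last -> channels-first: (2, 132300)
elif len(clip.shape) == 1:
    clip = np.expand_dims(clip, 0)     # mono 1-D -> (1, samples)

print(clip.shape)  # (2, 132300)
```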
models/utils.py CHANGED
@@ -4,6 +4,10 @@ import numpy as np
  from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

  class LabelWeightedBCELoss(nn.Module):
+     """
+     Binary Cross Entropy loss that assumes each float in the final dimension is a binary probability distribution.
+     Allows for the weighing of each probability distribution wrt loss.
+     """
      def __init__(self, label_weights:torch.Tensor, reduction="mean"):
          super().__init__()
          self.label_weights = label_weights
@@ -22,17 +26,37 @@ class LabelWeightedBCELoss(nn.Module):
          return self.reduction(losses)


- def calculate_metrics(pred, target, threshold=0.5, prefix="") -> dict[str, torch.Tensor]:
+ # TODO: Code a onehot
+
+
+ def calculate_metrics(pred, target, threshold=0.5, prefix="", multi_label=True) -> dict[str, torch.Tensor]:
      target = target.detach().cpu().numpy()
      pred = pred.detach().cpu().numpy()
-     pred = np.array(pred > threshold, dtype=float)
+     params = {
+         "y_true": target if multi_label else target.argmax(1) ,
+         "y_pred": np.array(pred > threshold, dtype=float) if multi_label else pred.argmax(1),
+         "zero_division": 0,
+         "average":"macro"
+     }
      metrics= {
-         'precision': precision_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
-         'recall': recall_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
-         'f1': f1_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
-         'accuracy': accuracy_score(y_true=target, y_pred=pred),
+         'precision': precision_score(**params),
+         'recall': recall_score(**params),
+         'f1': f1_score(**params),
+         'accuracy': accuracy_score(y_true=params["y_true"], y_pred=params["y_pred"]),
      }
-     if prefix != "":
-         metrics = {prefix + k : v for k, v in metrics.items()}
-
-     return {k: torch.tensor(v,dtype=torch.float32) for k,v in metrics.items()}
+     return {prefix + k: torch.tensor(v,dtype=torch.float32) for k,v in metrics.items()}
+
+ class EarlyStopping:
+     def __init__(self, patience=0):
+         self.patience = patience
+         self.last_measure = np.inf
+         self.consecutive_increase = 0
+
+     def step(self, val) -> bool:
+         if self.last_measure <= val:
+             self.consecutive_increase +=1
+         else:
+             self.consecutive_increase = 0
+         self.last_measure = val
+
+         return self.patience < self.consecutive_increase
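A toy invocation of the reworked `calculate_metrics`, consistent with the new signature and the `train/`, `val/`, `test/` prefixes used above:

```python
import torch
from models.utils import calculate_metrics

# Two examples, three dance classes, multi-label targets.
preds = torch.tensor([[0.9, 0.2, 0.7],
                      [0.1, 0.8, 0.4]])
labels = torch.tensor([[1., 0., 1.],
                       [0., 1., 0.]])

metrics = calculate_metrics(preds, labels, threshold=0.5, prefix="val/", multi_label=True)
print(metrics["val/accuracy"], metrics["val/f1"])  # tensor(1.) tensor(1.) for this toy batch
```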
models/weights/ResidualDancer/config.json DELETED
@@ -1,24 +0,0 @@
- {
-     "classes": [
-         "ATN",
-         "BBA",
-         "BCH",
-         "BLU",
-         "CHA",
-         "CMB",
-         "CSG",
-         "ECS",
-         "HST",
-         "JIV",
-         "LHP",
-         "QST",
-         "RMB",
-         "SFT",
-         "SLS",
-         "SMB",
-         "SWZ",
-         "TGO",
-         "VWZ",
-         "WCS"
-     ]
- }
models/weights/ResidualDancer/dancer_net.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:1888558eed82a5d99ac1dab55969a9ea36455d11a9370355d1f2b984598d30ff
- size 48453416
assets/song-samples/exs_and_ohs.wav → models/weights/ResidualDancer/weights.ckpt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4e53fe157ff687b5464e98c7d0c03d0712527c3a7ed24b6b063a328fcf7bf608
- size 5292082
+ oid sha256:e107090ff62ac0b79f4f40271e8b1dd6c3d10d8146264ec49df3c8febe99aa23
+ size 193651217
preprocessing/dataset.py CHANGED
@@ -3,87 +3,122 @@ from torch.utils.data import Dataset, DataLoader, random_split
  import numpy as np
  import pandas as pd
  import torchaudio as ta
- from .pipelines import AudioPipeline
+ from .pipelines import AudioTrainingPipeline
  import pytorch_lightning as pl
  from .preprocess import get_examples
+ from sklearn.model_selection import train_test_split



  class SongDataset(Dataset):
      def __init__(self,
                   audio_paths: list[str],
                   dance_labels: list[np.ndarray],
                   audio_duration=30, # seconds
                   audio_window_duration=6, # seconds
+                  audio_window_jitter=0.0, # seconds
+                  audio_pipeline_kwargs={},
+                  resample_frequency=16000
      ):
          assert audio_duration % audio_window_duration == 0, "Audio window should divide duration evenly."
+         assert audio_window_duration > audio_window_jitter, "Jitter should be a small fraction of the audio window duration."

          self.audio_paths = audio_paths
          self.dance_labels = dance_labels
          audio_info = ta.info(audio_paths[0])
          self.sample_rate = audio_info.sample_rate
          self.audio_window_duration = int(audio_window_duration)
+         self.audio_window_jitter = audio_window_jitter
          self.audio_duration = int(audio_duration)

-         self.audio_pipeline = AudioPipeline(input_freq=self.sample_rate)
+         self.audio_pipeline = AudioTrainingPipeline(self.sample_rate, resample_frequency, audio_window_duration, **audio_pipeline_kwargs)

      def __len__(self):
          return len(self.audio_paths) * self.audio_duration // self.audio_window_duration

-     def __getitem__(self, idx) -> tuple[torch.Tensor, torch.Tensor]:
+     def __getitem__(self, idx:int) -> tuple[torch.Tensor, torch.Tensor]:
          waveform = self._waveform_from_index(idx)
+         assert waveform.shape[1] > 10, f"No data found: {self._backtrace_audio_path(idx)}"
          spectrogram = self.audio_pipeline(waveform)

          dance_labels = self._label_from_index(idx)

-         return spectrogram, dance_labels
+         example_is_valid = self._validate_output(spectrogram, dance_labels)
+         if example_is_valid:
+             return spectrogram, dance_labels
+         else:
+             # Try the previous one
+             # This happens when some of the audio recordings are really quiet
+             # This WILL NOT leak into other data partitions because songs belong entirely to a partition
+             return self[idx-1]

+     def _convert_idx(self,idx:int) -> int:
+         return idx * self.audio_window_duration // self.audio_duration
+
+     def _backtrace_audio_path(self, index:int) -> str:
+         return self.audio_paths[self._convert_idx(index)]
+
+     def _validate_output(self,x,y):
+         is_finite = not torch.any(torch.isinf(x))
+         is_numerical = not torch.any(torch.isnan(x))
+         has_data = torch.any(x != 0.0)
+         is_binary = len(torch.unique(y)) < 3
+         return all((is_finite,is_numerical, has_data, is_binary))

      def _waveform_from_index(self, idx:int) -> torch.Tensor:
-         audio_file_idx = idx * self.audio_window_duration // self.audio_duration
-         frame_offset = idx % self.audio_duration // self.audio_window_duration
+         audio_filepath = self.audio_paths[self._convert_idx(idx)]
+         num_windows = self.audio_duration // self.audio_window_duration
+         frame_index = idx % num_windows
+         jitter_start = -self.audio_window_jitter if frame_index > 0 else 0.0
+         jitter_end = self.audio_window_jitter if frame_index != num_windows - 1 else 0.0
+         jitter = int(torch.FloatTensor(1).uniform_(jitter_start, jitter_end) * self.sample_rate)
+         frame_offset = frame_index * self.audio_window_duration * self.sample_rate + jitter
          num_frames = self.sample_rate * self.audio_window_duration
-         waveform, sample_rate = ta.load(self.audio_paths[audio_file_idx], frame_offset=frame_offset, num_frames=num_frames)
+         waveform, sample_rate = ta.load(audio_filepath, frame_offset=frame_offset, num_frames=num_frames)
          assert sample_rate == self.sample_rate, f"Expected sample rate of {self.sample_rate}. Found {sample_rate}"
          return waveform


      def _label_from_index(self, idx:int) -> torch.Tensor:
-         label_idx = idx * self.audio_window_duration // self.audio_duration
-         return torch.from_numpy(self.dance_labels[label_idx])
-
+         return torch.from_numpy(self.dance_labels[self._convert_idx(idx)])

  class DanceDataModule(pl.LightningDataModule):
      def __init__(self,
-                  song_data_path="data/songs.csv",
+                  song_data_path="data/songs_cleaned.csv",
                   song_audio_path="data/samples",
                   test_proportion=0.15,
                   val_proportion=0.1,
                   target_classes:list[str]=None,
+                  min_votes=1,
                   batch_size:int=64,
-                  num_workers=10
+                  num_workers=10,
+                  dataset_kwargs={}
      ):
          super().__init__()
          self.song_data_path = song_data_path
          self.song_audio_path = song_audio_path
          self.val_proportion=val_proportion
          self.test_proportion=test_proportion
-         self.train_proporition= 1.-test_proportion-val_proportion
+         self.train_proportion= 1.-test_proportion-val_proportion
          self.target_classes=target_classes
          self.batch_size = batch_size
          self.num_workers = num_workers
+         self.dataset_kwargs = dataset_kwargs

-         df = pd.read_csv("data/songs.csv")
-         self.x,self.y = get_examples(df, self.song_audio_path,class_list=self.target_classes)
-
+         df = pd.read_csv(song_data_path)
+         self.x,self.y = get_examples(df, self.song_audio_path,class_list=self.target_classes, multi_label=True, min_votes=min_votes)

      def setup(self, stage: str):
-         dataset = SongDataset(self.x,self.y)
-         self.train_ds, self.val_ds, self.test_ds = random_split(dataset, [self.train_proporition, self.val_proportion, self.test_proportion])
+         train_i, val_i, test_i = random_split(np.arange(len(self.x)), [self.train_proportion, self.val_proportion, self.test_proportion])
+         self.train_ds = self._dataset_from_indices(train_i)
+         self.val_ds = self._dataset_from_indices(val_i)
+         self.test_ds = self._dataset_from_indices(test_i)
+
+     def _dataset_from_indices(self, idx:list[int]) -> SongDataset:
+         return SongDataset(self.x[idx], self.y[idx], **self.dataset_kwargs)

-
      def train_dataloader(self):
-         return DataLoader(self.train_ds, batch_size=self.batch_size, num_workers=self.num_workers)
+         return DataLoader(self.train_ds, batch_size=self.batch_size, num_workers=self.num_workers, shuffle=True)

      def val_dataloader(self):
          return DataLoader(self.val_ds, batch_size=self.batch_size, num_workers=self.num_workers)
@@ -92,4 +127,5 @@ class DanceDataModule(pl.LightningDataModule):
          return DataLoader(self.test_ds, batch_size=self.batch_size, num_workers=self.num_workers)

      def get_label_weights(self):
-         return torch.from_numpy(len(self.y) / (len(self.y[0]) * sum(self.y)))
+         n_examples, n_classes = self.y.shape
+         return torch.from_numpy(n_examples / (n_classes * sum(self.y)))
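To make the new window/jitter indexing concrete, a small worked example of the arithmetic that `_convert_idx` and `_waveform_from_index` perform (values chosen for illustration):

```python
# 30 s song samples split into 6 s training windows -> 5 windows per song.
audio_duration = 30
audio_window_duration = 6
num_windows = audio_duration // audio_window_duration  # 5

idx = 7  # dataset index
song_index = idx * audio_window_duration // audio_duration  # _convert_idx -> 1 (second song)
frame_index = idx % num_windows                             # 2 (third window of that song)
print(song_index, frame_index)  # 1 2

# With audio_window_jitter=1.5 the load offset is then shifted by up to +/-1.5 s,
# except at a song's first/last window where the shift is clamped toward the clip.
```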
preprocessing/pipelines.py CHANGED
@@ -1,63 +1,109 @@
  import torch
+ import torchaudio
  from torchaudio import transforms as taT, functional as taF
  import torch.nn as nn

- class AudioPipeline(torch.nn.Module):
-     def __init__(
-         self,
-         input_freq=16000,
-         resample_freq=16000,
-     ):
-         super().__init__()
-         self.resample = taT.Resample(orig_freq=input_freq, new_freq=resample_freq)
-         self.spec = taT.MelSpectrogram(sample_rate=resample_freq, n_mels=64, n_fft=1024)
-         self.to_db = taT.AmplitudeToDB()
-
-     def forward(self, waveform: torch.Tensor) -> torch.Tensor:
-         if waveform.shape[0] > 1:
-             waveform = waveform.mean(0, keepdim=True)
-
-         waveform = (waveform - waveform.mean()) / waveform.abs().max()
-
-         waveform = self.resample(waveform)
-         spectrogram = self.spec(waveform)
-         spectrogram = self.to_db(spectrogram)
-
-         return spectrogram
-
-
- class SpectrogramAugmentationPipeline(torch.nn.Module):
-
-     def __init__(self):
-         super().__init__()
-         self.pipeline = nn.Sequential(
-             taT.FrequencyMasking(80),
-             taT.TimeMasking(80),
-             taT.TimeStretch(80)
-         )
-
-     def forward(self, spectrogram:torch.Tensor) -> torch.Tensor:
-         return self.pipeline(spectrogram)
-
-
- class WaveformAugmentationPipeline(torch.nn.Module):
-     def __init__(self):
-         super().__init__()
-
-
-     def forward(self, waveform:torch.Tensor) -> torch.Tensor:
-         taF.pitch_shift()
-
-
- class AudioTrainingPipeline(torch.nn.Module):
-     def __init__(self):
-         super().__init__()
-         self.waveform_aug = WaveformAugmentationPipeline()
-         self.spec_aug = SpectrogramAugmentationPipeline()
-         self.audio_preprocessing = AudioPipeline()
-
-     def forward(self, waveform:torch.Tensor) -> torch.Tensor:
-         x = self.audio_preprocessing(waveform)
-         x = self.spec_aug(x)
-         return x
+ NOISE_PATH = "data/augmentation/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav"
+
+ class AudioTrainingPipeline(torch.nn.Module):
+     def __init__(self,
+                  input_freq=16000,
+                  resample_freq=16000,
+                  expected_duration=6,
+                  freq_mask_size=10,
+                  time_mask_size=80,
+                  mask_count = 2,
+                  snr_mean=6.0):
+         super().__init__()
+         self.input_freq = input_freq
+         self.snr_mean = snr_mean
+         self.mask_count = mask_count
+         self.noise = self.get_noise()
+         self.resample = taT.Resample(input_freq,resample_freq)
+         self.preprocess_waveform = WaveformPreprocessing(resample_freq * expected_duration)
+         self.audio_to_spectrogram = AudioToSpectrogram(
+             sample_rate=resample_freq,
+         )
+         self.freq_mask = taT.FrequencyMasking(freq_mask_size)
+         self.time_mask = taT.TimeMasking(time_mask_size)
+
+     def get_noise(self) -> torch.Tensor:
+         noise, sr = torchaudio.load(NOISE_PATH)
+         if noise.shape[0] > 1:
+             noise = noise.mean(0, keepdim=True)
+         if sr != self.input_freq:
+             noise = taF.resample(noise,sr, self.input_freq)
+         return noise
+
+     def add_noise(self, waveform:torch.Tensor) -> torch.Tensor:
+         num_repeats = waveform.shape[1] // self.noise.shape[1] + 1
+         noise = self.noise.repeat(1,num_repeats)[:, :waveform.shape[1]]
+         noise_power = noise.norm(p=2)
+         signal_power = waveform.norm(p=2)
+         snr_db = torch.normal(self.snr_mean, 1.5, (1,)).clamp_min(1.0)
+         snr = torch.exp(snr_db / 10)
+         scale = snr * noise_power / signal_power
+         noisy_waveform = (scale * waveform + noise) / 2
+         return noisy_waveform
+
+     def forward(self, waveform:torch.Tensor) -> torch.Tensor:
+         try:
+             waveform = self.resample(waveform)
+         except:
+             print("oops")
+         waveform = self.preprocess_waveform(waveform)
+         waveform = self.add_noise(waveform)
+         spec = self.audio_to_spectrogram(waveform)
+
+         # Spectrogram augmentation
+         for _ in range(self.mask_count):
+             spec = self.freq_mask(spec)
+             spec = self.time_mask(spec)
+         return spec
+
+
+ class WaveformPreprocessing(torch.nn.Module):
+
+     def __init__(self, expected_sample_length:int):
+         super().__init__()
+         self.expected_sample_length = expected_sample_length
+
+     def forward(self, waveform:torch.Tensor) -> torch.Tensor:
+         # Take out extra channels
+         if waveform.shape[0] > 1:
+             waveform = waveform.mean(0, keepdim=True)
+
+         # ensure it is the correct length
+         waveform = self._rectify_duration(waveform)
+         return waveform
+
+     def _rectify_duration(self,waveform:torch.Tensor):
+         expected_samples = self.expected_sample_length
+         sample_count = waveform.shape[1]
+         if expected_samples == sample_count:
+             return waveform
+         elif expected_samples > sample_count:
+             pad_amount = expected_samples - sample_count
+             return torch.nn.functional.pad(waveform, (0, pad_amount),mode="constant", value=0.0)
+         else:
+             return waveform[:,:expected_samples]
+
+
+ class AudioToSpectrogram(torch.nn.Module):
+     def __init__(
+         self,
+         sample_rate=16000,
+     ):
+         super().__init__()
+
+         self.spec = taT.MelSpectrogram(sample_rate=sample_rate, n_mels=128, n_fft=1024) # TODO: Change mels to 64
+         self.to_db = taT.AmplitudeToDB()
+
+     def forward(self, waveform: torch.Tensor) -> torch.Tensor:
+         spectrogram = self.spec(waveform)
+         spectrogram = self.to_db(spectrogram)
+         return spectrogram
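A quick shape check of the two deterministic stages added above (a sketch; it sidesteps `AudioTrainingPipeline` so the VOiCES noise file is not required):

```python
import torch
from preprocessing.pipelines import WaveformPreprocessing, AudioToSpectrogram

# Stand-in for a 6-second mono clip at 16 kHz.
waveform = torch.randn(1, 6 * 16000)

preprocess = WaveformPreprocessing(expected_sample_length=6 * 16000)
to_spec = AudioToSpectrogram(sample_rate=16000)

spec = to_spec(preprocess(waveform))
print(spec.shape)  # torch.Size([1, 128, 188]) with n_mels=128, n_fft=1024 (hop 512)
```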
preprocessing/preprocess.py CHANGED
@@ -4,8 +4,9 @@ import re
  import json
  from pathlib import Path
  import os
+ import torchaudio
  import torch
- import torchaudio.transforms as taT
+ from tqdm import tqdm

  def url_to_filename(url:str) -> str:
      return f"{url.split('/')[-1]}.wav"
@@ -17,6 +18,35 @@ def get_songs_with_audio(df:pd.DataFrame, audio_dir:str) -> pd.DataFrame:
      df = df[valid_audio]
      return df

+ def validate_audio(audio_urls:pd.Series, audio_dir:str) -> pd.Series:
+     """
+     Tests audio urls to ensure that their file exists and the contents is valid.
+     """
+     audio_files = set(os.path.basename(f) for f in Path(audio_dir).iterdir())
+     def is_valid(url):
+         valid_url = type(url) == str and "http" in url
+         if not valid_url:
+             return False
+         filename = url_to_filename(url)
+         if filename not in audio_files:
+             return False
+         try:
+             w, _ = torchaudio.load(os.path.join(audio_dir, filename))
+         except:
+             return False
+         contents_invalid = torch.any(torch.isnan(w)) or torch.any(torch.isinf(w)) or len(torch.unique(w)) <= 2
+         return not contents_invalid
+
+     idxs = []
+     validations = []
+     for index, url in tqdm(audio_urls.items(), total=len(audio_urls), desc="Audio URLs Validated"):
+         idxs.append(index)
+         validations.append(is_valid(url))
+
+     return pd.Series(validations, index=idxs)
+
+
+
  def fix_dance_rating_counts(dance_ratings:pd.Series) -> pd.Series:
      tag_pattern = re.compile("([A-Za-z]+)(\+|-)(\d+)")
      dance_ratings = dance_ratings.apply(lambda v : json.loads(v.replace("'", "\"")))
@@ -64,7 +94,7 @@ def vectorize_multi_label(labels: dict[str,int], unique_labels:np.ndarray) -> np
      probs[probs > 0.0] = 1.0
      return probs

- def get_examples(df:pd.DataFrame, audio_dir:str, class_list=None) -> tuple[list[str], list[np.ndarray]]:
+ def get_examples(df:pd.DataFrame, audio_dir:str, class_list=None, multi_label=True, min_votes=1) -> tuple[np.ndarray, np.ndarray]:
      sampled_songs = get_songs_with_audio(df, audio_dir)
      sampled_songs.loc[:,"DanceRating"] = fix_dance_rating_counts(sampled_songs["DanceRating"])
      if class_list is not None:
@@ -74,11 +104,28 @@ def get_examples(df:pd.DataFrame, audio_dir:str, class_list=None) -> tuple[list[
          if not pd.isna(labels) and any(label in class_list and amt > 0 for label, amt in labels.items())
          else np.nan)
      sampled_songs = sampled_songs.dropna(subset=["DanceRating"])
-     labels = sampled_songs["DanceRating"]
+     vote_mask = sampled_songs["DanceRating"].apply(lambda dances: any(votes >= min_votes for votes in dances.values()))
+     sampled_songs = sampled_songs[vote_mask]
+     labels = sampled_songs["DanceRating"].apply(lambda dances : {dance: votes for dance, votes in dances.items() if votes >= min_votes})
      unique_labels = np.array(get_unique_labels(labels))
-     labels = labels.apply(lambda i : vectorize_multi_label(i, unique_labels))
+     vectorizer = vectorize_multi_label if multi_label else vectorize_label_probs
+     labels = labels.apply(lambda i : vectorizer(i, unique_labels))

      audio_paths = [os.path.join(audio_dir, url_to_filename(url)) for url in sampled_songs["Sample"]]

-     return audio_paths, list(labels)
+     return np.array(audio_paths), np.stack(labels)
+
+
+ if __name__ == "__main__":
+     links = pd.read_csv("data/backup_2.csv", index_col="index")
+     df = pd.read_csv("data/songs.csv")
+     l = links["link"].str.strip()
+     l = l.apply(lambda url : url if "http" in url else np.nan)
+     l = l.dropna()
+     df["Sample"].update(l)
+     addna = lambda url : url if type(url) == str and "http" in url else np.nan
+     df["Sample"] = df["Sample"].apply(addna)
+     is_valid = validate_audio(df["Sample"],"data/samples")
+     df["valid"] = is_valid
+     df.to_csv("data/songs_validated.csv")
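The effect of the new `min_votes` filter in `get_examples` on a single song's ratings, illustrated with invented numbers:

```python
# A song's parsed DanceRating after fix_dance_rating_counts (values invented).
ratings = {"SWZ": 3, "TGO": 1}
min_votes = 2

keep_song = any(votes >= min_votes for votes in ratings.values())   # True, SWZ has 3 votes
kept_labels = {d: v for d, v in ratings.items() if v >= min_votes}  # {"SWZ": 3}
print(keep_song, kept_labels)
```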
requirements.txt CHANGED
@@ -1,6 +1,7 @@
- torchvision
  torch
+ torchvision
  torchaudio
+ pytorch-lightning
  numpy
  pandas
  seaborn
scrapers/spotify.py CHANGED
@@ -49,14 +49,14 @@ def patch_missing_songs(
          if preview_url is not None:
              row["Sample"] = preview_url
          return row
-     backup_file = open("data/backup_1.csv", "a")
      rows = []
      indices = []
+     after = 18418
+     missing_df = missing_df.iloc[after:]
      total_rows = len(missing_df)
-     for i, row in tqdm(missing_df.iloc[11121:].iterrows(),total=total_rows):
+     for i, row in tqdm(missing_df.iterrows(),total=total_rows):
          patched_row = patch_preview(row)
-         backup_file.write(f"{i}, {patched_row['Sample']}\n")
-         rows.append(patch_preview(row))
+         rows.append(patched_row)
          indices.append(i)


@@ -65,23 +65,10 @@ def patch_missing_songs(
      return df


- def download_links():
-     start = 3180
-     with open("data/backup_2.csv") as f:
+ def download_links_from_backup(backup_file:str, output_dir:str):
+     with open(backup_file) as f:
          links = [x.split(",")[1].strip() for x in f.readlines()]
-     links = links[start:]
      links = [l for l in links if "https" in l]
-     links = links[2680:]
      for link in tqdm(links, "Songs Downloaded"):
-         download_song(link, "data/spotify-samples")
+         download_song(link, output_dir)
          time.sleep(5e-3) # hopefully wont be rate limited with delay 🤞
-
-
-
-
- if __name__ == "__main__":
-     df = pd.read_csv("data/songs.csv")
-     patched = patch_missing_songs(df)
-     patched.to_csv("data/last_part.csv")
-
-
tests.py ADDED
@@ -0,0 +1,22 @@
+ import torchaudio
+ import numpy as np
+ from audio_utils import play_audio
+ from preprocessing.dataset import SongDataset
+
+ def test_audio_splitting():
+
+
+
+     audio_paths = ["data/samples/95f2df65f7450db3b1af29aa77ba7edc6ab52075?cid=7ffadeb2e136495fb5a62d1ac9be8f62.wav"]
+     labels = [np.array([1,0,1,0])]
+     whole_song, sr = torchaudio.load("data/samples/95f2df65f7450db3b1af29aa77ba7edc6ab52075?cid=7ffadeb2e136495fb5a62d1ac9be8f62.wav")
+
+     ds = SongDataset(audio_paths, labels)
+     song_parts = (ds._waveform_from_index(i) for i in range(len(ds)))
+     print("Sample Parts")
+     for part in song_parts:
+         play_audio(part,sr)
+
+
+     print("Whole Sample")
+     play_audio(whole_song,sr)
train.py CHANGED
@@ -1,196 +1,69 @@
- import datetime
- import os
- import torch
  from torch.utils.data import DataLoader
- import torch.nn as nn
- from tqdm import tqdm
  import pandas as pd
- import numpy as np
- from torch.utils.data import random_split, SubsetRandomSampler
- import json
+ from torch import nn
+ from torch.utils.data import SubsetRandomSampler
  from sklearn.model_selection import KFold
-
- from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
+ import pytorch_lightning as pl
+ from pytorch_lightning import callbacks as cb
+ from models.utils import LabelWeightedBCELoss
  from preprocessing.dataset import SongDataset
  from preprocessing.preprocess import get_examples
- from models.residual import ResidualDancer
-
- DEVICE = "mps"
- SEED = 42
- TARGET_CLASSES = ['ATN',
-     'BBA',
-     'BCH',
-     'BLU',
-     'CHA',
-     'CMB',
-     'CSG',
-     'ECS',
-     'HST',
-     'JIV',
-     'LHP',
-     'QST',
-     'RMB',
-     'SFT',
-     'SLS',
-     'SMB',
-     'SWZ',
-     'TGO',
-     'VWZ',
-     'WCS']
-
- def get_timestamp() -> str:
-     return datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
-
- class EarlyStopping:
-     def __init__(self, patience=0):
-         self.patience = patience
-         self.last_measure = np.inf
-         self.consecutive_increase = 0
-
-     def step(self, val) -> bool:
-         if self.last_measure <= val:
-             self.consecutive_increase +=1
-         else:
-             self.consecutive_increase = 0
-         self.last_measure = val
-
-         return self.patience < self.consecutive_increase
-
-
-
- def calculate_metrics(pred, target, threshold=0.5, prefix=""):
-     target = target.detach().cpu().numpy()
-     pred = pred.detach().cpu().numpy()
-     pred = np.array(pred > threshold, dtype=float)
-     metrics= {
-         'precision': precision_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
-         'recall': recall_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
-         'f1': f1_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
-         'accuracy': accuracy_score(y_true=target, y_pred=pred),
-     }
-     if prefix != "":
-         metrics = {prefix + k : v for k, v in metrics.items()}
-
-     return metrics
+ from models.residual import ResidualDancer, TrainingEnvironment
+ import yaml
+ from preprocessing.dataset import DanceDataModule
+ from wakepy import keepawake

+ def get_config(filepath:str) -> dict:
+     with open(filepath, "r") as f:
+         config = yaml.safe_load(f)
+     return config

- def evaluate(model:nn.Module, data_loader:DataLoader, criterion, device="mps") -> pd.Series:
-     val_metrics = []
-     for features, labels in (prog_bar := tqdm(data_loader)):
-         features = features.to(device)
-         labels = labels.to(device)
-         with torch.no_grad():
-             outputs = model(features)
-             loss = criterion(outputs, labels)
-         batch_metrics = calculate_metrics(outputs, labels, prefix="val_")
-         batch_metrics["val_loss"] = loss.item()
-         prog_bar.set_description(f'Validation - Loss: {batch_metrics["val_loss"]:.2f}, Accuracy: {batch_metrics["val_accuracy"]:.2f}')
-         val_metrics.append(batch_metrics)
-     return pd.DataFrame(val_metrics).mean()
-
-
-
- def train(
-     model: nn.Module,
-     data_loader: DataLoader,
-     val_loader=None,
-     epochs=3,
-     lr=1e-3,
-     device="mps"):
-     criterion = nn.BCELoss()
-     optimizer = torch.optim.Adam(model.parameters(),lr=lr)
-     early_stop = EarlyStopping(1)
-     scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr,
-                                                     steps_per_epoch=int(len(data_loader)),
-                                                     epochs=epochs,
-                                                     anneal_strategy='linear')
-     metrics = []
-     for epoch in range(1,epochs+1):
-         train_metrics = []
-         prog_bar = tqdm(data_loader)
-         for features, labels in prog_bar:
-             features = features.to(device)
-             labels = labels.to(device)
-             optimizer.zero_grad()
-             outputs = model(features)
-             loss = criterion(outputs, labels)
-             loss.backward()
-             optimizer.step()
-             scheduler.step()
-             batch_metrics = calculate_metrics(outputs, labels)
-             batch_metrics["loss"] = loss.item()
-             train_metrics.append(batch_metrics)
-             prog_bar.set_description(f'Training - Epoch: {epoch}/{epochs}, Loss: {batch_metrics["loss"]:.2f}, Accuracy: {batch_metrics["accuracy"]:.2f}')
-         train_metrics = pd.DataFrame(train_metrics).mean()
-         if val_loader is not None:
-             val_metrics = evaluate(model, val_loader, criterion)
-             if early_stop.step(val_metrics["val_f1"]):
-                 break
-             epoch_metrics = pd.concat([train_metrics, val_metrics], axis=0)
-         else:
-             epoch_metrics = train_metrics
-         metrics.append(dict(epoch_metrics))
-
-     return model, metrics
-
-
- def cross_validation(seed=42, batch_size=64, k=5, device="mps"):
+ def cross_validation(config, k=5):
      df = pd.read_csv("data/songs.csv")
-     x,y = get_examples(df, "data/samples",class_list=TARGET_CLASSES)
-
+     g_config = config["global"]
+     batch_size = config["data_module"]["batch_size"]
+     x,y = get_examples(df, "data/samples",class_list=g_config["dance_ids"])
      dataset = SongDataset(x,y)
-     splits=KFold(n_splits=k,shuffle=True,random_state=seed)
-     metrics = []
+     splits=KFold(n_splits=k,shuffle=True,random_state=g_config["seed"])
+     trainer = pl.Trainer(accelerator=g_config["device"])
      for fold, (train_idx,val_idx) in enumerate(splits.split(x,y)):
          print(f"Fold {fold+1}")
-
+         model = ResidualDancer(n_classes=len(g_config["dance_ids"]))
+         train_env = TrainingEnvironment(model,nn.BCELoss())
          train_sampler = SubsetRandomSampler(train_idx)
          test_sampler = SubsetRandomSampler(val_idx)
          train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
          test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
-         n_classes = len(y[0])
-         model = ResidualDancer(n_classes=n_classes).to(device)
-         model, _ = train(model,train_loader, epochs=2, device=device)
-         val_metrics = evaluate(model, test_loader, nn.BCELoss())
-         metrics.append(val_metrics)
-     metrics = pd.DataFrame(metrics)
-     log_dir = os.path.join(
-         "logs", get_timestamp()
-     )
-     os.makedirs(log_dir, exist_ok=True)
-
-     metrics.to_csv(model.state_dict(), os.path.join(log_dir, "cross_val.csv"))
-
-
+         trainer.fit(train_env, train_loader)
+         trainer.test(train_env, test_loader)

- def train_model():
-
-     df = pd.read_csv("data/songs.csv")
-     x,y = get_examples(df, "data/samples",class_list=TARGET_CLASSES)
-     dataset = SongDataset(x,y)
-     train_count = int(len(dataset) * 0.9)
-     datasets = random_split(dataset, [train_count, len(dataset) - train_count], torch.Generator().manual_seed(SEED))
-     data_loaders = [DataLoader(data, batch_size=64, shuffle=True) for data in datasets]
-     train_data, val_data = data_loaders
-     example_spec, example_label = dataset[0]
-     n_classes = len(example_label)
-     model = ResidualDancer(n_classes=n_classes).to(DEVICE)
-     model, metrics = train(model,train_data, val_data, epochs=3, device=DEVICE)
-
-     log_dir = os.path.join(
-         "logs", get_timestamp()
-     )
-     os.makedirs(log_dir, exist_ok=True)
-
-     torch.save(model.state_dict(), os.path.join(log_dir, "residual_dancer.pt"))
-     metrics = pd.DataFrame(metrics)
-     metrics.to_csv(os.path.join(log_dir, "metrics.csv"))
-     config = {
-         "classes": TARGET_CLASSES
-     }
-     with open(os.path.join(log_dir, "config.json")) as f:
-         json.dump(config, f)
-     print("Training information saved!")
+ def train_model(config:dict):
+     TARGET_CLASSES = config["global"]["dance_ids"]
+     DEVICE = config["global"]["device"]
+     SEED = config["global"]["seed"]
+     pl.seed_everything(SEED, workers=True)
+     data = DanceDataModule(target_classes=TARGET_CLASSES, **config['data_module'])
+     model = ResidualDancer(n_classes=len(TARGET_CLASSES), **config['model'])
+     label_weights = data.get_label_weights().to(DEVICE)
+     criterion = LabelWeightedBCELoss(label_weights) #nn.CrossEntropyLoss(label_weights)
+     train_env = TrainingEnvironment(model, criterion, config)
+     callbacks = [
+         # cb.LearningRateFinder(update_attr=True),
+         cb.EarlyStopping("val/loss", patience=5),
+         cb.StochasticWeightAveraging(1e-2),
+         cb.RichProgressBar()
+     ]
+     trainer = pl.Trainer(
+         callbacks=callbacks,
+         **config["trainer"]
+     )
+     trainer.fit(train_env, datamodule=data)
+     trainer.test(train_env, datamodule=data)


  if __name__ == "__main__":
-     cross_validation()
+     config = get_config("models/config/train.yaml")
+     with keepawake():
+         train_model(config)