[WIP] foot contact classification #1

Open · wants to merge 8 commits into main
Changes from 1 commit
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
*.DS_Store
data/
Geometry/
Geometry/
outputs/
67 changes: 37 additions & 30 deletions AddBiomechanicsDataset.py
@@ -20,7 +20,7 @@ class OutputDataKeys:


class AddBiomechanicsDataset(Dataset):
folder_path: str
data_path: str
window_size: int
stride: int
device: torch.device
@@ -29,8 +29,8 @@ class AddBiomechanicsDataset(Dataset):
input_dof_indices: List[int]
windows: List[Tuple[nimble.biomechanics.SubjectOnDisk, int, int, str]]

def __init__(self, folder_path: str, window_size: int, stride: int, input_dofs: List[str], device: torch.device = torch.device('cpu')):
self.folder_path = folder_path
def __init__(self, data_path: str, window_size: int, stride: int, input_dofs: List[str], device: torch.device = torch.device('cpu')):
self.data_path = data_path
self.window_size = window_size
self.stride = stride
self.input_dofs = input_dofs
@@ -40,33 +40,40 @@ def __init__(self, folder_path: str, window_size: int, stride: int, input_dofs:

# Walk the folder path, and check for any with the ".bin" extension (indicating that they are AddBiomechanics binary data files)
num_skipped = 0
for root, dirs, files in os.walk(folder_path):
for file in files:
if file.endswith(".bin"):
# Create a subject object for each file. This will load just the header from this file, and keep that around in memory
subject_path = os.path.join(root, file)
subject = nimble.biomechanics.SubjectOnDisk(
subject_path)
# Add the subject to the list of subjects
self.subjects.append(subject)
# Also, count how many random windows we could select from this subject
for trial in range(subject.getNumTrials()):
probably_missing: List[bool] = subject.getProbablyMissingGRF(trial)

trial_length = subject.getTrialLength(trial)
for window_start in range(max(trial_length - (window_size * stride) + 1, 0)):
# Check if any of the frames in this window are probably missing GRF data
# If so, skip this window
skip = False
for i in range(window_start, window_start + window_size):
if probably_missing[i]:
skip = True
break
if not skip:
self.windows.append(
(subject, trial, window_start, subject_path))
else:
num_skipped += 1
subject_paths = []
if os.path.isdir(data_path):
for root, dirs, files in os.walk(data_path):
for file in files:
if file.endswith(".bin"):
subject_paths.append(os.path.join(root, file))
else:
assert data_path.endswith(".bin")
subject_paths.append(data_path)

for subject_path in subject_paths:
# Create a subject object for each file. This will load just the header from this file, and keep that around in memory
subject = nimble.biomechanics.SubjectOnDisk(
subject_path)
# Add the subject to the list of subjects
self.subjects.append(subject)
# Also, count how many random windows we could select from this subject
for trial in range(subject.getNumTrials()):
probably_missing: List[bool] = subject.getProbablyMissingGRF(trial)

trial_length = subject.getTrialLength(trial)
for window_start in range(max(trial_length - (window_size * stride) + 1, 0)):
# Check if any of the frames in this window are probably missing GRF data
# If so, skip this window
skip = False
for i in range(window_start, window_start + window_size):
if probably_missing[i]:
skip = True
break
if not skip:
self.windows.append(
(subject, trial, window_start, subject_path))
else:
num_skipped += 1

print('Num windows: ' + str(len(self.windows)))
print('Num skipped due to missing GRF: ' + str(num_skipped))
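With this change the dataset constructor accepts either a directory, which is walked recursively for *.bin subject files, or the path to a single .bin file (anything else trips the assert). A minimal usage sketch of both modes, assuming hypothetical local paths under ./data/:

import torch
from AddBiomechanicsDataset import AddBiomechanicsDataset

input_dofs = ['knee_angle_l', 'knee_angle_r', 'hip_flexion_l',
              'hip_flexion_r', 'hip_adduction_l', 'hip_adduction_r']

# Directory mode: indexes every *.bin file found under ./data/dev
dev_set = AddBiomechanicsDataset('./data/dev', window_size=50, stride=20,
                                 input_dofs=input_dofs, device=torch.device('cpu'))

# Single-file mode: the path itself must end in ".bin" (subject01.bin is a placeholder name)
one_subject = AddBiomechanicsDataset('./data/dev/subject01.bin', window_size=50,
                                     stride=20, input_dofs=input_dofs,
                                     device=torch.device('cpu'))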
57 changes: 57 additions & 0 deletions analyse.py
@@ -0,0 +1,57 @@
import torch
from torch.utils.data import DataLoader
from main import get_model
from AddBiomechanicsDataset import AddBiomechanicsDataset
from LossEvaluator import LossEvaluator
from typing import Dict, Tuple, List
import glob
import pickle

import warnings
warnings.filterwarnings("ignore")

window_size = 50
stride = 20
batch_size = 32
device = 'cpu'

# Input dofs to train on
input_dofs = ['knee_angle_l', 'knee_angle_r', 'hip_flexion_l', 'hip_flexion_r', 'hip_adduction_l', 'hip_adduction_r']

# load trained model
model = get_model()
load_epoch = 0
load_batch = 88000
model_path = f"./outputs/models/epoch_{load_epoch}_batch_{load_batch}.pt"
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint["model_state_dict"])

# analyze a given file
def analyse_file(file_path):
analyse_dataset = AddBiomechanicsDataset(file_path, window_size, stride, input_dofs=input_dofs, device=torch.device(device))
analyse_dataloader = DataLoader(analyse_dataset, batch_size=batch_size, shuffle=False)

analysis_evaluator = LossEvaluator(contact_weight=1.0, com_acc_weight=1e-3, contact_forces_weight=1e-3)

with torch.no_grad():
for i, batch in enumerate(analyse_dataloader):
if i % 100 == 0:
print(' - Dev Batch ' + str(i) + '/' + str(len(analyse_dataloader)))
inputs: Dict[str, torch.Tensor]
labels: Dict[str, torch.Tensor]
inputs, labels = batch
outputs = model(inputs)
loss = analysis_evaluator(outputs, labels)
return analysis_evaluator

def analyse_folder(folder_path):
files = glob.glob(f"{folder_path}/**/*.bin", recursive=True)
for i, file in enumerate(files):
analysis_evaluator = analyse_file(file)
pickle.dump((file, analysis_evaluator), open(f"./outputs/analysis/{i}.pkl", "wb"))

if __name__ == "__main__":
# file_path = "/Users/rishi/Documents/Academics/stanford/human-body-dynamics/InferBiomechanics/data/processed/standardized/rajagopal_no_arms/data/protected/us-west-2:43f17b51-2473-445e-8701-feae8881071f/data/S02/4af1b16b78e1fb1a36964be976ad5bb530b1c9f9e9302a04b5d96282a6d80876/4af1b16b78e1fb1a36964be976ad5bb530b1c9f9e9302a04b5d96282a6d80876.bin"
# analyse_file(file_path)
folder_path = "/Users/rishi/Documents/Academics/stanford/human-body-dynamics/InferBiomechanics/data/processed"
analyse_folder(folder_path)
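analyse_folder dumps one pickle per subject file into ./outputs/analysis/, each holding the file path together with its populated LossEvaluator. One caveat worth flagging: torch.load(model_path) with no map_location will fail on a CPU-only machine if the checkpoint was saved from a GPU, and the dropout layers stay active unless the model is switched to eval mode. A small hedged sketch of a safer load, reusing the model_path and device variables defined above:

import torch
checkpoint = torch.load(model_path, map_location=torch.device(device))
model.load_state_dict(checkpoint["model_state_dict"])
model.eval()  # turn off dropout for analysis; torch.no_grad() alone does not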
151 changes: 82 additions & 69 deletions main.py
@@ -14,81 +14,94 @@
stride = 20
# The batch size is the number of windows we want to load at once, for parallel training and inference on a GPU
batch_size = 32
# The number of epochs is the number of times we want to iterate over the entire dataset during training
epochs = 40
# Learning rate
learning_rate = 1e-3
# learning_rate = 1e-1

device = 'cpu'

# Input dofs to train on
input_dofs = ['knee_angle_l', 'knee_angle_r', 'hip_flexion_l', 'hip_flexion_r', 'hip_adduction_l', 'hip_adduction_r']

# Create an instance of the dataset
train_dataset = AddBiomechanicsDataset(
'./data/train', window_size, stride, input_dofs=input_dofs, device=torch.device(device))
dev_dataset = AddBiomechanicsDataset(
'./data/dev', window_size, stride, input_dofs=input_dofs, device=torch.device(device))

# Create a DataLoader to load the data in batches
train_dataloader = DataLoader(
train_dataset, batch_size=batch_size, shuffle=True)
dev_dataloader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=True)

# Define the model
# hidden_size = 2 * ((len(input_dofs) * window_size * 3) + (window_size * 3))
hidden_size = 256
model = FeedForwardBaseline(len(input_dofs), window_size, hidden_size, dropout_prob=0.1, device=device)


# Define the optimizer
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
# Iterate over the entire training dataset
loss_evaluator = LossEvaluator(
contact_weight=1.0, com_acc_weight=1e-3, contact_forces_weight=1e-3)
for i, batch in enumerate(train_dataloader):
inputs: Dict[str, torch.Tensor]
labels: Dict[str, torch.Tensor]
inputs, labels = batch

# Clear the gradients
optimizer.zero_grad()

# Forward pass
outputs = model(inputs)

# Compute the loss
loss = loss_evaluator(outputs, labels)

if i % 100 == 0:
print(' - Batch '+str(i)+'/'+str(len(train_dataloader)))
if i % 1000 == 0:
loss_evaluator.print_report()

# Backward pass
loss.backward()

# Update the model's parameters
optimizer.step()
# Report training loss on this epoch
print('Epoch '+str(epoch)+': ')
print('Training Set Evaluation: ')
loss_evaluator.print_report()

# At the end of each epoch, evaluate the model on the dev set
dev_loss_evaluator = LossEvaluator(
contact_weight=1.0, com_acc_weight=1e-3, contact_forces_weight=1e-3)
with torch.no_grad():
for i, batch in enumerate(dev_dataloader):
if i % 100 == 0:
print(' - Dev Batch ' + str(i) + '/' + str(len(dev_dataloader)))
def get_model():
# Define the model
# hidden_size = 2 * ((len(input_dofs) * window_size * 3) + (window_size * 3))
hidden_size = 256
model = FeedForwardBaseline(len(input_dofs), window_size, hidden_size, dropout_prob=0.1, device=device)

return model

if __name__ == "__main__":
model = get_model()

# Create an instance of the dataset
train_dataset = AddBiomechanicsDataset(
'./data/train', window_size, stride, input_dofs=input_dofs, device=torch.device(device))
dev_dataset = AddBiomechanicsDataset(
'./data/dev', window_size, stride, input_dofs=input_dofs, device=torch.device(device))

# Create a DataLoader to load the data in batches
train_dataloader = DataLoader(
train_dataset, batch_size=batch_size, shuffle=True)
dev_dataloader = DataLoader(dev_dataset, batch_size=batch_size, shuffle=True)

# The number of epochs is the number of times we want to iterate over the entire dataset during training
epochs = 40
# Learning rate
learning_rate = 1e-3
# learning_rate = 1e-1

# Define the optimizer
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
# Iterate over the entire training dataset
loss_evaluator = LossEvaluator(
contact_weight=1.0, com_acc_weight=1e-3, contact_forces_weight=1e-3)
for i, batch in enumerate(train_dataloader):
inputs: Dict[str, torch.Tensor]
labels: Dict[str, torch.Tensor]
inputs, labels = batch

# Clear the gradients
optimizer.zero_grad()

# Forward pass
outputs = model(inputs)
loss = dev_loss_evaluator(outputs, labels)
# Report dev loss on this epoch
print('Dev Set Evaluation: ')
dev_loss_evaluator.print_report()

# Compute the loss
loss = loss_evaluator(outputs, labels)

if i % 100 == 0:
print(' - Batch '+str(i)+'/'+str(len(train_dataloader)))
if i % 1000 == 0:
loss_evaluator.print_report()
model_path = f"./outputs/models/epoch_{epoch}_batch_{i}.pt"
torch.save({
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
}, model_path)

# Backward pass
loss.backward()

# Update the model's parameters
optimizer.step()
# Report training loss on this epoch
print('Epoch '+str(epoch)+': ')
print('Training Set Evaluation: ')
loss_evaluator.print_report()

# At the end of each epoch, evaluate the model on the dev set
dev_loss_evaluator = LossEvaluator(
contact_weight=1.0, com_acc_weight=1e-3, contact_forces_weight=1e-3)
with torch.no_grad():
for i, batch in enumerate(dev_dataloader):
if i % 100 == 0:
print(' - Dev Batch ' + str(i) + '/' + str(len(dev_dataloader)))
inputs: Dict[str, torch.Tensor]
labels: Dict[str, torch.Tensor]
inputs, labels = batch
outputs = model(inputs)
loss = dev_loss_evaluator(outputs, labels)
# Report dev loss on this epoch
print('Dev Set Evaluation: ')
dev_loss_evaluator.print_report()
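The checkpoint written every 1000 batches stores the epoch, the model weights, and the optimizer state, so an interrupted run can be resumed. A minimal sketch of resuming, assuming a saved file such as ./outputs/models/epoch_0_batch_1000.pt (the epoch/batch numbers are placeholders):

import torch
from main import get_model

model = get_model()
optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-3)  # same learning rate as training

checkpoint = torch.load('./outputs/models/epoch_0_batch_1000.pt')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch']  # pick the training loop back up from here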
12 changes: 12 additions & 0 deletions print_files.py
@@ -0,0 +1,12 @@
import os
import pickle

files_with_acc = []
for i in range(35):
file, analysis_evaluator = pickle.load(open(f"./outputs/analysis/{i}.pkl", "rb"))
acc = analysis_evaluator.sum_correct_foot_classifications / analysis_evaluator.sum_timesteps if analysis_evaluator.sum_timesteps else -1
files_with_acc.append((file, acc))


sorted_files = sorted(files_with_acc, key=lambda x: x[1])
print(sorted_files)
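Because files with zero evaluated timesteps get the sentinel accuracy of -1, they sort to the front of sorted_files. A small follow-up sketch, appended to the script above, that skips the sentinel entries and prints the ten lowest-accuracy files one per line (the cutoff of 10 is arbitrary):

# Drop the -1 sentinel entries and show the hardest files first
worst = [(file, acc) for file, acc in sorted_files if acc >= 0][:10]
for file, acc in worst:
    print(f"{acc:.3f}  {file}")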