Convert jam track to notes

The jam track is separated into notes grouped by string; each note contains multiple pitches. Related to: #7
anthonio9 · Feb 5, 2024 · 457055b · 457055b
1 parent ac8e3b3
commit 457055b
Show file tree

Hide file tree

Showing 2 changed files with 65 additions and 1 deletion.
diff --git a/penn/data/preprocess/core.py b/penn/data/preprocess/core.py
@@ -39,6 +39,10 @@
 JAMS_STRING_IDX = 'data_source'
 JAMS_METADATA = 'file_metadata'
 
+JAMS_FREQ = "frequency"  # observation value key; also the notes dict key for frequencies
+JAMS_INDEX = "index"  # observation value key holding the note index
+JAMS_TIMES = "times"  # notes dict key for the timestamps
+
 ###############################################################################
 # Preprocess datasets
 ###############################################################################
@@ -597,3 +601,63 @@ def extract_pitch_array_jams(jam: jams.JAMS, track, uniform=True) -> Tuple[np.nd
     pitch_array = np.vstack(pitch_list)
 
     return pitch_array, time_steps_array
+
+
+def jams_to_notes(jam: jams.JAMS):
+    """
+    Group the voiced pitch observations of a JAMS track into notes per string.
+
+    Observations that are unvoiced or have a zero frequency are skipped, so
+    a note may end up with empty lists.
+
+    Parameters:
+        jam
+            jams object containing all the information about a track
+    Returns:
+        notes dict
+            maps the integer string label of every pitch annotation to
+            {JAMS_FREQ: [[freq, ...], ...], JAMS_TIMES: [[time, ...], ...]},
+            where the position in each outer list is the note index
+            (JAMS_INDEX) stored in the observation values
+    """
+    notes = {}
+
+    # Loop through all of the pitch annotations
+    for slice_pitches in jam.annotations[JAMS_PITCH_HZ]:
+        # Extract the string label for this annotation
+        string = slice_pitches.annotation_metadata[JAMS_STRING_IDX]
+
+        # Size the note lists by the largest note index present instead of
+        # trusting the last observation to carry it; an annotation without
+        # observations yields empty lists
+        note_count = max(
+            (pitch.value[JAMS_INDEX] for pitch in slice_pitches),
+            default=-1) + 1
+
+        # Prepare one empty list per note
+        note_list = [[] for _ in range(note_count)]
+        notes_times_list = [[] for _ in range(note_count)]
+
+        for pitch in slice_pitches:
+            # Extract the pitch
+            freq = pitch.value[JAMS_FREQ]
+
+            # Don't keep track of zero or unvoiced frequencies
+            if np.sum(freq) != 0 and pitch.value['voiced']:
+                note_index = pitch.value[JAMS_INDEX]
+                note_list[note_index].append(freq)
+                notes_times_list[note_index].append(pitch.time)
+
+        notes[int(string)] = {
+            JAMS_FREQ: note_list,
+            JAMS_TIMES: notes_times_list}
+
+    return notes
+
+
+def notes_to_pitch_array():
+    """Stub with no behavior yet; currently returns None."""
+
+
+def remove_overhangs(notes_dict: dict):
+    """Stub with no behavior yet; ignores notes_dict and returns None."""
diff --git a/penn/plot/raw_data/core.py b/penn/plot/raw_data/core.py
@@ -226,8 +226,8 @@ def from_data(data_dir, file_stem):
     jams_track = jams.load(str(pitch_file))
     pitch_dict = extract_pitch(jams_track)
     midi_dict = extract_midi(jams_track)
-    df = pd.read_csv('https://raw.githubusercontent.com/jonmmease/plotly_ipywidget_notebooks/master/notebooks/data/cars/cars.csv')
 
+    penn.data.preprocess.jams_to_notes(jams_track)
     # pitch_with_plotly(pitch_dict)
     # edit_with_plotly(pitch_dict)
     # pitch_stft_with_plotly(pitch_dict, audio_file)