From 302d64dca857b6e5ca49a1f21ba1de1debb3cef8 Mon Sep 17 00:00:00 2001 From: chiyu1203 Date: Sat, 19 Oct 2024 21:51:46 +0200 Subject: [PATCH] Introduced analysis for band, load agents of swarm as multi-level dataframe, and multiple condition files in a session --- .vscode/launch.json | 5 +- analysis_methods_dictionary.json | 55 +-- create_analysis_method.py | 7 +- data_exploration_tutorial.ipynb | 85 ++--- locustvr_converter.py | 578 +++++++++++++++++++------------ 5 files changed, 439 insertions(+), 291 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 6b76b4f..89fa42f 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -9,7 +9,10 @@ "type": "debugpy", "request": "launch", "program": "${file}", - "console": "integratedTerminal" + "console": "integratedTerminal", + "env": { + "PYDEVD_WARN_SLOW_RESOLVE_TIMEOUT": "3" + } } ] } \ No newline at end of file diff --git a/analysis_methods_dictionary.json b/analysis_methods_dictionary.json index a33161f..4860bea 100644 --- a/analysis_methods_dictionary.json +++ b/analysis_methods_dictionary.json @@ -1,29 +1,30 @@ { - "experiment_name": "swarm", - "overwrite_curated_dataset": true, - "save_output": false, - "time_series_analysis": true, - "filtering_method": "sg_filter", - "plotting_trajectory": true, - "load_individual_data": true, - "select_animals_by_condition": true, - "graph_colour_code": [ - "r", - "b", - "g", - "k", - "c", - "y", - "m", - "r" - ], - "camera_fps": 100, - "trackball_radius_cm": 0.5, - "monitor_fps": 60, - "body_length": 4, - "growth_condition": "G", - "analysis_window": [ - -10, - 10 - ] + "experiment_name": "band", + "overwrite_curated_dataset": true, + "save_output": true, + "agents_shared_across_vrs": false, + "time_series_analysis": false, + "filtering_method": "sg_filter", + "plotting_trajectory": true, + "load_individual_data": true, + "select_animals_by_condition": true, + "graph_colour_code": [ + "r", + "b", + "g", + "k", + "c", + "y", + "m", + "r" + ], + "camera_fps": 100, + "trackball_radius_cm": 0.5, + "monitor_fps": 60, + "body_length": 4, + "growth_condition": "G", + "analysis_window": [ + -10, + 10 + ] } \ No newline at end of file diff --git a/create_analysis_method.py b/create_analysis_method.py index 3e7c34c..8b4ef59 100644 --- a/create_analysis_method.py +++ b/create_analysis_method.py @@ -2,10 +2,11 @@ file_name = "analysis_methods_dictionary.json" analysis_methods = { - "experiment_name": "swarm", + "experiment_name": "band", "overwrite_curated_dataset": True, - "save_output": False, - "time_series_analysis": True, + "save_output": True, + "agents_shared_across_vrs": False, + "time_series_analysis": False, "filtering_method": "sg_filter", "plotting_trajectory": True, "load_individual_data": True, diff --git a/data_exploration_tutorial.ipynb b/data_exploration_tutorial.ipynb index 8eeab17..91643ac 100644 --- a/data_exploration_tutorial.ipynb +++ b/data_exploration_tutorial.ipynb @@ -67,9 +67,10 @@ " \n", "\n", "#Put the folder of your Unity experiment below\n", - "thisDataset =\"D:/MatrexVR_Swarm_Data/RunData\"\n", + "#thisDataset =\"D:/MatrexVR_Swarm_Data/RunData\"\n", "#thisDataset =\"D:/MatrexVR_blackbackground_Data/RunData\"\n", "#thisDataset =\"D:/MatrexVR_grass1_Data/RunData\"\n", + "thisDataset =\"D:/MatrexVR_navigation_Data/RunData\"\n", "#parameter name means independent variable in the experiment\n", "#parameter_name='kappa' \n", "parameter_name='mu'\n", @@ -105,13 +106,14 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "##This cell is used to move data of the thermo-humidity logger to animals'folder\n", "import shutil\n", - "tmp_file_name='matrexVR240824-240901.txt'\n", + "#tmp_file_name='matrexVR240824-240901.txt'\n", + "tmp_file_name='DL220THP_Thermo2_241012_241014.csv'\n", "tmp_source=os.path.join('Z:/Users/chiyu',tmp_file_name)\n", "for this_dir in dir_list:\n", " tmp_new_dir = os.path.join(this_dir,tmp_file_name)\n", @@ -201,15 +203,6 @@ " dir_list.append(folder_path.replace(\"\\\\\", \"/\"))" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dir_list" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -274,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -295,6 +288,9 @@ " print(\"there is a bug\")\n", " return df_all,dfxy_all\n", " for this_dir,this_vr in dir_iterator:\n", + " if Path(this_dir).is_dir()==False:\n", + " print(f'no such a dir exist {this_dir}')\n", + " continue\n", " summary_pattern = f\"VR{this_vr}*score.h5\"\n", " xy_pattern = f\"VR{this_vr}*XY.h5\"\n", " found_result = find_file(Path(this_dir), summary_pattern) \n", @@ -315,6 +311,8 @@ " color_code={0: 0.1,45: 0.4,315: 0.7}\n", " elif scene_name.lower()=='swarm':\n", " color_code={0: 0.1, 45: 0.2, 90: 0.3,135:0.4,180: 0.5, 225: 0.6, 270: 0.7,315:0.8}\n", + " elif scene_name.lower()=='band':\n", + " color_code={0: 0.1, 45: 0.2, 90: 0.3, 270: 0.7,315:0.8}\n", " else:\n", " return Warning('scene name not found')\n", " elif test_parameter == 'agent_speed':\n", @@ -355,23 +353,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "#introduce customised functions\n", "def plot_sercansincos(df,analysis_methods,parameters,parameter_name,vr_num='all'):\n", - " dont_save_output= analysis_methods.get(\"dont_save_output\")\n", + " save_output= analysis_methods.get(\"save_output\")\n", " scene_name=analysis_methods.get(\"experiment_name\")\n", " cos = df[\"cos\"]\n", " sin = df[\"sin\"]\n", " if 'density' in df.columns:\n", " density=df[\"density\"].unique()[0]\n", - " cos_fig_name=f\"{vr_num}_cos_{parameter_name}_{parameters}_density_{int(density)}.svg\"\n", - " sin_fig_name=f\"{vr_num}_sin_{parameter_name}_{parameters}_density_{int(density)}.svg\" \n", + " cos_fig_name=f\"{vr_num}_cos_{scene_name}_{parameter_name}_{parameters}_density_{int(density)}.svg\"\n", + " sin_fig_name=f\"{vr_num}_sin_{scene_name}_{parameter_name}_{parameters}_density_{int(density)}.svg\" \n", " elif scene_name=='choice':\n", - " cos_fig_name=f\"{vr_num}_cos_{parameter_name}_{parameters}_single_target_{df['object_type'].values[0]}.svg\"\n", - " sin_fig_name=f\"{vr_num}_sin_{parameter_name}_{parameters}_single_target_{df['object_type'].values[0]}.svg\"\n", + " cos_fig_name=f\"{vr_num}_cos_{scene_name}_{parameter_name}_{parameters}_single_target_{df['object_type'].values[0]}.svg\"\n", + " sin_fig_name=f\"{vr_num}_sin_{scene_name}_{parameter_name}_{parameters}_single_target_{df['object_type'].values[0]}.svg\"\n", "\n", " fig, ax = plt.subplots(dpi=300, figsize=(1.1,0.25))\n", " #plt.rcParams.update(plt.rcParamsDefault)\n", @@ -392,7 +390,7 @@ " ax.set_yticks([])\n", " plt.ylabel(\"\")\n", " plt.xlabel(\"\") \n", - " if dont_save_output==False:\n", + " if save_output==True:\n", " plt.savefig(cos_fig_name)\n", " fig, ax = plt.subplots(dpi=300, figsize=(1.1,0.25))\n", " plt.subplots_adjust(bottom=0.4)\n", @@ -406,11 +404,11 @@ " plt.ylabel(\"\")\n", " plt.xlabel(\"\")\n", " plt.title(\"r sin\\u03F4\")\n", - " if dont_save_output==False:\n", + " if save_output==True:\n", " plt.savefig(sin_fig_name)\n", " plt.show()\n", "def plot_sercantrajec(dfXY,analysis_methods,parameters,parameter_name,trajec_lim=1000,vr_num='all'):\n", - " dont_save_output= analysis_methods.get(\"dont_save_output\")\n", + " save_output= analysis_methods.get(\"save_output\")\n", " scene_name=analysis_methods.get(\"experiment_name\")\n", " # dfXY[parameter_name].unique()\n", " \n", @@ -418,9 +416,9 @@ " if 'density' in dfXY.columns:\n", " density=dfXY[\"density\"].unique()[0]\n", " print(density)\n", - " fig_name=f\"{vr_num}_summary_trajectory_{parameter_name}_{parameters}_density_{int(density)}.png\"\n", + " fig_name=f\"{vr_num}_summary_trajectory_{scene_name}_{parameter_name}_{parameters}_density_{int(density)}.png\"\n", " else:\n", - " fig_name=f\"{vr_num}_summary_trajectory_{parameter_name}_{parameters}_single_target_{dfXY['object_type'].values[0]}.png\"\n", + " fig_name=f\"{vr_num}_summary_trajectory_{scene_name}_{parameter_name}_{parameters}_single_target_{dfXY['object_type'].values[0]}.png\"\n", "\n", " fig, ax = plt.subplots(figsize=(3,3), dpi=300) \n", " plt.rcParams.update(plt.rcParamsDefault)\n", @@ -440,15 +438,15 @@ " # Here plot agent's trajectory with hardcode parameters\n", " if scene_name=='choice' and dfXY['object_type'].values[0] !='empty_trial':\n", " agent_speed=2\n", - " initial_distance=8\n", + " radial_distance=8\n", " duration=60\n", " travel_direction=parameters*-np.pi/180#the radian circle is clockwise in Unity, so 45 degree should be used as -45 degree in the regular radian circle\n", - " initial_distance_b=np.cos(travel_direction)*initial_distance\n", + " radial_distance_b=np.cos(travel_direction)*radial_distance\n", " delta_cos=np.cumsum(np.repeat(np.cos(travel_direction)*agent_speed, duration))\n", - " agent_cos=initial_distance_b+delta_cos\n", - " initial_distance_b=np.sin(travel_direction)*initial_distance\n", + " agent_cos=radial_distance_b+delta_cos\n", + " radial_distance_b=np.sin(travel_direction)*radial_distance\n", " delta_sin=np.cumsum(np.repeat(np.sin(travel_direction)*agent_speed, duration))\n", - " agent_sin=initial_distance_b+delta_sin\n", + " agent_sin=radial_distance_b+delta_sin\n", " plt.plot(agent_cos, agent_sin, color='k', linewidth=1)\n", "\n", " plt.xlim(-1*trajec_lim, trajec_lim)\n", @@ -457,7 +455,7 @@ " plt.xticks([-1*trajec_lim, 0, trajec_lim])\n", " # Set the aspect ratio to be equal\n", " plt.gca().set_aspect('equal') \n", - " if dont_save_output==False: \n", + " if save_output==True: \n", " plt.savefig(fig_name)\n", " plt.show()" ] @@ -471,7 +469,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -550,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -562,6 +560,15 @@ "good_tracking=df_con['loss'] < 0.05" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df_con" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -578,17 +585,17 @@ "#plotting trajectory\n", "#differentiate between stim and ISI based on columns in dfxy_con\n", "if analysis_methods.get(\"experiment_name\")==\"choice\":\n", - " stim_or_isi=dfxy_con['initial_distance']\n", - "elif analysis_methods.get(\"experiment_name\")==\"swarm\":\n", + " stim_or_isi=dfxy_con['radial_distance']\n", + "elif analysis_methods.get(\"experiment_name\")==\"swarm\" or analysis_methods.get(\"experiment_name\")==\"band\":\n", " stim_or_isi=dfxy_con['density']\n", "df_stim=dfxy_con.loc[(dfxy_con['VR'].isin(df_con[\"VR\"][good_tracking])) & (stim_or_isi>0)]\n", "for key, grp in df_stim.groupby(parameter_name):\n", " print(f\"{parameter_name}:{key}\")\n", - " plot_sercantrajec(grp,analysis_methods,key,parameter_name,2000)\n", + " plot_sercantrajec(grp,analysis_methods,key,parameter_name,500)\n", "df_isi=dfxy_con.loc[(dfxy_con['VR'].isin(df_con[\"VR\"][good_tracking])) & (stim_or_isi==0)]\n", "for key, grp in df_isi.groupby(parameter_name):\n", " print(f\"{parameter_name}:{key}\")\n", - " plot_sercantrajec(grp,analysis_methods,key,parameter_name,2000)\n", + " plot_sercantrajec(grp,analysis_methods,key,parameter_name,500)\n", "#xy_lim at around 2000 is good for trial lasts around 4 or 5 min\n", "#xy_lim at around 500 is good for trial lasts around 1 min" ] @@ -609,8 +616,8 @@ "#Visualise the distribution of mean angle using seaborn kernel density estimation plot\n", "#differentiate between stim and ISI based on columns in df_con\n", "if analysis_methods.get(\"experiment_name\")==\"choice\":\n", - " stim_or_isi=df_con['initial_distance']\n", - "elif analysis_methods.get(\"experiment_name\")==\"swarm\":\n", + " stim_or_isi=df_con['radial_distance']\n", + "elif analysis_methods.get(\"experiment_name\")==\"swarm\" or analysis_methods.get(\"experiment_name\")==\"band\":\n", " stim_or_isi=df_con['density']\n", "df_stim=df_con[stim_or_isi>0]\n", "for key, grp in df_stim.groupby(parameter_name):\n", @@ -650,7 +657,7 @@ " found_result = find_file(Path(this_dir), locust_pattern) \n", " df = pd.read_hdf(found_result)\n", " if analysis_methods.get(\"experiment_name\")==\"choice\":\n", - " stim_or_isi=df['initial_distance']\n", + " stim_or_isi=df['radial_distance']\n", " elif analysis_methods.get(\"experiment_name\")==\"swarm\":\n", " stim_or_isi=df['density']\n", " df_stim=df[stim_or_isi>0]\n", diff --git a/locustvr_converter.py b/locustvr_converter.py index dcd2e04..c39feb9 100644 --- a/locustvr_converter.py +++ b/locustvr_converter.py @@ -11,6 +11,8 @@ import matplotlib.pyplot as plt import matplotlib as mpl from scipy.signal import savgol_filter +from deepdiff import DeepDiff +from pprint import pprint current_working_directory = Path.cwd() parent_dir = current_working_directory.resolve().parents[0] @@ -52,6 +54,47 @@ def ffill(arr): return out +def reshape_multiagent_data(ts, x=None, y=None, phase=None): + + number_of_duplicates = ts["Timestamp"].drop_duplicates().shape[0] + number_of_instances = int(ts.shape[0] / number_of_duplicates) + agent_id = np.tile(np.arange(number_of_instances), number_of_duplicates) + c_name_list = ["agent" + str(num) for num in agent_id] + if isinstance(y, pd.Series) == False and isinstance(phase, pd.Series) == False: + + test = pd.concat([ts, x, pd.DataFrame(c_name_list)], axis=1) + new_df = test.pivot(index="Timestamp", columns=0, values="X") + elif isinstance(phase, pd.Series) == False: + test = pd.concat([ts, x, y, pd.DataFrame(c_name_list)], axis=1) + new_df = test.pivot(index="Timestamp", columns=0, values=["X", "Z"]) + else: + test = pd.concat([ts, x, y, phase, pd.DataFrame(c_name_list)], axis=1) + new_df = test.pivot( + index="Timestamp", columns=0, values=["X", "Z", "VisibilityPhase"] + ) + # new_df.loc[:, (slice(None), ["agent0"])] to access columns with multi-index + return new_df + + +def reshape_multiagent_data2(df, this_object): + number_of_duplicates = df["Timestamp"].drop_duplicates().shape[0] + number_of_instances = int(df.shape[0] / number_of_duplicates) + agent_id = np.tile( + np.arange(number_of_instances) + number_of_instances * this_object, + number_of_duplicates, + ) + c_name_list = ["agent" + str(num) for num in agent_id] + test = pd.concat([df, pd.DataFrame(c_name_list)], axis=1) + if "VisibilityPhase" in df.columns: + df_values = ["X", "Z", "VisibilityPhase"] + + else: + df_values = ["X", "Z"] + new_df = test.pivot(index="Timestamp", columns=0, values=df_values) + # new_df.loc[:, (slice(None), ["agent0"])] to access columns with multi-index + return new_df + + # Simple solution for bfill provided by financial_physician in comment below def bfill(arr): if arr.ndim == 1: @@ -60,45 +103,116 @@ def bfill(arr): return ffill(arr[:, ::-1])[:, ::-1] -def read_simulated_data(this_file, analysis_methods): - scene_name = analysis_methods.get("experiment_name") +def prepare_data(df, this_range): + ts = df.loc[this_range, "Current Time"] + if ts.empty: + return None + # x, y = df.loc[this_range, ["GameObjectPosX", "GameObjectPosZ"]].T + x = df["GameObjectPosX"][this_range] + y = df["GameObjectPosZ"][this_range] + + xy = np.vstack((x.to_numpy(), y.to_numpy())) + xy = bfill(xy) # Fill missing values for smoother analysis + trial_no = df.loc[this_range, "CurrentTrial"] + return ts, xy, trial_no + + +def load_file(file): + if file.suffix == ".gz": + with gzip.open(file, "rb") as f: + return pd.read_csv(f) + elif file.suffix == ".csv": + return pd.read_csv(file) + +def read_agent_data(this_file, analysis_methods, these_parameters=None): + scene_name = analysis_methods.get("experiment_name") print("read simulated data") + thisDir = this_file.parent if type(this_file) == str: this_file = Path(this_file) - thisDir = this_file.parent - if this_file.suffix == ".gz": - with gzip.open(this_file, "rb") as f: - df = pd.read_csv(f) - elif this_file.suffix == ".csv": - with open(this_file, mode="r") as f: - df = pd.read_csv(f) + + df = load_file(this_file) + print(df.columns) if scene_name.lower() == "swarm": n_locusts = df.columns[6] boundary_size = df.columns[7] - mu = df.columns[8] - kappa = df.columns[9] - agent_speed = df.columns[10] density = int(n_locusts.split(":")[1]) / ( int(boundary_size.split(":")[1]) ** 2 / 10000 - ) # change the unit to m2 - conditions = { - "Density": density, - mu.split(":")[0]: int(mu.split(":")[1]), - kappa.split(":")[0]: float(kappa.split(":")[1]), - agent_speed.split(":")[0]: float(agent_speed.split(":")[1]), - } + ) + if these_parameters == None: + mu = df.columns[8] + kappa = df.columns[9] + agent_speed = df.columns[10] + conditions = { + "density": density, + mu.split(":")[0].lower(): int(mu.split(":")[1]), + kappa.split(":")[0].lower(): float(kappa.split(":")[1]), + "speed": float(agent_speed.split(":")[1]), + } + else: + mu = these_parameters["mu"] + kappa = these_parameters["kappa"] + agent_speed = these_parameters["locustSpeed"] + conditions = { + "density": density, + "mu": mu, + "kappa": kappa, + "speed": agent_speed, + } + # change the unit to m2 + + if len(df) > 0: + result = pd.concat( + [ + pd.to_datetime(df["Timestamp"], format="%Y-%m-%d %H:%M:%S.%f"), + df["X"], + df["Z"], + ], + axis=1, + ) + else: + result = [ + pd.to_datetime(this_file.stem[0:19], format="%Y-%m-%d_%H-%M-%S"), + None, + None, + ] + + elif scene_name.lower() == "band": + conditions = {} + for this_item in range(len(list(these_parameters))): + if list(these_parameters)[this_item] == "position": + conditions["radial_distance"] = these_parameters["position"]["radius"] + conditions["polar_angle"] = these_parameters["position"]["angle"] + else: + conditions[list(these_parameters)[this_item]] = list( + these_parameters.values() + )[this_item] + conditions["density"] = conditions["numberOfInstances"] / ( + conditions["boundaryLengthX"] * conditions["boundaryLengthZ"] / 10000 + ) if len(df) > 0: - ts = pd.to_datetime(df["Timestamp"], format="%Y-%m-%d %H:%M:%S.%f") - x = df["X"] - y = df["Z"] + result = pd.concat( + [ + pd.to_datetime(df["Timestamp"], format="%Y-%m-%d %H:%M:%S.%f"), + df["X"], + df["Z"], + df["VisibilityPhase"], + ], + axis=1, + ) else: - ts = pd.to_datetime(this_file.stem[0:19], format="%Y-%m-%d_%H-%M-%S") - x = None - y = None + result = [ + pd.to_datetime(this_file.stem[0:19], format="%Y-%m-%d_%H-%M-%S"), + None, + None, + None, + ] + elif scene_name.lower() == "choice": conditions = [] + result = [] agent_pattern = "*_Choice_*.json" found_result = find_file(thisDir, agent_pattern) if found_result is None: @@ -116,11 +230,11 @@ def read_simulated_data(this_file, analysis_methods): condition_id = this_file.stem.split("_")[4] tmp = json.loads(f.read()) condition = { - "agent": tmp["objects"][0]["type"], - "distance": tmp["objects"][0]["position"]["radius"], - "heading_angle": tmp["objects"][0]["position"]["angle"], - "walking_direction": tmp["objects"][0]["mu"], - "agent_speed": tmp["objects"][0]["speed"], + "type": tmp["objects"][0]["type"], + "radial_distance": tmp["objects"][0]["position"]["radius"], + "polar_angle": tmp["objects"][0]["position"]["angle"], + "mu": tmp["objects"][0]["mu"], + "speed": tmp["objects"][0]["speed"], } condition_dict[condition_id] = condition # conditions.append(condition) @@ -131,14 +245,12 @@ def read_simulated_data(this_file, analysis_methods): condition_id = found_result.stem.split("_")[4] tmp = json.loads(f.read()) condition = { - "agent": tmp["objects"][0]["type"], - "distance": tmp["objects"][0]["position"]["radius"], - "heading_angle": tmp["objects"][0]["position"]["angle"], - "walking_direction": tmp["objects"][0]["mu"], - "agent_speed": tmp["objects"][0]["speed"], + "type": tmp["objects"][0]["type"], + "radial_distance": tmp["objects"][0]["position"]["radius"], + "polar_angle": tmp["objects"][0]["position"]["angle"], + "mu": tmp["objects"][0]["mu"], + "speed": tmp["objects"][0]["speed"], } - condition_dict[condition_id] = condition - json_pattern = "*sequenceConfig.json" found_result = find_file(thisDir, json_pattern) with open(found_result, "r") as f: @@ -158,56 +270,32 @@ def read_simulated_data(this_file, analysis_methods): # ): ## need to add this condition because I hardcode to make the first empty scene 240 sec # meta_condition = (this_condition, 240) # else: + # meta_condition = (this_condition, tmp["sequences"][i]["duration"]) Can not use dictionary to add duration because dictionary is mutable meta_condition = (this_condition, tmp["sequences"][i]["duration"]) conditions.append(meta_condition) if len(df) > 0: - ts = [] - x = [] - y = [] + v_phase = None for _, entries in df.groupby(["CurrentTrial", "CurrentStep"]): - ts.append( - pd.to_datetime( - entries["Current Time"], format="%Y-%m-%d %H:%M:%S.%f" - ) + + ct = pd.to_datetime( + entries["Current Time"], format="%Y-%m-%d %H:%M:%S.%f" ) - x.append(entries["GameObjectPosX"]) - y.append(entries["GameObjectPosZ"]) + result.append( + pd.concat( + [ct, entries["GameObjectPosX"], entries["GameObjectPosZ"]], + axis=1, + ) + ) # need to add visibility phase here in the future else: - ts = None - x = None - y = None - return ts, x, y, conditions - - -def prepare_data(df, this_range): - ts = df.loc[this_range, "Current Time"] - if ts.empty: - return None - # x, y = df.loc[this_range, ["GameObjectPosX", "GameObjectPosZ"]].T - x = df["GameObjectPosX"][this_range] - y = df["GameObjectPosZ"][this_range] - - xy = np.vstack((x.to_numpy(), y.to_numpy())) - xy = bfill(xy) # Fill missing values for smoother analysis - trial_no = df.loc[this_range, "CurrentTrial"] - return ts, xy, trial_no - - -def load_file(file): - if file.suffix == ".gz": - with gzip.open(file, "rb") as f: - return pd.read_csv(f) - elif file.suffix == ".csv": - return pd.read_csv(file) + result = [None, None, None, None] + return result, conditions def analyse_focal_animal( this_file, analysis_methods, - ts_simulated_animal, - x_simulated_animal, - y_simulated_animal, + df_simulated_animal, conditions, tem_df=None, ): @@ -219,14 +307,15 @@ def analyse_focal_animal( overwrite_curated_dataset = analysis_methods.get("overwrite_curated_dataset", False) time_series_analysis = analysis_methods.get("time_series_analysis", False) scene_name = analysis_methods.get("experiment_name") - alpha_dictionary = {0.1: 0.2, 1.0: 0.4, 10.0: 0.6, 100000.0: 1} analyze_one_session_only = True BODY_LENGTH3 = ( analysis_methods.get("body_length", 4) * 3 ) ## multiple 3 because 3 body length is used for spatial discretisation in Sayin et al. growth_condition = analysis_methods.get("growth_condition") - this_file = Path(this_file) if isinstance(this_file, str) else this_file + if type(this_file) == str: + this_file = Path(this_file) + df = load_file(this_file) # replace 0.0 with np.nan since they are generated during scene-switching ##if upgrading to pandas 3.0 in the future, try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead @@ -241,18 +330,18 @@ def analyse_focal_animal( df["Current Time"] = pd.to_datetime( df["Current Time"], format="%Y-%m-%d %H:%M:%S.%f" ) - experiment_id = re.sub( - r"\s+|:", - "_", - f'{df["VR"][0]} {df["Current Time"][0].strftime("%Y-%m-%d %H:%M:%S")}', - ) - # experiment_id1 = df["VR"][0] + " " + str(df["Current Time"][0]).split(".")[0] - # experiment_id1 = re.sub(r"\s+", "_", experiment_id) - # experiment_id1 = re.sub(r":", "", experiment_id) - file_suffix = "full" if time_series_analysis else "" - curated_file_path = this_file.parent / f"{experiment_id}_XY_{file_suffix}.h5" - summary_file_path = this_file.parent / f"{experiment_id}_score_{file_suffix}.h5" - agent_file_path = this_file.parent / f"{experiment_id}_agent_{file_suffix}.h5" + # experiment_id = re.sub( + # r"\s+|:", + # "_", + # f'{df["VR"][0]} {df["Current Time"][0].strftime("%Y-%m-%d %H:%M:%S")}', + # ) different format + experiment_id = df["VR"][0] + " " + str(df["Current Time"][0]).split(".")[0] + experiment_id = re.sub(r"\s+", "_", experiment_id) + experiment_id = re.sub(r":", "", experiment_id) + file_suffix = "_full" if time_series_analysis else "" + curated_file_path = this_file.parent / f"{experiment_id}_XY{file_suffix}.h5" + summary_file_path = this_file.parent / f"{experiment_id}_score{file_suffix}.h5" + agent_file_path = this_file.parent / f"{experiment_id}_agent{file_suffix}.h5" # need to think about whether to name them the same regardless analysis methods if tem_df is None: @@ -264,8 +353,9 @@ def analyse_focal_animal( f"{frequency_milisecond}L" ).interpolate() # FutureWarning: 'L' is deprecated and will be removed in a future version, please use 'ms' instead. df.set_index("Current Time", drop=False, inplace=True) - aligned_THP = tem_df.reindex(df.index, method="nearest") - df = df.join(aligned_THP.astype(np.float32)) + df = df.join(tem_df.reindex(df.index, method="nearest").astype(np.float32)) + # aligned_THP = tem_df.reindex(df.index, method="nearest") + # df = df.join(aligned_THP.astype(np.float32)) # df = df.join(aligned_THP) del tem_df # if tem_df is not None: @@ -302,18 +392,16 @@ def analyse_focal_animal( ts_across_trials, ) = ([], [], [], []) - # for id in range(len(conditions)): for id, condition in enumerate(conditions): this_range = (df["CurrentStep"] == id) & (df["CurrentTrial"] == 0) # ts = df["Current Time"][this_range].astype("datetime64[ms]") # ts = df["Current Time"][this_range] - data = prepare_data(df, this_range) - if data is None: + ts, xy, trial_no = prepare_data(df, this_range) + if len(ts) == 0: break - ts, xy, trial_no = data - fchop = ts.iloc[0].strftime("%Y-%m-%d_%H%M%S") - if len(trial_no.value_counts()) > 1 & analyze_one_session_only == True: + elif len(trial_no.value_counts()) > 1 & analyze_one_session_only == True: break + fchop = ts.iloc[0].strftime("%Y-%m-%d_%H%M%S") # heading_direction = df["GameObjectRotY"][this_range] # x = df["GameObjectPosX"][this_range].astype(np.float32) # y = df["GameObjectPosZ"][this_range].astype(np.float32) @@ -325,22 +413,20 @@ def analyse_focal_animal( # # trial_no = df["CurrentTrial"][this_range].astype("int16") # trial_no = df["CurrentTrial"][this_range] if scene_name == "choice" and id % 2 > 0: - df_simulated = pd.concat( - [ - ts_simulated_animal[id // 2], - x_simulated_animal[id // 2], - y_simulated_animal[id // 2], - ], - axis=1, - ) + df_simulated = df_simulated_animal[id // 2] df_simulated.set_index("Current Time", inplace=True) df_simulated = df_simulated.reindex(ts.index, method="nearest") - - elapsed_time = ( - (ts - ts.min()).dt.total_seconds().values if time_series_analysis else None - ) + elif ( + scene_name != "choice" + and isinstance(df_simulated_animal[id], pd.DataFrame) == True + ): + these_simulated_agents = df_simulated_animal[id] + these_simulated_agents = these_simulated_agents.reindex( + ts.index, method="nearest" + ) ## Has an error ValueError: cannot reindex on an axis with duplicate labels if time_series_analysis: + elapsed_time = (ts - ts.min()).dt.total_seconds().values if analysis_methods.get("filtering_method") == "sg_filter": X = savgol_filter(xy[0], 59, 3, axis=0) Y = savgol_filter(xy[1], 59, 3, axis=0) @@ -350,6 +436,7 @@ def analyse_focal_animal( loss = np.nan else: ##need to think about whether applying removeNoiseVR only to spatial discretisation or general + elapsed_time = None loss, X, Y = removeNoiseVR(xy[0], xy[1]) loss = 1 - loss if len(X) == 0: @@ -396,47 +483,69 @@ def analyse_focal_animal( std = np.sqrt(2 * (1 - meanVector)) tdist = ( - np.sum(np.sqrt(np.add(np.diff(X) ** 2, np.diff(Y) ** 2))) + np.sum(np.sqrt(np.add(np.square(np.diff(X)), np.square(np.diff(Y))))) if time_series_analysis - else len(X) * BODY_LENGTH3 - ) ##note this distance can be a lot larger than calculating with spatial discretisation + else len(dX) * BODY_LENGTH3 + ) ##The distance calculated based on spatial discretisation should be the shortest f = [fchop] * len(dX) + different_key = None + if isinstance(condition, list): + diff_con = DeepDiff(condition[0], condition[1]) + pprint(diff_con) + different_key = diff_con.affected_root_keys[0] + condition = condition[0] + elif isinstance( + condition, tuple + ): # designed for the choice assay, which use tuple to carry duration + duration = condition[1] # drop the duration from the + condition = condition[0] + else: + pass - if scene_name.lower() == "swarm": - o = [condition["Kappa"]] * len(dX) - d = [condition["Density"]] * len(dX) - mu = [condition["Mu"]] * len(dX) - spe = [condition["LocustSpeed"]] * len(dX) + spe = [condition["speed"]] * len(dX) + mu = [condition["mu"]] * len(dX) + if scene_name.lower() == "swarm" or scene_name.lower() == "band": + order = [condition["kappa"]] * len(dX) + density = [condition["density"]] * len(dX) du = [condition["duration"]] * len(dX) - elif scene_name.lower() == "choice": - if conditions[id][0]["agent"] == "LeaderLocust": - o = ["gn_locust"] * len(dX) - elif conditions[id][0]["agent"] == "": - o = ["empty_trial"] * len(dX) - d = [conditions[id][0]["distance"]] * len(dX) - du = [conditions[id][1]] * len(dX) - f_angle = [conditions[id][0]["heading_angle"]] * len(dX) - mu = [conditions[id][0]["walking_direction"]] * len(dX) - spe = [conditions[id][0]["agent_speed"]] * len(dX) + if scene_name.lower() == "choice" or scene_name.lower() == "band": + polar_angle = [condition["polar_angle"]] * len(dX) + radial_distance = [condition["radial_distance"]] * len(dX) + + if scene_name.lower() == "choice": + if condition["type"] == "LeaderLocust": + object_type = ["gn_moving_locust"] * len(dX) + elif condition["type"] == "": + object_type = ["empty_trial"] * len(dX) + du = [duration] * len(dX) + if scene_name.lower() == "band": + voff = [condition["visibleOffDuration"]] * len(dX) + von = [condition["visibleOnDuration"]] * len(dX) + f_angle = [condition["rotationAngle"]] * len(dX) + object_type = [condition["type"]] * len(dX) groups = [growth_condition] * len(dX) df_curated = pd.DataFrame( {"X": dX, "Y": dY, "fname": f, "mu": mu, "agent_speed": spe, "duration": du} ) - if "elapsed_time" in locals(): + if elapsed_time != None: df_curated["ts"] = list(elapsed_time) if "temperature" in locals(): df_curated["temperature"] = list(temperature) df_curated["humidity"] = list(humidity) - if scene_name.lower() == "swarm": - df_curated["density"] = d - df_curated["kappa"] = o - elif scene_name.lower() == "choice": - df_curated["object_type"] = o - df_curated["initial_distance"] = d - df_curated["heading_angle"] = f_angle - f_angle = [f_angle[0]] + if scene_name.lower() == "swarm" or scene_name.lower() == "band": + df_curated["density"] = density + df_curated["kappa"] = order + if scene_name.lower() == "choice" or scene_name.lower() == "band": + df_curated["type"] = object_type + df_curated["radial_distance"] = radial_distance + df_curated["polar_angle"] = polar_angle + # Probably no need to save the following into curated database but just in case + # if scene_name.lower() == "band": + # df_curated["visibleOffDuration"] = voff + # df_curated["visibleOnDuration"] = von + # df_curated["rotationAngle"] = f_angle ##load information about simulated locusts if scene_name.lower() == "choice": @@ -459,8 +568,12 @@ def analyse_focal_animal( agent_dX = agent_rXY[0][newindex] agent_dY = agent_rXY[1][newindex] - elif scene_name.lower() == "swarm": + elif ( + isinstance(df_simulated_animal[id], pd.DataFrame) == True + and "these_simulated_agents" in locals() + ): Warning("work in progress") + pass if "agent_dX" in locals(): df_agent = pd.DataFrame( { @@ -473,20 +586,22 @@ def analyse_focal_animal( ) if scene_name.lower() == "swarm": print( - "there is a unsovled bug about how to name the number of agent" + "there is a unfixed bug about how to name the number of agent" ) - df_agent["agent_no"] = d + df_agent["agent_num"] = density elif scene_name.lower() == "choice": df_agent["agent_no"] = [0] * len( agent_dX ) # need to figure out a way to solve multiple agents situation. The same method should be applied in the Swarm scene + else: + pass df_summary = pd.DataFrame( { "fname": [f[0]], "loss": [loss], "mu": [mu[0]], - "agent_speed": [spe[0]], + "speed": [spe[0]], "groups": [groups[0]], "mean_angle": [np.float32(meanAngle)], "vector": [np.float32(meanVector)], @@ -498,43 +613,41 @@ def analyse_focal_animal( "duration": [du[0]], } ) - if scene_name.lower() == "swarm": - df_summary["density"] = d - df_summary["kappa"] = o - elif scene_name.lower() == "choice": - df_summary["object_type"] = o - df_summary["initial_distance"] = d - df_summary["heading_angle"] = f_angle + if scene_name.lower() == "swarm" or scene_name.lower() == "band": + df_summary["density"] = [density[0]] + df_summary["kappa"] = [order[0]] + if scene_name.lower() == "choice" or scene_name.lower() == "band": + df_summary["type"] = [object_type[0]] + df_summary["radial_distance"] = [radial_distance[0]] + df_summary["polar_angle"] = [polar_angle[0]] + if scene_name.lower() == "band": + df_summary["visibleOffDuration"] = [voff[0]] + df_summary["visibleOnDuration"] = [von[0]] + df_summary["rotationAngle"] = [f_angle[0]] if plotting_trajectory == True: - if scene_name.lower() == "swarm": + if scene_name.lower() == "swarm" or scene_name.lower() == "band": if df_summary["density"][0] > 0: ## if using plot instead of scatter plot - # ax2.plot( - # dX, dY, color=np.arange(len(dY)), alpha=df_curated.iloc[id]["alpha"] - # ) + ax2.plot(dX, dY) ##blue is earlier colour and yellow is later colour - ax2.scatter( - dX, - dY, - c=np.arange(len(dY)), - marker=".", - alpha=df_summary["kappa"].map(alpha_dictionary)[0], - ) + # ax2.scatter( + # dX, + # dY, + # c=np.arange(len(dY)), + # marker=".", + # ) else: ## if using plot instead of scatter plot - # ax1.plot( - # dX, dY, alpha=df_curated.iloc[id]["alpha"] + ax1.plot(dX, dY) + # ax1.scatter( + # dX, + # dY, + # c=np.arange(len(dY)), + # marker=".", # ) - ax1.scatter( - dX, - dY, - c=np.arange(len(dY)), - marker=".", - alpha=df_summary["kappa"].map(alpha_dictionary)[0], - ) elif scene_name.lower() == "choice": - if df_summary["object_type"][0] == "empty_trial": + if df_summary["type"][0] == "empty_trial": ax1.scatter( dX, dY, @@ -567,6 +680,8 @@ def analyse_focal_animal( data_frame_list = [df_curated, df_summary] for this_name, this_pd in zip(file_list, data_frame_list): store = pd.HDFStore(this_name) + if different_key != None and different_key in this_pd.columns: + this_pd[different_key] = np.nan store.append( "name_of_frame", this_pd, @@ -586,7 +701,6 @@ def analyse_focal_animal( del agent_dX, agent_dY, df_agent trajectory_fig_path = this_file.parent / f"{experiment_id}_trajectory.png" if plotting_trajectory == True and save_output == True: - # plt.show() fig.savefig(trajectory_fig_path) return ( heading_direction_across_trials, @@ -630,25 +744,25 @@ def preprocess_matrex_data(thisDir, json_file): inplace=True, ) num_vr = 4 + agents_shared_across_vrs = analysis_methods.get("agents_shared_across_vrs", False) + scene_name = analysis_methods.get("experiment_name") ## here to load simulated agent's data for i in range(num_vr): - scene_name = analysis_methods.get("experiment_name") - if scene_name.lower() == "swarm": + + if scene_name.lower() == "swarm" or scene_name.lower() == "band": agent_pattern = f"*SimulatedLocustsVR{i+1}*" elif scene_name.lower() == "choice": agent_pattern = "*Leader*" found_result = find_file(thisDir, agent_pattern) if found_result is None: return print(f"file with {agent_pattern} not found") - # elif scene_name.lower() == "choice" and i > 0: - elif scene_name.lower() == "choice" and "ts_simulated_animal" in locals(): + elif agents_shared_across_vrs and "ts_simulated_animal" in locals(): print( - "Information about simulated locusts are shared across rigs in the choice scene, so start analysing focal animals" + "Information about simulated locusts are shared across rigs in the choice scene, so skip the rest of the loop and start analysing focal animals" ) else: - ts_simulated_animal = [] - x_simulated_animal = [] - y_simulated_animal = [] + + df_simulated_animal = [] conditions = [] if isinstance(found_result, list): print( @@ -657,38 +771,63 @@ def preprocess_matrex_data(thisDir, json_file): json_pattern = "*sequenceConfig.json" json_file = find_file(thisDir, json_pattern) with open(json_file, "r") as f: - print(f"load conditions from file {json_file}") - tmp = json.loads(f.read()) - for idx in range(len(tmp["sequences"])): - this_file = found_result[idx] - # for idx, this_file in enumerate(found_result): - ts, x, y, condition = read_simulated_data( - this_file, analysis_methods - ) - ts_simulated_animal.append(ts) - x_simulated_animal.append(x) - y_simulated_animal.append(y) - if scene_name.lower() == "swarm": - condition["duration"] = tmp["sequences"][idx]["duration"] - conditions.append(condition) + print(f"load trial sequence from file {json_file}") + trial_sequence = json.loads(f.read()) + for idx in range(len(trial_sequence["sequences"])): + if "configFile" in trial_sequence["sequences"][idx]["parameters"]: + config_file = trial_sequence["sequences"][idx]["parameters"][ + "configFile" + ] # need to figure out how to deal with swarm data + this_config_file = find_file(thisDir, "*" + config_file) + with open(this_config_file, "r") as f: + print(f"load trial conditions from file {this_config_file}") + trial_condition = json.loads(f.read()) + num_object = len(trial_condition["objects"]) + else: + trial_condition = trial_sequence["sequences"][idx]["parameters"] + num_object = 1 + this_file = found_result[idx * num_object] + result_list = [] + condition_list = [] + for this_object in range(num_object): + if "objects" in trial_condition: + result, condition = read_agent_data( + found_result[idx * num_object + this_object], + analysis_methods, + trial_condition["objects"][this_object], + ) + else: + result, condition = read_agent_data( + found_result[idx * num_object + this_object], + analysis_methods, + trial_condition, + ) + + if isinstance(result, pd.DataFrame) == True: + df_agent = reshape_multiagent_data2(result, this_object) + else: + df_agent = None + result_list.append(df_agent) + condition["duration"] = trial_sequence["sequences"][idx][ + "duration" + ] # may need to add condition to exclude some kind of data from choice assay. + condition_list.append(condition) + if num_object == 1: + conditions.append(condition_list[0]) + df_simulated_animal.append(result_list[0]) + else: + conditions.append(condition_list) + df_simulated_animal.append(result_list) elif len(found_result.stem) > 0: - ts, x, y, condition = read_simulated_data( - found_result, analysis_methods + + ( + df_simulated_animal, + conditions, + ) = read_agent_data( + found_result, + analysis_methods, ) - if scene_name.lower() == "choice": - ts_simulated_animal = ts - x_simulated_animal = x - y_simulated_animal = y - conditions = condition - else: - ts, x, y, condition = read_simulated_data( - found_result, analysis_methods - ) - ts_simulated_animal.append(ts) - x_simulated_animal.append(x) - y_simulated_animal.append(y) - conditions.append(condition) ## here to load focal_animal's data animal_name_pattern = f"*_VR{i+1}*" @@ -702,31 +841,27 @@ def preprocess_matrex_data(thisDir, json_file): ) for this_file in found_result: ( - heading_direction_focal_animal, - x_focal_animal, - y_focal_animal, - ts_focal_animal, + _, + _, + _, + _, ) = analyse_focal_animal( this_file, analysis_methods, - ts_simulated_animal, - x_simulated_animal, - y_simulated_animal, + df_simulated_animal, conditions, tem_df, ) elif len(found_result.stem) > 0: ( - heading_direction_focal_animal, - x_focal_animal, - y_focal_animal, - ts_focal_animal, + _, + _, + _, + _, ) = analyse_focal_animal( found_result, analysis_methods, - ts_simulated_animal, - x_simulated_animal, - y_simulated_animal, + df_simulated_animal, conditions, tem_df, ) @@ -734,7 +869,8 @@ def preprocess_matrex_data(thisDir, json_file): if __name__ == "__main__": # thisDir = r"D:\MatrexVR_Swarm_Data\RunData\20240818_170807" - thisDir = r"D:\MatrexVR_Swarm_Data\RunData\20240824_143943" + # thisDir = r"D:\MatrexVR_Swarm_Data\RunData\20240824_143943" + thisDir = r"D:\MatrexVR_navigation_Data\RunData\20241012_162147" # thisDir = r"D:\MatrexVR_Swarm_Data\RunData\20240826_150826" # thisDir = r"D:\MatrexVR_blackbackground_Data\RunData\20240904_171158" # thisDir = r"D:\MatrexVR_blackbackground_Data\RunData\20240904_151537"