Skip to content

Commit

Permalink
added glitch stuff back to preprocess
Browse files Browse the repository at this point in the history
  • Loading branch information
simrannerval committed Jan 24, 2025
1 parent bf38dd1 commit 51a9a17
Showing 1 changed file with 65 additions and 1 deletion.
66 changes: 65 additions & 1 deletion sotodlib/preprocess/processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1683,7 +1683,69 @@ def process(self, aman, proc_aman):
# if we need extract snippets from aman, here's how to do it:
snippets = gl.extract_snippets(aman, snippet_layouts)

# TODO: save them for later
# TODO: save them for later in a proper way
proc_aman.snippets = snippets
proc_aman.snippet_layouts = snippet_layouts



class GlitchComputeStats(_Preprocess):
    """Build the per-glitch summary statistics used for glitch classification.

    The statistics are computed from ``proc_aman.snippets`` by
    ``glitch_classification.compute_summary_stats`` and written to
    ``<outdir>/<df_name>.h5`` under the key ``'df'``.

    Config options read from ``process_cfgs``:

    - ``cols_for_stats``: list of statistic/column names to compute
      (defaults to the list in ``process``).
    - ``outdir``: output directory (defaults to the current working
      directory).
    - ``df_name``: base name of the output file, without extension
      (defaults to ``"df_stats"``).
    """

    name = "glitch_compute_stats"

    def process(self, aman, proc_aman):
        cfg = self.process_cfgs

        # Columns computed when the config does not specify its own list.
        default_columns = [
            'Number of Detectors',
            'Y and X Extent Ratio',
            'Mean abs(Correlation)',
            'Mean abs(Time Lag)',
            'Y Hist Max and Adjacent/Number of Detectors',
            'Within 0.1 of Y Hist Max/Number of Detectors',
            'Number of Peaks',
            'Start Index',
            'Stop Index',
            'Start Ctime',
            'Stop Ctime',
        ]
        stat_columns = cfg.get("cols_for_stats", default_columns)

        # TODO: figure out a way to save the snippet layout as well.

        # Table of summary statistics for glitch classification; the
        # original author notes it is a dataframe.
        summary = glitch_classification.compute_summary_stats(
            proc_aman.snippets, stat_columns
        )

        # TODO: writing straight to an HDF5 file is a temporary way of
        # saving the statistics.
        out_dir = cfg.get("outdir", os.getcwd())
        base_name = cfg.get("df_name", "df_stats")
        h5_path = f'{out_dir}/{base_name}.h5'
        summary.to_hdf(h5_path, key='df', mode='a')


class GlitchClassification(_Preprocess):
    """Classify glitches using a previously trained random forest.

    Reads the summary statistics from ``<outdir>/<df_name>.h5``, drops rows
    containing NaN values, runs
    ``glitch_classification.classify_data_forest`` on the remaining rows and
    writes the result to ``<outdir>/<df_name>_w_predictions.h5`` under the
    key ``'df'``, overwriting any existing file. Nothing is returned.

    Per the original author's description, the classifier gives the
    probability of each glitch type: 0: Point Sources,
    1: Point Sources + Other, 2: Cosmic Rays, 3: Other.

    Config options read from ``process_cfgs``:

    - ``outdir``: directory holding the statistics file and the pickled
      forest, and receiving the predictions (defaults to the current
      working directory).
    - ``df_name``: base name of the statistics file, without extension
      (defaults to ``"df_stats"``).
    - ``trained_forest_name``: base name of the pickled forest, without
      the ``.pkl`` extension (defaults to ``"trained_forest"``).
    - ``columns_for_classifying``: list of statistic columns passed to the
      classifier (defaults to the list in ``process``).
    """

    name = "classify_glitches"

    def process(self, aman, proc_aman):
        outdir = self.process_cfgs.get("outdir", os.getcwd())
        df_name = self.process_cfgs.get("df_name", "df_stats")
        trained_forest_name = self.process_cfgs.get(
            "trained_forest_name", "trained_forest"
        )

        # SECURITY NOTE: unpickling executes arbitrary code, so the forest
        # file must come from a trusted location.
        forest_path = '{}/{}.pkl'.format(outdir, trained_forest_name)
        # Use a context manager so the file handle is closed even when
        # unpickling fails (the handle was previously left open).
        with open(forest_path, 'rb') as forest_file:
            trained_forest = pk.load(forest_file)

        classifying_cols = self.process_cfgs.get(
            "columns_for_classifying",
            [
                'Number of Detectors',
                'Y and X Extent Ratio',
                'Mean abs(Correlation)',
                'Mean abs(Time Lag)',
                'Y Hist Max and Adjacent/Number of Detectors',
                'Within 0.1 of Y Hist Max/Number of Detectors',
                'Number of Peaks',
            ],
        )

        df_stats_t = pd.read_hdf('{}/{}.h5'.format(outdir, df_name))

        # Rows with any missing statistic are dropped before classification.
        df_stats = df_stats_t.dropna()

        df_w_predictions = glitch_classification.classify_data_forest(
            df_stats, classifying_cols, trained_forest
        )

        df_w_predictions.to_hdf(
            '{}/{}_w_predictions.h5'.format(outdir, df_name), key='df', mode='w'
        )



_Preprocess.register(SplitFlags)
Expand Down Expand Up @@ -1722,3 +1784,5 @@ def process(self, aman, proc_aman):
# Register the process classes with _Preprocess. GlitchComputeStats and
# GlitchClassification are the glitch summary-statistics and classification
# steps defined in this file.
_Preprocess.register(FocalplaneNanFlags)
_Preprocess.register(PointingModel)
_Preprocess.register(GlitchAggregate)
_Preprocess.register(GlitchComputeStats)
_Preprocess.register(GlitchClassification)

0 comments on commit 51a9a17

Please sign in to comment.