Skip to content

Commit

Permalink
enh: store dcnum job metadata in output file
Browse files (browse the repository at this point in the history)
  • Loading branch information
paulmueller committed Nov 24, 2023
1 parent 49e4553 commit 319ecf8
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
1 change: 0 additions & 1 deletion src/dcnum/feat/feat_background/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,6 @@ def get_ppkw_from_ppid(bg_ppid):

def process(self):
self.process_approach()

bg_ppid = self.get_ppid()
# Store pipeline information in the image_bg feature
self.h5out["events/image_bg"].attrs["dcnum ppid background"] = bg_ppid
Expand Down
19 changes: 15 additions & 4 deletions src/dcnum/logic/ctrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self, job: DCNumPipelineJob, *args, **kwargs):
self.job = job
self.ppid, self.pphash, self.ppdict = job.get_ppid(ret_hash=True,
ret_dict=True)
self.event_count = 0

self._data = None
# current job state
Expand Down Expand Up @@ -197,8 +198,18 @@ def run(self):
if self.job["no_basins_in_output"]:
self.task_transfer_basin_data()

# Add the log file to the resulting .rtdc file
with HDF5Writer(self.path_temp_out) as hw:
# Add important metadata
hw.h5.attrs["pipeline:dcnum generation"] = self.ppdict["gen_id"]
hw.h5.attrs["pipeline:dcnum data"] = self.ppdict["dat_id"]
hw.h5.attrs["pipeline:dcnum background"] = self.ppdict["bg_id"]
hw.h5.attrs["pipeline:dcnum segmenter"] = self.ppdict["seg_id"]
hw.h5.attrs["pipeline:dcnum feature"] = self.ppdict["feat_id"]
hw.h5.attrs["pipeline:dcnum gate"] = self.ppdict["gate_id"]
hw.h5.attrs["pipeline:dcnum hash"] = self.pphash
hw.h5.attrs["pipeline:dcnum yield"] = self.event_count
hw.h5.attrs["experiment:event count"] = self.event_count
# Add the log file to the resulting .rtdc file
hw.store_log(
time.strftime("dcnum-process-%Y-%m-%d-%H.%M.%S"),
self.path_log.read_text().split("\n"))
Expand All @@ -225,7 +236,6 @@ def task_background(self):
**self.job["background_kwargs"]) as bic:

bic.process()
bic.h5out.attrs["pipeline:dcnum background"] = bic.get_ppid()
self.logger.info("Finished background computation")

def task_segment_extract(self):
Expand Down Expand Up @@ -307,7 +317,7 @@ def task_segment_extract(self):
pmax = 0.95 # for
while True:
counted_frames = thr_coll.written_frames
counted_events = thr_coll.written_events
self.event_count = thr_coll.written_events
td = time.monotonic() - t0
# set the current status
self._progress = round(
Expand Down Expand Up @@ -346,7 +356,8 @@ def task_segment_extract(self):
logger=self.logger,
name="writer")

if counted_events == 0:
self.event_count = thr_coll.written_events
if self.event_count == 0:
self.logger.error(
f"No events found in {self.data.path}! Please check the "
f"input file or revise your pipeline.")
Expand Down

0 comments on commit 319ecf8

Please sign in to comment.