Skip to content

Commit

Permalink
Merge pull request #4 from lsst-sqre/tickets/DM-41368
Browse files Browse the repository at this point in the history
DM-41368: update prompt processing notebooks
  • Loading branch information
hsinfang authored Jan 9, 2024
2 parents 3093ca2 + 2b42e25 commit c7ebfb6
Show file tree
Hide file tree
Showing 3 changed files with 363 additions and 7 deletions.
333 changes: 333 additions & 0 deletions prompt-processing/groups.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,333 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7c0def35-3c83-4cec-be85-47d6703ee73f",
"metadata": {},
"source": [
"# Prompt Processing Bean Counting for {{ params.date }}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52b5ccf4-48c8-4616-bb35-dbb2e41ec57b",
"metadata": {},
"outputs": [],
"source": [
"date = '2023-12-22'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb778a07-569e-4c33-9c06-8f64c5a3d584",
"metadata": {},
"outputs": [],
"source": [
"dayobs = int(date.replace(\"-\", \"\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8a5cf56-490f-404c-86a0-c099545d982e",
"metadata": {},
"outputs": [],
"source": [
"from astropy.time import Time, TimeDelta\n",
"import pandas\n",
"from lsst_efd_client import EfdClient"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "67c79446-b539-497f-9810-21b4bf32d5f6",
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"logger = logging.getLogger(\"analysis\")\n",
"logger.setLevel(logging.DEBUG)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5d733f8-8bbd-4a63-81d9-7451583e43d2",
"metadata": {},
"outputs": [],
"source": [
"async def get_df_from_next_visit_events(date):\n",
"    \"\"\"Fetch the uncanceled on-sky AuxTel imaging nextVisit events of one night.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    date : `str`\n",
"        Day of observation, e.g. ``\"2023-12-22\"``. The queried window is the\n",
"        24 hours starting at 12:00 UTC on this date.\n",
"\n",
"    Returns\n",
"    -------\n",
"    df : `pandas.DataFrame` or `None`\n",
"        The selected events indexed by ``groupId``, or `None` if the EFD has\n",
"        no nextVisit events at all in the window.\n",
"    \"\"\"\n",
"    client = EfdClient(\"usdf_efd\")\n",
"\n",
"    start = Time(date, scale=\"utc\", format=\"isot\") + TimeDelta(12*60*60, format=\"sec\")\n",
"    end = start + TimeDelta(1, format=\"jd\")\n",
"\n",
"    topic = \"lsst.sal.ScriptQueue.logevent_nextVisit\"\n",
"    df = await client.select_time_series(topic, [\"*\"], start.utc, end.utc)\n",
"    if df.empty:\n",
"        # Nothing to filter, so skip the second EFD query for canceled events.\n",
"        logger.info(\"No events on %s\", date)\n",
"        return None\n",
"\n",
"    # Only select on-sky AuxTel imaging survey\n",
"    df = df.loc[\n",
"        (df[\"coordinateSystem\"] == 2)\n",
"        & (df[\"salIndex\"] == 2)\n",
"        & (df[\"survey\"] == \"AUXTEL_PHOTO_IMAGING\")\n",
"    ].set_index(\"groupId\")\n",
"    logger.info(\"There were %d AUXTEL_PHOTO_IMAGING nextVisit events on %s\", len(df), date)\n",
"\n",
"    # Ignore the explicitly canceled groups\n",
"    all_canceled = await client.select_time_series(topic + \"Canceled\", [\"*\"], start.utc, end.utc)\n",
"    if not all_canceled.empty:\n",
"        # Restrict to groups present in df so that drop() never sees an unknown label.\n",
"        canceled = df.index.intersection(all_canceled.set_index(\"groupId\").index).tolist()\n",
"        logger.info(\"Removing the canceled events: %s\", canceled)\n",
"        df = df.drop(canceled)\n",
"\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "84a6ef03-6e7a-4ec2-a857-ff2971b89578",
"metadata": {},
"outputs": [],
"source": [
"df_efd = await get_df_from_next_visit_events(date)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40c67ddd-4bfa-4f45-8775-bfbe9edf8927",
"metadata": {},
"outputs": [],
"source": [
"df_efd.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38550ad1-0a8b-493c-b1a2-85ac94b70083",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7b45d8f-09a4-4057-a5da-e8f28b7f27cf",
"metadata": {},
"outputs": [],
"source": [
"from lsst.daf.butler import Butler\n",
"butler = Butler(\"/repo/embargo\", writeable=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41f8c952-395f-40a8-9891-1415846704b9",
"metadata": {},
"outputs": [],
"source": [
"# Raw LATISS exposures of the imaging survey taken on the requested day_obs.\n",
"exposure_where = (\n",
"    \"exposure.science_program IN ('AUXTEL_PHOTO_IMAGING') \"\n",
"    \"and instrument='LATISS' and exposure.day_obs=day_obs \"\n",
")\n",
"results = butler.registry.queryDimensionRecords(\n",
"    \"exposure\",\n",
"    datasets=\"raw\",\n",
"    collections=\"LATISS/raw/all\",\n",
"    where=exposure_where,\n",
"    bind={\"day_obs\": dayobs},\n",
")\n",
"logger.info(f\"Found {results.count()} raws in {butler} for {dayobs}\")\n",
"\n",
"# Key the records by exposure id so that each exposure is kept once.\n",
"embargo_records = {record.id: record for record in results}\n",
"\n",
"df_butler = pandas.DataFrame.from_records(\n",
"    [record.toDict() for record in embargo_records.values()]\n",
").set_index(\"group_name\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e518792e-590a-4af5-8b21-1c39089c2129",
"metadata": {},
"outputs": [],
"source": [
"df_butler.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3bcd8dfe-86c6-4c41-875a-6b90648367c5",
"metadata": {},
"outputs": [],
"source": [
"# Groups announced in the EFD for which the butler holds no raw exposure.\n",
"groups_no_raw = set(df_efd.index).difference(df_butler.index)\n",
"logger.info(f\"{len(groups_no_raw)} group had records in EFD but no raws in the embargo butler: {groups_no_raw}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ecec251e-466d-46f1-9420-ac0c9e39a6e6",
"metadata": {},
"outputs": [],
"source": [
"df_efd = df_efd.drop(groups_no_raw)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9032204-01f8-4c48-85aa-92d5b28f9244",
"metadata": {},
"outputs": [],
"source": [
"if len(df_efd) != len(df_butler):\n",
" logger.warning(\"Counts do not match; need attention\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8074d56-f0d2-403b-bc72-1be0824e43d9",
"metadata": {},
"outputs": [],
"source": [
"# Join the EFD events and the butler exposure records on the group identifier.\n",
"# validate=\"one_to_one\" makes the merge fail if a group is repeated on either side.\n",
"df_md = (\n",
"    df_efd.reset_index()\n",
"    .merge(\n",
"        df_butler.reset_index(),\n",
"        how=\"outer\",\n",
"        left_on=\"groupId\",\n",
"        right_on=\"group_name\",\n",
"        suffixes=(\"_efd\", \"_butler\"),\n",
"        validate=\"one_to_one\",\n",
"    )\n",
"    .set_index(\"groupId\")\n",
")\n",
"\n",
"logger.info(f\"{len(df_md)} groups in the table\")\n",
"\n",
"fields_efd = [\"filters\", \"position0\", \"position1\"]\n",
"fields_butler = [\"seq_num\", \"tracking_ra\", \"tracking_dec\", \"physical_filter\", \"target_name\", \"id\"]\n",
"df_md[fields_efd + fields_butler].head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0cc8f214-6ac2-4430-ac45-3cc69ef565c4",
"metadata": {},
"outputs": [],
"source": [
"# Columns expected to hold a single value for the whole night.\n",
"boring_cols = [\n",
"    \"instrument_efd\", \"instrument_butler\", \"science_program\", \"observation_reason\",\n",
"    \"observation_type\", \"cameraAngle\", \"has_simulated\", \"dome\", \"coordinateSystem\",\n",
"    \"rotationSystem\", \"private_identity\", \"private_origin\", \"private_revCode\",\n",
"    \"salIndex\", \"totalCheckpoints\", \"nimages\",\n",
"]\n",
"for col in boring_cols:\n",
"    if df_md[col].nunique() != 1:\n",
"        # Not constant after all: keep the column and report its values.\n",
"        logger.warning(f\"Column {col} has {df_md[col].unique()}\")\n",
"        continue\n",
"    logger.info(f\"Dropping column {col} with only {df_md[col].unique()}\")\n",
"    df_md.drop(columns=[col], inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b14d0660-34da-44e2-ab50-3a83f576621c",
"metadata": {},
"outputs": [],
"source": [
"# Collapse each (EFD column, butler column) pair into one column called `name`\n",
"# when both sides agree row by row; otherwise keep both columns and warn.\n",
"for (col1, col2, name) in ((\"filters\", \"physical_filter\", \"filter\"),):\n",
"    if df_md[col1].equals(df_md[col2]):\n",
"        df_md.drop(columns=[col1,], inplace=True)\n",
"        df_md.rename(columns={col2: name}, inplace=True)\n",
"    else:\n",
"        # Report through the notebook logger like every other diagnostic here.\n",
"        # Note that the `name` column is NOT created in this case.\n",
"        logger.warning(f\"Some {name} did not match\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55908452-ec11-4adb-8631-827fe1afb9f2",
"metadata": {},
"outputs": [],
"source": [
"for col in (\"filter\", \"exposure_time\", \"survey\", \"day_obs\" ):\n",
" logger.info(f\"Column {col} has {df_md[col].unique()}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "42722584-834e-4220-bff0-edd71e9e6c6c",
"metadata": {},
"outputs": [],
"source": [
"df_md.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7dfbddf8-0243-49fc-bc27-09f8eee6e2ad",
"metadata": {},
"outputs": [],
"source": [
"# Difference between the position announced in nextVisit and the tracking\n",
"# position recorded for the exposure.\n",
"# The first difference is wrapped into [-180, 180) so that a pair straddling\n",
"# RA = 0/360 does not show up as an offset of about +/-360.\n",
"# NOTE(review): assumes position0 and tracking_ra are both in degrees - confirm.\n",
"df_md[\"offset0\"] = (df_md[\"position0\"] - df_md[\"tracking_ra\"] + 180) % 360 - 180\n",
"df_md[\"offset1\"] = df_md[\"position1\"] - df_md[\"tracking_dec\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c5ace6e-4fef-4a0e-a656-e742cb49d2cd",
"metadata": {},
"outputs": [],
"source": [
"df_md[[\"seq_num\", \"position0\", \"tracking_ra\", \"offset0\", \"position1\", \"tracking_dec\", \"offset1\", \"filter\" ]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "36a85638-1f14-4bcf-9c9a-c0cfcedd6f9a",
"metadata": {},
"outputs": [],
"source": [
"df_md[\"offset0\"].describe(), df_md[\"offset1\"].describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec7e51b3-807d-4406-bbab-ff97679559d6",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "LSST",
"language": "python",
"name": "lsst"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
12 changes: 12 additions & 0 deletions prompt-processing/groups.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
title: Prompt Processing Bean Counting
description: Group-based tables and plots of prompt processing for a given night.
authors:
- name: Hsin-Fang Chiang
slack: hsinfang
tags:
- prompt-processing
parameters:
date:
type: string
description: Day of observation (YYYY-MM-DD)
default: "2023-08-29"
25 changes: 18 additions & 7 deletions prompt-processing/timing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,10 @@
},
"outputs": [],
"source": [
"Markdown(\"There were %i uncanceled nextVisit events on %s\"%(len(groups), dayobs))"
"# Jupyter only renders the value of a cell's final top-level expression; a\n",
"# Markdown(...) evaluated inside an if/else branch is silently discarded.\n",
"# Pick the message with a conditional expression so the result is displayed.\n",
"Markdown(\n",
"    \"There were %i uncanceled nextVisit events on %s\" % (len(groups), dayobs)\n",
"    if groups\n",
"    else \"No records were found\"\n",
")"
]
},
{
Expand Down Expand Up @@ -298,17 +301,25 @@
},
"outputs": [],
"source": [
"timestamps = get_file_timestamps(butler, \"apdb_marker\", \n",
" collections=[f\"LATISS/prompt/output-{date}/ApPipe/prompt-proto-service-*\"])"
"dataset_type = \"calexp\" #\"apdb_marker\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "827189d8-2f63-4fdd-850f-1fa6204dd132",
"metadata": {
"tags": []
},
"id": "8c135a4e-37e3-4710-8b7a-501d282b6860",
"metadata": {},
"outputs": [],
"source": [
"timestamps = get_file_timestamps(butler, dataset_type, \n",
" collections=[f\"LATISS/prompt/output-{date}/*/prompt-proto-service-*\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f3a42193-0fa6-4f6e-b847-c9042db8e266",
"metadata": {},
"outputs": [],
"source": [
"seconds = []\n",
Expand Down

0 comments on commit c7ebfb6

Please sign in to comment.