-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_team_table.py
64 lines (51 loc) · 1.71 KB
/
create_team_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Adds HTAN teams that are not yet listed to an existing Synapse table
import synapseclient
import pandas as pd
# Log in to Synapse; no credentials are passed explicitly here.
syn = synapseclient.Synapse()
syn.login()

# Pagination parameters for the team listing request
limit = 50
offset = 0
all_teams = []

# Collect the teams returned for the "HTAN2" fragment query, one page at a
# time, until the service returns a page with no results.
while True:
    result = syn.restGET(f"/teams?fragment=HTAN2&limit={limit}&offset={offset}")
    team_list = result.get("results", [])
    if not team_list:  # Break if no more results
        break
    all_teams.extend(team_list)
    offset += limit  # Move to the next page

# Synapse table where teams will be stored
htan_teams_table_id = "syn63714328"

# Query existing table data
query = f"SELECT * FROM {htan_teams_table_id}"
existing_table = syn.tableQuery(query)
existing_df = existing_table.asDataFrame()

# IDs already present in the table, normalised to strings so the membership
# test below does not depend on the dtype of the "id" column.
existing_team_ids = set(existing_df["id"].astype(str))

# Build one row per team that is not in the table yet.
# NOTE(review): the value order below presumably matches the table's column
# order (name, id, created, modified, owner id) - confirm against the schema.
new_rows = []
for team in all_teams:
    team_id = team["id"]
    print(f"Checking team {team_id}...")
    team_key = str(team_id)
    if team_key in existing_team_ids:
        continue
    # Record the ID right away so a team that shows up more than once in the
    # paginated results is only queued a single time.
    existing_team_ids.add(team_key)
    # NOTE(review): the creator's user ID is stored as returned by the API, so
    # no user-profile lookup is performed. If the table is meant to hold the
    # username instead, resolve it with syn.getUserProfile(...)["userName"].
    new_rows.append(
        [
            team["name"],
            team_id,
            team["createdOn"],
            team["modifiedOn"],
            team["createdBy"],
        ]
    )

# Only send new rows to the table if there are any
if new_rows:
    syn.store(synapseclient.Table(htan_teams_table_id, values=new_rows))
    print(f"Added {len(new_rows)} new rows to the Synapse table.")
else:
    print("No new teams to add.")