Skip to content

Commit

Permalink
add regex true
Browse files (browse the repository at this point in the history)
  • Loading branch information
mdsage1 committed Mar 22, 2024
1 parent 2380f11 commit 0be846e
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions apps/openchallenges/edam-etl/src/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -62,19 +62,27 @@ def print_info_statistics(df: pd.DataFrame) -> None:

# Use pandas' vectorized string operations to count occurrences
data_count = (
df["class_id"].str.contains(data_pattern, case=False, na=False).sum()
df["class_id"]
.str.contains(data_pattern, case=False, na=False, regex=True)
.sum()
)
operation_count = (
df["class_id"].str.contains(operation_pattern, case=False, na=False).sum()
)
format_count = (
df["class_id"].str.contains(format_pattern, case=False, na=False).sum()
df["class_id"]
.str.contains(format_pattern, case=False, na=False, regex=True)
.sum()
)
topic_count = (
df["class_id"].str.contains(topic_pattern, case=False, na=False).sum()
df["class_id"]
.str.contains(topic_pattern, case=False, na=False, regex=True)
.sum()
)
identifier_count = (
df["class_id"].str.contains(identifier_pattern, case=False, na=False).sum()
df["class_id"]
.str.contains(identifier_pattern, case=False, na=False, regex=True)
.sum()
)

# Calculate 'other' count by subtracting the specific counts from the total
Expand Down

0 comments on commit 0be846e

Please sign in to comment.