Skip to content

Commit

Permalink
Merge pull request #40 from Changwanseo/development
Browse files Browse the repository at this point in the history
0.3.23.5 pull
  • Loading branch information
Changwanseo authored Jul 25, 2024
2 parents 227af86 + bc5252f commit f905b43
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 1 deletion.
24 changes: 24 additions & 0 deletions funvip/src/concatenate.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,30 @@ def concatenate_df(V, path, opt):
# Keep only non-empty dataframes
if isinstance(V.dict_gene_SR[gene], pd.DataFrame):
gene_list.append(gene)

df = deepcopy(V.dict_gene_SR[gene].set_index(["qseqid", "sseqid"]))

"""
df = deepcopy(
V.dict_gene_SR[gene]
.set_index(["qseqid", "sseqid"])
.drop(
columns=[
"pident",
"length",
"mismatch",
"gaps",
"qstart",
"qend",
"sstart",
"send",
"evalue",
"bitscore",
"subject_group",
]
)
)
"""
df_list.append(df)

if len(df_list) <= 0:
Expand All @@ -179,6 +202,7 @@ def concatenate_df(V, path, opt):
df_multigene_regression_ori = pd.concat(df_list, axis=1)

# Drop unnecessary columns for processing

df_multigene_regression_ori.drop(
columns=[
"pident",
Expand Down
19 changes: 19 additions & 0 deletions funvip/src/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def calculate_pident(df):
# mismatch, gaps, and bitscore are calculated as the sum over fragments
# evalues are combined by multiplying them, because they are probabilities

"""
df = df.groupby(["qseqid", "sseqid"], dropna=True, as_index=False).aggregate(
{
"qseqid": lambda x: set(x),
Expand All @@ -104,6 +105,24 @@ def calculate_pident(df):
"bitscore": np.sum,
}
)
"""
# TODO: this aggregation should be tested, especially against previous versions
# (presumably of pandas, given the switch to string aggregator names - confirm)
df = df.groupby(["qseqid", "sseqid"], dropna=True, as_index=False).aggregate(
{
"qseqid": lambda x: set(x),
"sseqid": lambda x: set(x),
"pident": lambda x: tuple(x),
"length": lambda x: tuple(x),
"mismatch": "sum",
"gaps": "sum",
"qstart": lambda x: tuple(x),
"qend": lambda x: tuple(x),
"sstart": lambda x: tuple(x),
"send": lambda x: tuple(x),
"evalue": "prod",
"bitscore": "sum",
}
)

# Calculate pident
df["pident"] = df.apply(calculate_pident, axis=1)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "FunVIP"
version = "0.3.23.4"
version = "0.3.23.5"
description = "Fungal Validation & Identification Pipeline"
authors = [{name = "Changwan Seo", email = "[email protected]"}]
urls = { "Homepage" = "https://github.com/Changwanseo/FunVIP" }
Expand Down

0 comments on commit f905b43

Please sign in to comment.