Skip to content

Commit

Permalink
Merge pull request #28 from IBM/code-dev
Browse files: browse the repository at this point in the history
Minor modifications + bug fix for the label column name (`labels` → `Y`)
  • Loading branch information
RaulFD-creator authored Oct 30, 2024
2 parents d898241 + bd92d7a commit 704ade5
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 16 deletions.
6 changes: 3 additions & 3 deletions autopeptideml/autopeptideml.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ def train_test_partition(
independent hold-out evaluation sets.
:param df: Dataset to partition with the following columns
`id`, `sequence`, and `labels`.
`id`, `sequence`, and `Y`.
:type df: pd.DataFrame
:param threshold: Maximum sequence identity allowed between sequences
in training and evaluation sets, defaults to 0.3
Expand Down Expand Up @@ -477,7 +477,7 @@ def train_test_partition(
df=df,
similarity_metric=alignment,
field_name='sequence',
label_name='labels',
label_name='Y',
denominator=denominator,
test_size=test_size,
threshold=threshold,
Expand Down Expand Up @@ -553,7 +553,7 @@ def train_val_partition(
df=df,
similarity_metric=alignment,
field_name='sequence',
label_name='labels',
label_name='Y',
threads=self.threads,
denominator=denominator,
threshold=threshold,
Expand Down
28 changes: 15 additions & 13 deletions autopeptideml/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,12 @@ def main():

if args.test_partition == 'True' and args.splits is None:
datasets = apml.train_test_partition(
df,
args.test_threshold,
args.test_size,
args.test_alignment,
os.path.join(args.outputdir, 'splits')
df=df,
threshold=args.test_threshold,
test_size=args.test_size,
denominator='n_aligned',
alignment=args.test_alignment,
outputdir=os.path.join(args.outputdir, 'splits')
)
else:
datasets = {
Expand All @@ -111,18 +112,18 @@ def main():

if args.val_partition == 'True' and args.folds is None:
folds = apml.train_val_partition(
datasets['train'],
args.val_method,
args.val_threshold,
args.val_alignment,
args.val_n_folds,
os.path.join(args.outputdir, 'folds')
df=datasets['train'],
method=args.val_method,
threshold=args.val_threshold,
alignment=args.val_alignment,
n_folds=args.val_n_folds,
outputdir=os.path.join(args.outputdir, 'folds')
)
else:
folds = [
{'train': pd.read_csv(os.path.join(args.folds, f'train_{i}.csv')),
'val': pd.read_csv(os.path.join(args.folds, f'val_{i}.csv'))}
for i in range(args.val_n_folds)
for i in range(args.val_n_folds)
]

id2rep = apml.compute_representations(datasets, re)
Expand All @@ -148,10 +149,11 @@ def main():
if args.verbose is True:
print(results)


def predict():
args = parse_cli_predict()

re = RepresentationEngine(args.plm, args.plm_batch_size)
re = RepresentationEngine(args.plm, args.plm_batch_size)
apml = AutoPeptideML(args.verbose, args.threads, 1)
df = apml.curate_dataset(args.dataset, args.outputdir)
apml.predict(df, re, args.ensemble, args.outputdir)

0 comments on commit 704ade5

Please sign in to comment.